Reputation: 11
This is my table as dataframe:
col1 col2 col3 col4 col5 col6 col7
1 1 1 1 137 500 11
1 1 1 1 120 500 11
1 1 2 1 101 500 11
1 1 3 1 55 500 11
1 2 2 1 133 340 12
1 2 2 1 125 340 12
1 2 1 1 63 340 12
I have to update the data frame using difference between col6 value and cumulative sum of col5 upto that particular row and store that value in separate column called 'updated'.Cumulative sum should be upto the value in Col7 As an example: col8 values will be:
col8
(500-137)
(500-137-120)
(500-137-120-101)
(500-137-120-101-55)
(340-133)
(340-133-125)
(340-133-125-63)
Could you please suggest some solution? I have to use python pandas.
Upvotes: 0
Views: 1863
Reputation: 21401
Its for SQL Server
SAMPLE TABLE
CREATE TABLE #TEMP(col1 INT, col2 INT, col3 INT, col4 INT, col5 INT, col6 INT, col7 INT)
INSERT INTO #TEMP
SELECT 1, 1, 1, 1, 137, 500, 11
UNION ALL
SELECT 1, 1, 1, 1, 120, 500, 11
UNION ALL
SELECT 1, 1, 2, 1, 101, 500, 11
UNION ALL
SELECT 1, 1, 3, 1, 55, 500, 11
UNION ALL
SELECT 1, 2, 2, 1, 133, 340, 12
UNION ALL
SELECT 1, 2, 2, 1, 125, 340, 12
UNION ALL
SELECT 1, 2, 1, 1, 63, 340, 12
QUERY
;WITH CTE AS
(
-- Retrieve row number for each type of COL6 in default order
SELECT ROW_NUMBER() OVER(PARTITION BY COL6 ORDER BY (SELECT 0))rNO,*
FROM #TEMP
)
-- Append current COL6 with each items
SELECT col1 , col2, col3 , col4 , col5 , col6 , col7,
'(' + CAST(C2.COL6 AS VARCHAR(200))+'-'+
-- Retrives comma separated values of previous COL5 values
-- for each type in COL6
SUBSTRING(
(SELECT
REPLACE (', ' + CAST(COL6 AS VARCHAR(200)) + '-' + CAST(COL5 AS VARCHAR(200)),', '+CAST(COL6 AS VARCHAR(200)),'')
FROM CTE
WHERE RNO<=C2.RNO AND COL6=C2.COL6
ORDER BY RNO
FOR XML PATH('')),2,200000) +')' COL8
FROM CTE C2
EDIT :
If you want running total and store in COL8
, you can try the below query
;WITH CTE AS
(
-- Retrieve row number for each type of COL6 in default order
SELECT ROW_NUMBER() OVER(PARTITION BY COL6 ORDER BY (SELECT 0))rNO,*
FROM #TEMP
)
SELECT col1 , col2, col3 , col4 , col5 , col6 , col7,
(
SELECT SUM(COL5)+col6
FROM CTE
WHERE RNO<=C2.RNO AND COL6=C2.COL6
GROUP BY col6
)COL8
FROM CTE C2
EDIT 2 : Updated with update query for the table
;WITH CTE AS
(
-- Retrieve row number for each type of COL6 in default order
SELECT ROW_NUMBER() OVER(PARTITION BY COL6 ORDER BY (SELECT 0))rNO,*
FROM #TEMP
)
UPDATE #TEMP SET COL8 = TAB.COL8
FROM
(
SELECT col1 , col2, col3 , col4 , col5 , col6 , col7,
(
SELECT SUM(COL5)+col6
FROM CTE
WHERE RNO<=C2.RNO AND COL6=C2.COL6
GROUP BY col6
)COL8
FROM CTE C2
)TAB
WHERE TAB.COL5=#TEMP.COL5 AND TAB.COL6=#TEMP.COL6
Upvotes: -1
Reputation: 93754
Try this. To find running total
there are different ways.
;WITH cte
AS (SELECT *,
Row_number()OVER(partition BY col6 ORDER BY col6)rn
FROM Yourtable) As Col8
SELECT *,
col6 - (SELECT Sum(b.col5)
FROM cte b
WHERE a.col6 = b.col6
AND b.rn <= a.rn)
FROM cte a;
If you are using Sql Server 2012+
then try this.
WITH cte
AS (SELECT *,
Row_number()OVER(partition BY col6 ORDER BY col6)rn
FROM Yourtable)
SELECT *,
col6 - Sum(col5)
OVER(partition BY col6 ORDER BY col6 rows UNBOUNDED PRECEDING) As Col8
FROM cte a
Upvotes: 0
Reputation: 60230
I think your attempt to use cumsum
may not have worked because you didn't group by col7
- it's apparent from your example calculations that you only calculate the cumulative sum within each value of col7
, so I think you want:
df['cumsums'] = df.groupby('col7')['col5'].cumsum()
df['updated'] = df['col6'] - df['cumsums']
df
Out[8]:
col1 col2 col3 col4 col5 col6 col7 updated cumsums
0 1 1 1 1 137 500 11 363 137
1 1 1 1 1 120 500 11 243 257
2 1 1 2 1 101 500 11 142 358
3 1 1 3 1 55 500 11 87 413
4 1 2 2 1 133 340 12 207 133
5 1 2 2 1 125 340 12 82 258
6 1 2 1 1 63 340 12 19 321
Upvotes: 3