user7950617
user7950617

Reputation:

SQL Server grouping rows

I have this data:

     Id    | Name  | count |  Group_number      
     ------+-------+-------+--------------
     1     | cdd   |  50   |       0  
     2     | cdd   |  15   |       0  
     3     | cdd   |  0    |       0  
     4     | cdd   |  25   |       0   
     5     | cdd   |  11   |       0

I want a script that makes three or four groups on condition: Sum(count) for each group < 50

I want this output:

     1    | cdd    |  50    |      1  
     2    | cdd    |  15    |      2  
     3    | cdd    |  0     |      2  
     4    | cdd    |  25    |      2  
     5    | cdd    |  11    |      3

Upvotes: 2

Views: 61

Answers (2)

Edmond Quinton
Edmond Quinton

Reputation: 1739

For records where the count is less than 50 we can simply generate a grouping id by calculating a running total on the count and then divide this running total by 50. However, since some records may already have a count that is greater than or equal to 50 might generate an incorrect id. To solve this problem, we need to somehow force the generation of a new grouping id on the next record. This can be done by simply adjusting the count the next record by 50 if the current records count is 50 or greater. The following example demonstrates how this can be done:

CREATE TABLE #Items
(
     [Id]       INT NOT NULL PRIMARY KEY
    ,[Name]     VARCHAR(50) NOT NULL
    ,[Count]    INT NOT NULL
)

INSERT INTO #Items
VALUES 
(1, 'cdd', 50 ),         
(2, 'cdd', 15 ),       
(3, 'cdd', 0  ),        
(4, 'cdd', 25 ),         
(5, 'cdd', 11 );       

;WITH CTE_ItemCountsAdjusted
AS
(
    SELECT   [Id]       
            ,[Name] 
            ,[Count]
            ,LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) AS PrevCount
            ,(
                CASE 
                    WHEN LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) >= 50 THEN [Count] + 50
                    ELSE [Count]
                END
            ) AdjustedCount
    FROM    #Items
)
SELECT   [Id]       
        ,[Name] 
        ,[Count]
        ,SUM([AdjustedCount]) OVER (PARTITION BY [Name] ORDER BY [Id] ROWS UNBOUNDED PRECEDING) / 50 AS [Group_number]
FROM    CTE_ItemCountsAdjusted
ORDER BY    [Id]   

This method eliminates the need for recursive calls. Note if you need the group id to be strictly sequential (no gaps between group numbers) then you can make use of the DENSE_RANK() windowing function to achieve this as per following example:

INSERT INTO #Items
VALUES 
(1, 'cdd', 50 ),         
(2, 'cdd', 15 ),       
(3, 'cdd', 0  ),        
(4, 'cdd', 25 ),         
(5, 'cdd', 11 ),      
(6, 'cdd', 200 ),
(7, 'cdd', 10 );  

;WITH CTE_ItemCountsAdjusted
AS
(
    SELECT   [Id]       
            ,[Name] 
            ,[Count]
            ,LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) AS PrevCount
            ,(
                CASE 
                    WHEN LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) >= 50 THEN [Count] + 50
                    ELSE [Count]
                END
            ) AdjustedCount
    FROM    #Items

),CTE_ItemCountsWithGroupID
AS
(
    SELECT   [Id]       
            ,[Name] 
            ,[Count]
            ,SUM([AdjustedCount]) OVER (PARTITION BY [Name] ORDER BY [Id] ROWS UNBOUNDED PRECEDING) / 50 AS [Group_number]
    FROM    CTE_ItemCountsAdjusted  
)
SELECT   [Id]       
        ,[Name] 
        ,[Count]
        ,[Group_number]

Upvotes: 1

Vamsi Prabhala
Vamsi Prabhala

Reputation: 49260

Assuming this has to be done for each name, you can use a recursive cte.

with rownums as (select t.*,row_number() over(partition by name order by id) as rnum from t)
,cte(rnum,id,name,cnt,runningsum,grp) as  
(select rnum,id,name,cnt,cnt,1  from rownums where rnum=1
 union all
 select t.rnum,t.id,t.name,t.cnt
 ,case when c.runningsum+t.cnt > 50 then t.cnt else c.runningsum+t.cnt end
 ,case when c.runningsum+t.cnt > 50 then t.id else c.grp end
 from cte c
 join rownums t on t.rnum=c.rnum+1 and t.name=c.name
) 
select id,cnt,name,dense_rank() over(partition by name order by grp) as grp
from cte

Sample Demo

Keep track of the running sum and reset it when it goes over 50. Also remember the id when the sum goes over 50. This can be used to assign group numbers.

Upvotes: 3

Related Questions