Alavi
Alavi

Reputation: 708

Counting consecutive duplicate field with SQL

I have this data in myTable :

  Date           Status    PersonID
-----------------------------------------
   2018/01/01         2        2015     ┐  2
   2018/01/02         2        2015     ┘
   2018/01/05         2        2015     ┐
   2018/01/06         2        2015       3
   2018/01/07         2        2015     ┘
   2018/01/11         2        2015     - 1
   2018/01/01         2        1018     - 1
   2018/01/03         2        1018     - 1
   2018/01/05         2        1018     ┐ 2
   2018/01/06         2        1018     ┘
   2018/01/08         2        1018     ┐ 2
   2018/01/09         2        1018     ┘
   2018/01/03         2        1625     ┐
   2018/01/04         2        1625       4
   2018/01/05         2        1625     
   2018/01/06         2        1625     ┘
   2018/01/17         2        1625     - 1
   2018/01/29         2        1625     - 1
-----------------------------------

and I need to count consecutive duplicate values like this:

This is the result I need:

   count    personid
    -----------------
    2        2015
    3        2015
    1        2015
    1        1018
    1        1018
    2        1018
    2        1018
    4        1625
    1        1625
    1        1625

I am using SQL Server 2016 - please help

Upvotes: 3

Views: 371

Answers (5)

Chetan Sanghani
Chetan Sanghani

Reputation: 2111

Here is the easiest and small query

 CREATE TABLE #T (
      [Date] date,
      [Status] int,
      PersonId int
    );
    INSERT #T
      VALUES ('2018/01/01', 2, 2015),
      ('2018/01/02', 2, 2015),
      ('2018/01/05', 2, 2015),
      ('2018/01/06', 2, 2015),
      ('2018/01/07', 2, 2015),
      ('2018/01/11', 2, 2015),
      ('2018/01/01', 2, 1018),
      ('2018/01/03', 2, 1018),
      ('2018/01/05', 2, 1018),
      ('2018/01/06', 2, 1018),
      ('2018/01/08', 2, 1018),
      ('2018/01/09', 2, 1018),
      ('2018/01/03', 2, 1625),
      ('2018/01/04', 2, 1625),
      ('2018/01/05', 2, 1625),
      ('2018/01/06', 2, 1625),
      ('2018/01/17', 2, 1625),
      ('2018/01/29', 2, 1625)


    SELECT
      MAX(cnt),
      personid
    FROM (SELECT
      ROW_NUMBER() OVER (PARTITION BY GRP ORDER BY [Date]) AS cnt,
      personid,
      GRP
    FROM (SELECT
      personid,
      [Date],
      DATEDIFF(DAY, '1900-01-01', [Date]) - ROW_NUMBER() OVER (ORDER BY Personid DESC) AS GRP
    FROM #T) A) AS B
    GROUP BY personid,
             GRP
    ORDER BY PersonId DESC

Upvotes: 0

Tom J Muthirenthi
Tom J Muthirenthi

Reputation: 3340

WITH T1 AS
(SELECT Date,
       Date - ROW_NUMBER() OVER (PARTITION BY Status, PersonID ORDER BY Date) AS Grp
FROM myTable)
SELECT personid,
       ROW_NUMBER() OVER (PARTITION BY Grp ORDER BY Date) AS Consecutive
FROM T1

On this result you can apply a MAX(), to get the number of records for each personid.

Refer this question to get breakdown details

Upvotes: 1

KrishnakumarS
KrishnakumarS

Reputation: 124

This type of problem is known as 'Gaps and Islands'. You are either identifying consecutive data sets (Islands) or range of values between two islands (Gaps). There are many different ways to achieve the results that also performs well with large data sets. You can refer the below well written articles for that.

https://www.itprotoday.com/sql-server/solving-gaps-and-islands-enhanced-window-functions

https://www.red-gate.com/simple-talk/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/

https://www.sqlshack.com/data-boundaries-finding-gaps-islands-and-more/

Here is an attempt to your question.

CREATE TABLE #test 
(
     dt DATETIME
    ,Status INT
    ,PersonID INT
)

INSERT INTO #Test (dt, Status, PersonID) VALUES
('2018/01/01', 2, 2015),
('2018/01/02', 2, 2015),
('2018/01/05', 2, 2015),
('2018/01/06', 2, 2015),
('2018/01/07', 2, 2015),
('2018/01/11', 2, 2015),
('2018/01/01', 2, 1018),
('2018/01/03', 2, 1018),
('2018/01/05', 2, 1018),
('2018/01/06', 2, 1018),
('2018/01/08', 2, 1018),
('2018/01/09', 2, 1018),
('2018/01/03', 2, 1625),
('2018/01/04', 2, 1625),
('2018/01/05', 2, 1625),
('2018/01/06', 2, 1625),
('2018/01/17', 2, 1625),
('2018/01/29', 2, 1625)

;with cte_dt_from
AS
(
    SELECT PersonID, MIN(Dt) dt_from_start
    FROM #Test
    GROUP BY PersonID
),
cte_offset_num
AS
(
SELECT      T1.PersonID, T1.dt, DATEDIFF(DAY, T2.dt_from_start, T1.dt) dt_offset
FROM        #test T1
INNER JOIN  cte_dt_from T2 ON T2.PersonID = T1.PersonID
),
cte_starting_point
AS
(
    SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
    FROM cte_offset_num AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM cte_offset_num AS B
        WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset - 1)
)
,
cte_ending_point
AS
(
    SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
    FROM cte_offset_num AS A
    WHERE NOT EXISTS (
        SELECT *
        FROM cte_offset_num AS B
        WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset + 1)
)
SELECT (E.dt_offset - S.dt_offset)  + 1 AS [count], S.PersonID
FROM cte_starting_point AS S
JOIN cte_ending_point AS E ON E.PersonID = S.PersonID AND E.rownum = S.rownum
ORDER BY S.PersonID;

DROP TABLE #Test;

Upvotes: 2

Zaynul Abadin Tuhin
Zaynul Abadin Tuhin

Reputation: 31993

The main challenge to find out the gap between two dates and regarding each date you can create that gap by using row_number() analytic function and datediff function

with cte as
(

select '2018-01-01' as d, 2 as id , 2015 as pid
union all
select '2018-01-02',2,2015
union all
select '2018-01-05',2,2015 union all
select '2018-01-06',2,2015 union all
select '2018-01-07',2,2015 
union all
select '2018-01-11',2,2015  


), cte1 as (SELECT *, 
                datediff(day, Row_number() 
                                OVER ( 
                                  partition BY id, pid 
                                  ORDER BY [d] ), [d]) AS dif
         FROM   cte
         ) select distinct pid,count(*) over(partition by pid,dif) as cnt from cte1

Upvotes: 1

PSK
PSK

Reputation: 17943

This is a 'Gaps and Islands' problem, you can try like following.

;with cte 
     as (select *, 
                dateadd(day, -row_number() 
                                over (partition by status, personid 
                                  order by [date] ), [date]) AS grp 
         FROM   @table
     )
     ,cte1 
     AS (select *,row_number() over(partition by  personid, grp,status order by [date]) rn,
                count(*) over(partition by personid, grp) ct 
         from   cte
        ) 

select  ct as count, personid 
from cte1 
where rn=1

Online Demo

Note: You might not get the rows in same sequence as you don't have any column which can be used for ordering the way you showed in the desired output.

Upvotes: 4

Related Questions