manom
manom

Reputation: 11

Aggregate dates within specific day range

I have a table with id's and dates that are grouped by close to each other dates

1       24/05/2010

10      18/06/2012
10      20/06/2012
10      21/06/2012

10      21/02/2014

10      04/07/2014

10      12/12/2016

1004    18/07/2016

1007    01/01/2017
1007    02/02/2017
1007    03/02/2017

1007    31/08/2017


create table tt ( id int, startdate date);  
Insert into TT values (1,'24/05/2010');
Insert into TT values (10,'18/06/2012');
Insert into TT values (10,'20/06/2012');
Insert into TT values (10,'21/06/2012');
Insert into TT values (10,'21/02/2014');
Insert into TT values (10,'04/07/2014');
Insert into TT values (10,'12/12/2016');
Insert into TT values (1004,'18/07/2016');
Insert into TT values (1007,'01/01/2017');
Insert into TT values (1007,'02/02/2017');
Insert into TT values (1007,'03/02/2017');
Insert into TT values (1007,'31/08/2017');

I am trying to merge overlapping dates and dates with gaps of maximum of 90 days and list them in single row showing date range grouped by id, so that the end result is like:

1       24/05/2010  24/05/2010
10      18/06/2012  21/06/2012
10      21/02/2014  21/02/2014
10      04/07/2014  04/07/2014
10      12/12/2016  12/12/2016
1004    18/07/2016  18/07/2016
1007    01/01/2017  03/02/2017
1007    31/08/2017  31/08/2017

Upvotes: 0

Views: 52

Answers (2)

Radagast81
Radagast81

Reputation: 3016

Ok and here is another solution with just using LAG and LEAD instead of recursive query:

WITH prv AS (SELECT id, startdate, lag(startdate) over (PARTITION BY id ORDER BY startdate) prev_date
             FROM tt)
   , NXT AS (SELECT id, startdate, lead(startdate) over (PARTITION BY id ORDER BY startdate) next_start
            FROM prv
           WHERE prev_date IS NULL OR prev_date < startdate - 90)
SELECT id,startdate
     , NVL((SELECT MAX(startdate) 
              FROM tt
             WHERE tt.id = nxt.id
               AND tt.startdate BETWEEN nxt.startdate AND nxt.next_start - 1)
          , startdate) enddate
  FROM nxt

Upvotes: 1

Radagast81
Radagast81

Reputation: 3016

I think you have to use a recursive query to solve your problem. Here is what I would use:

WITH ord AS (SELECT id, startdate, ROW_NUMBER() over(PARTITION BY ID ORDER BY startdate) ord FROM tt) -- Connect every record with a number
   , rek(ord, id, startdate, enddate) AS 
         (SELECT ord, id, startdate, startdate FROM ord WHERE ord = 1 -- Take the first record of every group
          UNION ALL
          -- Then recursively take the next record
          SELECT rek.ord + 1
               , ord.id
               -- If the distance between old enddate and new startdate is <= 90 keep the startdate from the previous record otherwise take new one
               , CASE WHEN rek.enddate + 90 => ord.startdate
                      THEN rek.startdate
                      ELSE ord.startdate
                END
               , ord.startdate
            FROM rek
            JOIN ORD
              ON ord.ord = rek.ord+1
              AND ord.id = rek.id)
-- Cumulate data and keep only one record per id, startdate combination
SELECT id, startdate, MAX(enddate) enddate
  FROM rek
GROUP BY id, startdate
ORDER BY id, startdate

Upvotes: 0

Related Questions