Coldchain9
Coldchain9

Reputation: 1745

Dividing an Ordered Set of Data into groups by indicator in SQL Server

I have some sample data created via the below query.

drop table if exists #test;
create table #test (id char(4), product char(1), begin_date date, end_date date);
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2009-02-26','2010-02-26');
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2011-02-26','2012-02-26');
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2012-02-26','2013-02-26');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2013-02-26','2014-02-26');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2014-02-26','2015-01-02');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2015-01-02','2015-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2015-08-01','2016-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2016-08-01','2017-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2017-08-01','2018-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2019-08-01','2020-08-01');

select
    *,
    case 
        when lead(begin_date,1) over (partition by id, product order by id, begin_date) = end_date then 1
        else 0
    end as time_connects_flag
from
    #test;

Current Result

I would like to take this result set and identify the groups based on whenever the time_connects_flag changes from the prior row. The time_connects_flag is identifying if the current rows end_date aligns with the next row's begin_date partitioned by the id and product. Essentially if the bit switches from 0 to 1 or 1 to 0 or 0 to 0, I want to create a unique grouping for the current row in an effort to partition the whole data set into groupings.

I'd like to arrive at a programmatic solution that gives the following tabular result. The naming convention of 'grp' does not matter as long as it is in a way uniquely identifying this section of the data.

Desired Result

Upvotes: 0

Views: 59

Answers (1)

tinazmu
tinazmu

Reputation: 5139

We can convert to daily rows, and apply 'islands' treatment; but I don't think this will perform well with large volumes. I am giving it here just to give you an idea (and seeing that there are no other ansers so far):

with test as (
    select
        *,
        case 
            when lead(begin_date,1) over (partition by id, product order by id, begin_date) = end_date then 1
            else 0
        end as time_connects_flag
        , NextBeginDate=lead(begin_date) over (order by begin_date)
        , Gap=datediff(day,end_date, lead(begin_date) over (partition by id, product order by begin_date))
    from
        #test
)
, Daily as (
    select id, product, begin_date, end_date, time_connects_flag, this_date=begin_date
    from test a
    union all
    select a.id, a.product, a.begin_date, a.end_date, time_connects_flag, this_date=dateadd(day,1,a.this_date)
    from Daily a
    where a.this_date<a.end_date
), AssgnGrp as (
    select time_connects_flag, this_date
    ,  [Grp] = DATEADD(DAY,-1 * (DENSE_RANK() OVER (partition by id, product, time_connects_flag ORDER BY [this_date])-1), [this_date])
    from Daily
), Grps as (
    select time_connects_flag, grp, begin_date=min(this_date), end_date=max(this_date)
    from AssgnGrp
    group by time_connects_flag, grp
)
select t.id, t.product, t.begin_date, t.end_date, t.time_connects_flag, grp
from Grps g
     inner join
     Daily t
     on t.this_date between g.begin_date and g.end_date and t.time_connects_flag=g.time_connects_flag
group by t.id, t.product, t.begin_date, t.end_date, t.time_connects_flag, grp
    --order by 3
    OPTION (MAXRECURSION 0);

Grp is the date of the earliest row in the group. you can assign incremental values, if you wish.

Upvotes: 1

Related Questions