Reputation: 1745
I have some sample data created via the below query.
drop table if exists #test;
create table #test (id char(4), product char(1), begin_date date, end_date date);
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2009-02-26','2010-02-26');
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2011-02-26','2012-02-26');
insert into #test (id, product, begin_date, end_date) values('0001', 'T','2012-02-26','2013-02-26');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2013-02-26','2014-02-26');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2014-02-26','2015-01-02');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2015-01-02','2015-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2015-08-01','2016-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2016-08-01','2017-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2017-08-01','2018-08-01');
insert into #test (id, product, begin_date, end_date) values('0002', 'T','2019-08-01','2020-08-01');
select
*,
case
when lead(begin_date,1) over (partition by id, product order by id, begin_date) = end_date then 1
else 0
end as time_connects_flag
from
#test;
I would like to take this result set and identify the groups based on whenever the time_connects_flag
changes from the prior row. The time_connects_flag
is identifying if the current rows end_date
aligns with the next row's begin_date
partitioned by the id
and product
. Essentially if the bit switches from 0 to 1 or 1 to 0 or 0 to 0, I want to create a unique grouping for the current row in an effort to partition the whole data set into groupings.
I'd like to arrive at a programmatic solution that gives the following tabular result. The naming convention of 'grp' does not matter as long as it is in a way uniquely identifying this section of the data.
Upvotes: 0
Views: 59
Reputation: 5139
We can convert to daily rows, and apply 'islands' treatment; but I don't think this will perform well with large volumes. I am giving it here just to give you an idea (and seeing that there are no other ansers so far):
with test as (
select
*,
case
when lead(begin_date,1) over (partition by id, product order by id, begin_date) = end_date then 1
else 0
end as time_connects_flag
, NextBeginDate=lead(begin_date) over (order by begin_date)
, Gap=datediff(day,end_date, lead(begin_date) over (partition by id, product order by begin_date))
from
#test
)
, Daily as (
select id, product, begin_date, end_date, time_connects_flag, this_date=begin_date
from test a
union all
select a.id, a.product, a.begin_date, a.end_date, time_connects_flag, this_date=dateadd(day,1,a.this_date)
from Daily a
where a.this_date<a.end_date
), AssgnGrp as (
select time_connects_flag, this_date
, [Grp] = DATEADD(DAY,-1 * (DENSE_RANK() OVER (partition by id, product, time_connects_flag ORDER BY [this_date])-1), [this_date])
from Daily
), Grps as (
select time_connects_flag, grp, begin_date=min(this_date), end_date=max(this_date)
from AssgnGrp
group by time_connects_flag, grp
)
select t.id, t.product, t.begin_date, t.end_date, t.time_connects_flag, grp
from Grps g
inner join
Daily t
on t.this_date between g.begin_date and g.end_date and t.time_connects_flag=g.time_connects_flag
group by t.id, t.product, t.begin_date, t.end_date, t.time_connects_flag, grp
--order by 3
OPTION (MAXRECURSION 0);
Grp is the date of the earliest row in the group. you can assign incremental values, if you wish.
Upvotes: 1