Reputation: 39
I'm building a query in order to build consecutive dates which is
create table #consecutivedates (
sgid nvarchar(max),
metric nvarchar(max),
targetvalue nvarchar(max),
startdate datetime,
enddate datetime
)
insert into #consecutivedates values
('2177', '515818', '18', '2019-09-01', '2019-09-30'),
('2177', '515818', '125', '2019-08-01', '2019-08-31'),
('2177', '515818', '15', '2019-07-01', '2019-07-31')
SELECT sgid,metric, CAST(startdate AS DATE) startdate, CAST(enddate AS DATE) enddate,
ROW_NUMBER() OVER ( ORDER BY sgid, metric, startdate ) rn
INTO #temp
FROM #consecutivedates
-- GroupingColumn in cte used to identify and group consecutive dates
;WITH cte
AS ( SELECT sgid ,
metric ,
startdate ,
enddate ,
1 AS GroupingColumn ,
rn
FROM #temp
WHERE rn = 1
UNION ALL
SELECT t2.sgid ,
t2.metric,
t2.startdate,
t2.enddate ,
CASE WHEN t2.startdate = DATEADD(day, 1, cte.enddate)
AND cte.sgid = t2.sgid AND cte.metric=t2.metric
THEN cte.GroupingColumn
ELSE cte.GroupingColumn + 1
END AS GroupingColumn ,
t2.rn
FROM #temp t2
INNER JOIN cte ON t2.rn = cte.rn + 1
)
SELECT sgid,metric, MIN(startdate) AS startdate, MAX(enddate) AS enddate
FROM cte
GROUP BY sgid,metric, GroupingColumn
DROP TABLE #temp
DROP TABLE #consecutivedates
but I'm facing a problem with two things.
query is too expensive when files are big.
if the dates are something like
startdate enddate '2019-08-01' '2019-09-30' '2019-10-01' '2019-10-31'
Then the query will not group the dates and I need it to be smart enough to solve this kind of cases.
Any help would be nice,
Thanks.
Upvotes: 1
Views: 812
Reputation: 48769
Window frames could be more performant, but you'll need to test it. Here's the alternative solution:
select sgid, metric, min(startdate) as mindate, max(enddate) as maxdate
from (
select *,
sum(case when startdate > dateadd(day, 1, prev_enddate) then 1 else 0 end)
over(partition by sgid, metric order by startdate) as grp
from (
select
sgid, metric, startdate, enddate,
max(enddate)
over(partition by sgid, metric order by startdate
rows between unbounded preceding and 1 preceding) as prev_enddate
from #consecutivedates
) x
) y
group by sgid, metric, grp
Result (with more comprehensive data script):
sgid metric mindate maxdate
---- ------ --------------------- ---------------------
2177 515818 2019-03-01 00:00:00.0 2019-03-31 00:00:00.0
2177 515818 2019-07-01 00:00:00.0 2019-09-30 00:00:00.0
2177 515818 2019-11-01 00:00:00.0 2019-11-30 00:00:00.0
2177 515820 2019-10-01 00:00:00.0 2019-10-31 00:00:00.0
Here's the data script I used:
create table #consecutivedates (sgid nvarchar(max), metric nvarchar(max),
targetvalue nvarchar(max), startdate datetime, enddate datetime);
insert into #consecutivedates values
('2177', '515818', '18', '2019-09-01', '2019-09-30'),
('2177', '515818', '125', '2019-08-01', '2019-08-31'),
('2177', '515818', '15', '2019-07-01', '2019-07-31'),
('2177', '515820', '15', '2019-10-01', '2019-10-31'),
('2177', '515818', '15', '2019-03-01', '2019-03-31'),
('2177', '515818', '15', '2019-11-01', '2019-11-30')
Upvotes: 0
Reputation: 222442
I undersand that you want to group together records that have the same sgid
and metric
and that are adjacent (ie next record starts one day after the end of the current record).
Here is a gaps and island solution that uses window sum to define the groups:
select sgid, metric, min(startdate) startdate, max(enddate) enddate
from (
select
t.*,
sum(case when startdate = dateadd(day, 1, lag_enddate) then 0 else 1 end)
over(partition by sgid, metric order by startdate) grp
from (
select
t.*,
lag(enddate) over(partition by sgid, metric order by startdate) lag_enddate
from #consecutivedates t
) t
) t
group by sgid, metric, grp
For your sample data, where all three records are adjacent, this produces:
sgid | metric | startdate | enddate :--- | :----- | :---------------------- | :---------------------- 2177 | 515818 | 2019-07-01 00:00:00.000 | 2019-09-30 00:00:00.000
Note that the query uses SQL Server date functions (which I suspect that you are using): alternatives exists in other databases.
Upvotes: 2