Reputation: 27
I have table rows like this.
status start end
32 1/1/2017 1/2/2017
32 1/2/2017 4/2/2017
1 4/2/2017 6/3/2017
1 6/3/2017 9/5/2017
32 9/5/2017 19/5/2017
32 19/5/2017 22/6/2017
And I wanna group rows to
status start end
32 1/1/2017 4/2/2017
1 4/2/2017 9/5/2017
32 9/5/2017 22/6/2017
How can I do using SQL?
thank you for all help.
Upvotes: 0
Views: 1149
Reputation: 18
you may try this:
CREATE TABLE #STATUS
(
[STATUS] INT,
[START] DATE,
[END] DATE
)
INSERT INTO #STATUS
(
[STATUS], [START], [END]
)
VALUES
(32, '20170101', '20170201'),
(32, '20170201', '20170204'),
(1, '20170204', '20170306'),
(1, '20170306', '20170509'),
(32, '20170509', '20170519'),
(32, '20170519', '20170622')
SELECT
A.STATUS
,A.[START]
,B.[END]
FROM #STATUS A
LEFT JOIN #STATUS B
ON A.[STATUS]=B.[STATUS]
AND A.[END]=B.[START]
WHERE B.STATUS IS NOT NULL
Upvotes: 0
Reputation: 8881
I don't think you can easily do this one in one step. Maybe if you resort to some ugly recursive CTE or a very long chain of CTEs or nested sub-queries. Basically you need to reconfigure your dataset so you can tell the beginning and end of a period.
Assumptions:
I'm going to go with SQL-Server syntax because it's what I'm most comfortable with, but these operations should be something you could do in most sql environments with a little modification. (I'm using a temp table and CTE's, but you could use sub-queries)
create table dbo.[Status] (
[status] int,
[start] date,
[end] date
)
insert into dbo.[Status] ([status], [start], [end])
values
(32, '20170101', '20170201'),
(32, '20170201', '20170204'),
(1, '20170204', '20170306'),
(1, '20170306', '20170509'),
(32, '20170509', '20170519'),
(32, '20170519', '20170622')
create table dbo.Result (
PeriodID int identity, -- to make grouping and self-referential joins easier
Status int,
start date,
next_start date null,
[end] date null
)
select * from dbo.[Status]
-- This will get you all the periods and their start dates
;with cteExcludeTheseRows(status, start) as (
-- These are the records where the Start date matches an end date for the same status group. As a result these aren't real periods, just extensions.
select S.status, S.start
from [Status] S
join [Status] Prior on S.Status = Prior.status and S.start = Prior.[end]
)
insert into dbo.Result (status, start)
select
S.status,
S.start
from [Status] S
left join cteExcludetheserows Exclude on S.status = Exclude.status and S.start = Exclude.start
where Exclude.status is null
-- Reference the temp table to get the next start period for your status groups, that way you know that the end date for that period has to be less then that date
;with cteNextStart (PeriodID, next_start) as (
select
R.PeriodID,
next_start = min(next.start)
from dbo.Result R
join dbo.Result next on R.status = next.status and r.start < next.start
group by R.PeriodID
)
update R
set R.next_start = next.next_start
from dbo.Result R
join cteNextStart next on R.PeriodID = next.PeriodID
-- Now get the end date for each period by looking back at your main status table
;with cteEnd (PeriodID, [end]) as (
select
R.PeriodID,
[end] = max(s.[end])
from dbo.Result R
join [Status] S on R.status = s.status and S.[end] between R.start and isnull(R.next_start, '99991231')
group by R.PeriodID
)
update R
set R.[end] = [end].[end]
from dbo.Result R
join cteEnd [end] on R.PeriodID = [end].PeriodID
-- And finally, here you have your result set
select
status,
start,
[end]
from dbo.Result
order by start, status
drop table dbo.[Status]
drop table dbo.Result
Upvotes: 2
Reputation: 39
see also demo
SELECT * FROM aaa a
where a.sstatus != (select top 1 b.sstatus from aaa b
where b.start_date < a.start_date
order by b.start_date desc);
Upvotes: 1