Reputation: 13
I'm trying to create a sequential numeric flag upon a change in section.
The flag should be 1 when a student joins a section and continue to be 1 until a change in section. The flag should be 2 for the first change, 3 for the 2nd change and so forth.
Since a section can repeat after a change, I'm finding it challenging to create a desired outcome.
Any help would be greatly appreciated.
Sample data
create table dbo.cluster_test
(student_id int not null
,record_date date not null
,section varchar(30) null)
insert into cluster_test
(student_id, record_date, section)
values
(123, '2020-02-06', NULL)
,(123, '2020-05-14', 'A')
,(123, '2020-08-12', 'A')
,(123, '2020-09-01', 'B')
,(123, '2020-09-15', 'A')
,(123, '2020-09-29', 'A')
,(123, '2020-11-02', NULL)
,(123, '2020-11-30', NULL)
,(789, '2020-01-12', NULL)
,(789, '2020-04-12', 'A')
,(789, '2020-05-03', NULL)
,(789, '2020-06-13', 'A')
,(789, '2020-06-30', 'B')
,(789, '2020-07-01', 'B')
,(789, '2020-07-22', 'A')
Desired result
student_id | record_date | section | flag |
---|---|---|---|
123 | 2020-02-06 | NULL | NULL |
123 | 2020-05-14 | A | 1 |
123 | 2020-08-12 | A | 1 |
123 | 2020-09-01 | B | 2 |
123 | 2020-09-15 | A | 3 |
123 | 2020-09-29 | A | 3 |
123 | 2020-11-02 | NULL | NULL |
123 | 2020-11-30 | NULL | NULL |
789 | 2020-01-12 | NULL | NULL |
789 | 2020-04-12 | A | 1 |
789 | 2020-05-03 | NULL | NULL |
789 | 2020-06-13 | A | 2 |
789 | 2020-06-30 | B | 3 |
789 | 2020-07-01 | B | 3 |
789 | 2020-07-22 | A | 4 |
Attempt:
select
student_id
,record_date
,section
,case when section is not null then row_number() over(partition by student_id, section order by record_date asc)
end row#
,case when (section is not null) and (lag(section, 1) over(partition by student_id order by record_date asc) is null) then 'start'
when (lag(section, 1) over(partition by student_id order by record_date asc) is not null) and (section != lag(section, 1) over(partition by student_id order by record_date asc)) then 'change'
end chk_txt
,case when section is not null then (case when (section is not null) and (lag(section, 1) over(partition by student_id order by record_date asc) is null) then 1
when (lag(section, 1) over(partition by student_id order by record_date asc) is not null) and (section != lag(section, 1) over(partition by student_id order by record_date asc)) then 1
else 0
end)
end chk_val2
from cluster_test
order by 1, 2
Upvotes: 1
Views: 188
Reputation: 35930
It is gap and island problem. You can use analytical function as follows:
Select student_id, record_date, section,
Case when section is not null
Then sum(case when section is not null and (section <> lgs or lgs is null) then 1 end)
over (partition by student_id order by record_date)
End as flag
From (
Select student_id, record_date, section,
Lag(section) over (partition by student_id order by record_date) as lgs
From cluster_test t
) t
order by student_id, record_date;
Upvotes: 1
Reputation: 13009
You can go for multiple CTEs and get data as given below:
with cte_studentSection as (
SELECT student_id, record_Date, section
, lead(section) over(partition by student_id order by record_date) as nextSection
, row_number() over (partition by student_id order by record_date) as rnk
FROM dbo.Cluster_test
where section is not null
), cte_studentSectionFlag as (
SELECT Student_id, record_date, section, rnk, 1 as flag
from cte_studentSection as oc
where record_date = (SELECT MIN(record_Date) from cte_studentSection where student_id = oc.student_id)
union all
SELECT oc.Student_id, oc.record_date, oc.section,oc.rnk, case when oc.section = cte.section then cte.flag else cte.flag + 1 end
from cte_studentSection as oc
inner join cte_studentSectionFlag as cte on cte.rnk + 1 = oc.rnk and oc.student_id = cte.student_id
)
select student_id, record_date, section, flag
from cte_studentsectionflag
union all
select student_id, record_date, section, null as flag
from dbo.Cluster_test
where section is null
order by student_id, record_date;
student_id | record_date | section | flag |
---|---|---|---|
123 | 2020-02-06 | NULL | NULL |
123 | 2020-05-14 | A | 1 |
123 | 2020-08-12 | A | 1 |
123 | 2020-09-01 | B | 2 |
123 | 2020-09-15 | A | 3 |
123 | 2020-09-29 | A | 3 |
123 | 2020-11-02 | NULL | NULL |
123 | 2020-11-30 | NULL | NULL |
789 | 2020-01-12 | NULL | NULL |
789 | 2020-04-12 | A | 1 |
789 | 2020-05-03 | NULL | NULL |
789 | 2020-06-13 | A | 1 |
789 | 2020-06-30 | B | 2 |
789 | 2020-07-01 | B | 2 |
789 | 2020-07-22 | A | 3 |
Upvotes: 0