Reputation: 409
I have the data for student absence which I got after some transformations. The data is day by day:
WITH datasample AS (
SELECT 1 AS StudentID, 20180101 AS DateID, 0 AS AbsentToday, 0 AS AbsentYesterday UNION ALL
SELECT 1, 20180102, 1, 0 UNION ALL
SELECT 1, 20180103, 1, 1 UNION ALL
SELECT 1, 20180104, 1, 1 UNION ALL
SELECT 1, 20180105, 1, 1 UNION ALL
SELECT 1, 20180106, 0, 1 UNION ALL
SELECT 2, 20180101, 0, 0 UNION ALL
SELECT 2, 20180102, 1, 0 UNION ALL
SELECT 2, 20180103, 1, 1 UNION ALL
SELECT 2, 20180104, 0, 1 UNION ALL
SELECT 2, 20180105, 1, 0 UNION ALL
SELECT 2, 20180106, 1, 1 UNION ALL
SELECT 2, 20180107, 0, 1
)
SELECT *
FROM datasample
ORDER BY StudentID, DateID
I need to add a column (AbsencePeriodInMonth) which would calculate the student's absence period during the month. For example, StudentID=1 was absent in one consecutive period during the month and StudentID=2 had two periods, something like this:
StudentID DateID AbsentToday AbsentYesterday AbsencePeriodInMonth
1 20180101 0 0 0
1 20180102 1 0 1
1 20180103 1 1 1
1 20180104 1 1 1
1 20180105 1 1 1
1 20180106 0 1 0
2 20180101 0 0 0
2 20180102 1 0 1
2 20180103 1 1 1
2 20180104 0 1 0
2 20180105 1 0 2
2 20180106 1 1 2
2 20180107 0 1 0
My goal is actually to calculate the consecutive absent days prior to each day in the fact table, I think I can do it if I get the AbsencePeriodInMonth column, by having this added to my query after the *:
,CASE WHEN AbsentToday = 1 THEN DENSE_RANK() OVER(PARTITION BY StudentID, AbsencePeriodInMonth ORDER BY DateID)
ELSE 0
END AS DaysAbsent
Any idea on how I can add that AbsencePeriodInMonth or maybe calculate the consecutive absent days in some other way?
Upvotes: 1
Views: 70
Reputation: 1269923
You can identify each period by counting the number of 0s before hand. Then you can enumerate them using dense_rank()
.
select ds.*,
(case when absenttoday = 1 then dense_rank() over (partition by studentid order by grp)
else 0
end) as AbsencePeriodInMonth
from (select ds.*, sum(case when absenttoday = 0 then 1 else 0 end) over (partition by studentid order by dateid) as grp
from datasample ds
) ds
order by StudentID, DateID;
Here is a SQL Fiddle.
Upvotes: 1
Reputation: 1845
Using Recursive CTE
and Dense_Rank
WITH datasample AS (
SELECT 1 AS StudentID, 20180101 AS DateID, 0 AS AbsentToday, 0 AS AbsentYesterday UNION ALL
SELECT 1, 20180102, 1, 0 UNION ALL
SELECT 1, 20180103, 1, 1 UNION ALL
SELECT 1, 20180104, 1, 1 UNION ALL
SELECT 1, 20180105, 1, 1 UNION ALL
SELECT 1, 20180106, 0, 1 UNION ALL
SELECT 2, 20180101, 0, 0 UNION ALL
SELECT 2, 20180102, 1, 0 UNION ALL
SELECT 2, 20180103, 1, 1 UNION ALL
SELECT 2, 20180104, 0, 1 UNION ALL
SELECT 2, 20180105, 1, 0 UNION ALL
SELECT 2, 20180106, 1, 1 UNION ALL
SELECT 2, 20180107, 0, 1
), cte as
(Select *,DateID as dd
from datasample
where AbsentToday = 1 and AbsentYesterday = 0
union all
Select d.*, c.dd
from datasample d
join cte c
on d.StudentID = c.StudentID and d.DateID = c.DateID + 1
where d.AbsentToday = 1
), cte1 as
(
Select *, DENSE_RANK() over (partition by StudentId order by dd) as de
from cte
)
Select d.*, IsNull(c.de,0) as AbsencePeriodInMonth
from cte1 c
right join datasample d
on d.StudentID = c.StudentID and c.DateID = d.DateID
order by d.StudentID, d.DateID
Upvotes: 1