Reputation: 135
Input data
date | number |
---|---|
2024-11-02 | 1000 |
2024-11-03 | 500 |
2024-11-05 | 1000 |
2024-11-06 | 1000 |
2024-11-07 | 1000 |
2024-11-08 | 500 |
2024-11-14 | 1000 |
2024-11-15 | 1000 |
for a given date I want to get the streak (dates backward in a row where number is above/equals a certain threshold => in the examples always 1000)
Examples:
If I search using a date which doesn't appear in the data (for example, 2014-11-12), I want the result to be 0.
I would greatly appreciate your help. Thank you so much!
I have another example:
Input data
date | number |
---|---|
2024-12-10 | 1000 |
2024-12-13 | 1000 |
2024-12-14 | 1000 |
query for date 2024-12-14 should return 2 (count 14. 13.) There is a gap between 13. and 10. So the 10. should not count.
Upvotes: 1
Views: 124
Reputation: 3645
Streak
CTE will assign a row_number to all the dates where number is >= 1000.SELF JOINED
to find the dates which are consecutive and the number of days are counted based on difference of row numbers.EDIT
WITH streaks AS (
SELECT
t.date,
t.number,
ROW_NUMBER() OVER (ORDER BY t.date) AS rn
FROM test t
WHERE t.number >= 1000
),
consecutive_dates AS (
SELECT
s1.date,
COUNT(*) AS streak_length
FROM streaks s1
LEFT JOIN streaks s2
ON s2.rn <= s1.rn AND s2.date = s1.date - INTERVAL '1 day' * (s1.rn - s2.rn)
GROUP BY s1.date
)
SELECT
i.date,
COALESCE(cd.streak_length, 0) AS streak_length
FROM (SELECT '2024-11-13'::DATE AS date) i
LEFT JOIN consecutive_dates cd ON i.date = cd.date;
Output
date | streak_length |
---|---|
2024-11-13 | 0 |
EDIT : DATE contains timestamp, we just use the date portion from the timestamp.
WITH streaks AS (
SELECT
t.date::DATE AS date,
t.number,
ROW_NUMBER() OVER (ORDER BY t.date::DATE) AS rn
FROM test1 t
WHERE t.number >= 1000
),
consecutive_dates AS (
SELECT
s1.date,
COUNT(*) AS streak_length
FROM streaks s1
LEFT JOIN streaks s2
ON s2.rn <= s1.rn
AND s2.date = s1.date - INTERVAL '1 day' * (s1.rn - s2.rn)
GROUP BY s1.date
)
SELECT
i.date,
COALESCE(cd.streak_length, 0) AS streak_length
FROM (SELECT '2024-11-15'::DATE AS date) i
LEFT JOIN consecutive_dates cd ON i.date = cd.date;
Upvotes: 1
Reputation: 5986
This is one of cases of gaps and islands
.
select date,number
,sum(case when number>=1000 then 1 else 0 end)over(partition by grn order by date) result
from(
select *,sum(case when number<1000 then 1 else 0 end)over(order by date) grn
from test
)a
date | number | result |
---|---|---|
2024-11-02 | 1000 | 1 |
2024-11-03 | 500 | 0 |
2024-11-05 | 1000 | 1 |
2024-11-06 | 1000 | 2 |
2024-11-07 | 1000 | 3 |
2024-11-08 | 500 | 0 |
2024-11-14 | 1000 | 1 |
2024-11-15 | 1000 | 2 |
If you want to get a result for a specific (given) date, you can do this
select coalesce(max(date),cast('2024-11-04' as date)) date,count(*) cnt
from test
where date <= '2024-11-04'
and date > (select max(date) from test
where date <= '2024-11-04' and number < 1000)
date | cnt |
---|---|
2024-11-04 | 0 |
When no row in table for given date, query returns cnt=0.
For existing date
select coalesce(max(date),cast('2024-11-07' as date)) date,count(*)
from test
where date <= '2024-11-07'
and date > (select max(date) from test
where date <= '2024-11-07' and number < 1000)
Output is
date | cnt |
---|---|
2024-11-07 | 3 |
UPDATE1
Case when num<1000 is gap and missing date is also gap
Test data
date | number |
---|---|
2024-11-02 | 1000 |
2024-11-03 | 500 |
2024-11-05 | 1000 |
2024-11-06 | 1000 |
2024-11-07 | 1000 |
2024-11-08 | 500 |
2024-11-14 | 1000 |
2024-11-15 | 1000 |
2024-12-10 | 1000 |
2024-12-13 | 1000 |
2024-12-14 | 1000 |
Query for given date (paramDate)
with params as(select cast('2024-12-16' as date) paramDate)
select paramDate date,number, coalesce(result,0)result
from params p
left join (
select date,number
,sum(case when number>=1000 then 1 else 0 end)over(partition by grn order by date) result
,grn
from(
select *
,sum(case when number<1000 then 1
when (date-interval '1 day')<>prevDate then 1
else 0 end)
over(order by date) grn
from(
select *,lag(date,1,date)over(order by date) prevDate
from test
)a
)b
)c on p.paramDate=c.date
date | number | result |
---|---|---|
2024-12-12 | null | 0 |
select date,number
,sum(case when number>=1000 then 1 else 0 end)over(partition by grn order by date) result
,grn
from(
select *
,sum(case when number<1000 then 1
when (date-interval '1 day')<>prevDate then 1
else 0 end)
over(order by date) grn
from(
select *,lag(date,1,date)over(order by date) prevDate
from test
)a
)b
date | number | result | grn |
---|---|---|---|
2024-11-02 | 1000 | 1 | 1 |
2024-11-03 | 500 | 0 | 2 |
2024-11-05 | 1000 | 1 | 3 |
2024-11-06 | 1000 | 2 | 3 |
2024-11-07 | 1000 | 3 | 3 |
2024-11-08 | 500 | 0 | 4 |
2024-11-14 | 1000 | 1 | 5 |
2024-11-15 | 1000 | 2 | 5 |
2024-12-10 | 1000 | 1 | 6 |
2024-12-13 | 1000 | 1 | 7 |
2024-12-14 | 1000 | 2 | 7 |
Upvotes: 1
Reputation: 25632
This finds the number of rows that have a date less than or equal to a certain date and have dates later than the previous day where the number was below the threshold
select count(*)
from table
where date <= '2024-11-15'
and date > (select max(date) from table
where date <= '2024-11-15' and number < 1000)
Upvotes: 0
Reputation: 4670
With classical gaps and islands technique:
select date, number,
case when grp is not null then row_number()
over(partition by grp order by date)
else 0 end as streak_length
from (
select date, number, case when grpstart is not null then
sum(grpstart) over(order by date) end as grp
from (
select
date, number,
case when number >= 1000 then
case when (lag(number) over(order by date) < 1000) then 1 else 0 end
end as grpstart
from test
)
)
order by date
Upvotes: 0
Reputation: 31
Most simple is to use recursivity :
WITH RECURSIVE streak AS (
SELECT
d.date,
d.number,
1 AS streak_counter
FROM data d
WHERE d.date = '2024-11-07'. --> Your date here
UNION ALL
SELECT
d.date,
d.number,
streak.streak_counter + 1
FROM data d
INNER JOIN streak ON d.date = streak.date - INTERVAL '1 day'
WHERE d.number >= 1000
)
SELECT MAX(streak_counter) FROM streak;
Upvotes: 0