Reputation: 49
my data is something like this, timestamp is ordered in asc.
INSERT INTO timeseries (timestamp, value)
VALUES
('2022-01-01 00:00:00', 0.89),
**('2022-01-01 10:01:00', 6.89),
('2022-01-02 10:01:21', 10.99),
('2022-01-02 10:07:00', 11.89),**
('2022-01-02 12:01:00', 0.89),
**('2022-01-02 13:07:00', 6.39),**
('2022-01-02 14:00:00', 0.69),
**('2022-01-03 14:02:00', 5.39),
('2022-01-03 15:04:00', 6.89),
('2022-01-03 15:00:00', 7.3),**
('2022-01-03 15:10:00', 1.89),
('2022-01-03 15:50:00', 0.8);
my ask is to get min and max, so i can produce difference in minutes whenever value is going above 5. in above, there are three data sets formed
min max
2022-01-01 10:01:00 2022-01-02 10:07:00
2022-01-02 13:07:00 2022-01-02 13:07:00
2022-01-03 14:02:00 2022-01-03 15:00:00
I tried row_number to get sequence but since there are three sets there, i need to attribute column so i can use in partition clause but i am not getting any ideas for that?
WITH CTE AS (
SELECT CASE WHEN VALUE>5 THEN 'ON' ELSE 'OFF' END STATUS , TIMESTAMP, VALUE
FROM TIMESERIES)
SELECT ROW_NUMBER() OVER(PARTITION BY STATUS ORDER BY TIMESTAMP) RN,TIMESTAMP,VALUE FROM CTE
ORDER BY TIMESTAMP;
this is giving me row_number for all >5 but I need based on sets of data..
any help to start or a solution is highly appreciated.
BDW, trying this in snowflake but general SQL is also fine.
Upvotes: 1
Views: 730
Reputation: 26120
If we use the nice VALUES provided by shawnt00, in a CTE
with T(ts, val) as (
select * from values
('2022-01-01 00:00:00'::datetime, 0.89),
('2022-01-01 10:01:00'::datetime, 6.89),
('2022-01-02 10:01:21'::datetime, 10.99),
('2022-01-02 10:07:00'::datetime, 11.89),
('2022-01-02 12:01:00'::datetime, 0.89),
('2022-01-02 13:07:00'::datetime, 6.39),
('2022-01-02 14:00:00'::datetime, 0.69),
('2022-01-03 14:02:00'::datetime, 5.39),
('2022-01-03 15:04:00'::datetime, 6.89),
('2022-01-03 15:06:00'::datetime, 7.3),
('2022-01-03 15:10:00'::datetime, 1.89),
('2022-01-03 15:50:00'::datetime, 0.8)
)
we can then use CONDITIONAL_CHANGE_EVENT to find when the conditional "greater than 5" swaps state:
select
ts,
val > 5 as gt_five,
CONDITIONAL_CHANGE_EVENT(gt_five) over(order by ts) as ct
from T
this gives:
TS | GT_FIVE | CT |
---|---|---|
2022-01-01 00:00:00.000 | FALSE | 0 |
2022-01-01 10:01:00.000 | TRUE | 1 |
2022-01-02 10:01:21.000 | TRUE | 1 |
2022-01-02 10:07:00.000 | TRUE | 1 |
2022-01-02 12:01:00.000 | FALSE | 2 |
2022-01-02 13:07:00.000 | TRUE | 3 |
2022-01-02 14:00:00.000 | FALSE | 4 |
2022-01-03 14:02:00.000 | TRUE | 5 |
2022-01-03 15:04:00.000 | TRUE | 5 |
2022-01-03 15:06:00.000 | TRUE | 5 |
2022-01-03 15:10:00.000 | FALSE | 6 |
2022-01-03 15:50:00.000 | FALSE | 6 |
we can use the gt_five
to filter out the unwanted gaps with a QUALIFY, and then group via the output of the CONDITIONAL_CHANGE_EVENT to get the min/max. Thus the sql can be:
WITH flagged_and_stripped as (
select
ts,
val > 5 as gt_five,
CONDITIONAL_CHANGE_EVENT(gt_five) over(order by ts) as ct
from T
QUALIFY gt_five
)
select
min(ts) as min
,max(ts) as max
from flagged_and_stripped
group by ct
order by 1;
giving:
MIN | MAX |
---|---|
2022-01-01 10:01:00.000 | 2022-01-02 10:07:00.000 |
2022-01-02 13:07:00.000 | 2022-01-02 13:07:00.000 |
2022-01-03 14:02:00.000 | 2022-01-03 15:06:00.000 |
thus the complex example code is:
with T(ts, val) as (
select * from values
('2022-01-01 00:00:00'::datetime, 0.89),
('2022-01-01 10:01:00'::datetime, 6.89),
('2022-01-02 10:01:21'::datetime, 10.99),
('2022-01-02 10:07:00'::datetime, 11.89),
('2022-01-02 12:01:00'::datetime, 0.89),
('2022-01-02 13:07:00'::datetime, 6.39),
('2022-01-02 14:00:00'::datetime, 0.69),
('2022-01-03 14:02:00'::datetime, 5.39),
('2022-01-03 15:04:00'::datetime, 6.89),
('2022-01-03 15:06:00'::datetime, 7.3),
('2022-01-03 15:10:00'::datetime, 1.89),
('2022-01-03 15:50:00'::datetime, 0.8)
), flagged_and_stripped as (
select
ts,
val > 5 as gt_five,
CONDITIONAL_CHANGE_EVENT(gt_five) over(order by ts) as ct
from T
QUALIFY gt_five
)
select
min(ts) as min
,max(ts) as max
from flagged_and_stripped
group by ct
order by 1;
Upvotes: 1
Reputation: 17953
Standard gaps and islands:
with flagged as (
select *,
case when
lag(val, 1, 0.0) over (order by ts) <= 5
and val > 5 then 1 end as flag
from T
), grouped as (
select *,
count(flag) over (order by ts) as grp
from flagged
)
select
min(ts),
max(case when val > 5 then ts end)
from grouped
group by grp
having max(val) > 5
order by grp;
Upvotes: 2
Reputation: 616
Heres a solution using Snowflakes semi-structured functionality (arrays):
with ranges as (
select
case when lag(value,1,0.0) over (order by timestamp) < 5 and value >=5 then min(timestamp) end st_time,
case when lead(value,1,0.0) over (order by timestamp) < 5 and value >=5 then max(timestamp) end end_time
from timeseries
group by value, timestamp),
ranges_arrays as (
select
array_agg(st_time) st_time_arr,
array_agg(end_time) end_time_arr,
array_size(st_time_arr) row_count,
array_generate_range( 0 , row_count ) idx_arr
from ranges)
select
st_time_arr[idx]::timestamp st_time,
end_time_arr[idx]::timestamp end_time
from ranges_arrays, (select value::integer idx from ranges_arrays, lateral flatten(input=>idx_arr))
;
Upvotes: 0