Reputation: 731
How would I "pause" a customer's rank if the difference in hours vs their previous booking is less than 24? i.e. a conditional dense_rank()
with tmp_table as (
select 532 as customer_id, '2020-01-09 18:14:47'::timestamp as log_date union all
select 532 as customer_id, '2020-01-09 18:22:13'::timestamp as log_date union all
select 532 as customer_id, '2020-01-09 18:26:07'::timestamp as log_date union all
--
select 981 as customer_id, '2020-01-09 16:53:52'::timestamp as log_date union all
select 981 as customer_id, '2020-01-09 17:08:28'::timestamp as log_date union all
select 981 as customer_id, '2020-01-11 17:25:24'::timestamp as log_date union all
select 981 as customer_id, '2020-01-14 17:32:10'::timestamp as log_date union all
select 981 as customer_id, '2020-01-14 21:49:12'::timestamp as log_date union all
select 981 as customer_id, '2020-01-17 19:15:22'::timestamp as log_date union all
--
select 1983 as customer_id, '2019-11-05 10:30:58'::timestamp as log_date union all
select 1983 as customer_id, '2019-12-02 13:58:52'::timestamp as log_date union all
select 1983 as customer_id, '2019-12-05 17:25:01'::timestamp as log_date union all
select 1983 as customer_id, '2019-12-11 13:58:24'::timestamp as log_date union all
select 1983 as customer_id, '2019-12-12 14:15:25'::timestamp as log_date
)
select *
, datediff(hour, lag(log_date, 1) over (partition by customer_id order by log_date), log_date) as hour_diff
, rank() over (partition by customer_id order by log_date) as rnk
from tmp_table
order by customer_id, log_date
;
output is this:
customer_id|log_date |hour_diff|rnk|
-----------|-------------------|---------|---|
532|2020-01-09 18:14:47| | 1|
532|2020-01-09 18:22:13| 0| 2|
532|2020-01-09 18:26:07| 0| 3|
981|2020-01-09 16:53:52| | 1|
981|2020-01-09 17:08:28| 1| 2|
981|2020-01-11 17:25:24| 48| 3|
981|2020-01-14 17:32:10| 72| 4|
981|2020-01-14 21:49:12| 4| 5|
981|2020-01-17 19:15:22| 70| 6|
1983|2019-11-05 10:30:58| | 1|
1983|2019-12-02 13:58:52| 651| 2|
1983|2019-12-05 17:25:01| 76| 3|
1983|2019-12-11 13:58:24| 140| 4|
1983|2019-12-12 14:15:25| 25| 5|
and I want it to look like this:
customer_id|log_date |hour_diff|rnk|
-----------|-------------------|---------|---|
532|2020-01-09 18:14:47| | 1|
532|2020-01-09 18:22:13| 0| 1|
532|2020-01-09 18:26:07| 0| 1|
981|2020-01-09 16:53:52| | 1|
981|2020-01-09 17:08:28| 1| 1|
981|2020-01-11 17:25:24| 48| 2|
981|2020-01-14 17:32:10| 72| 3|
981|2020-01-14 21:49:12| 4| 3|
981|2020-01-17 19:15:22| 70| 4|
1983|2019-11-05 10:30:58| | 1|
1983|2019-12-02 13:58:52| 651| 2|
1983|2019-12-05 17:25:01| 76| 3|
1983|2019-12-11 13:58:24| 140| 4|
1983|2019-12-12 14:15:25| 25| 5|
i.e. if hour_diff > 24
then rank() over (partition by customer_id order by log_date)
else "pause rank"
what I tried to do is set a case statement where if the hour_diff
< 24 then take the lag()
of the log_date else take the log_date and then do a dense_rank()
on this new log_date
the problem I encountered with that is that if a customer does multiple bookings within 24 hours then it won't really "pause" the rank number
Upvotes: 1
Views: 126
Reputation: 1269673
You can use lag()
to determine where the value should increment. Then use a cumulative sum:
select t.*,
sum( case when prev_log_date > log_date - interval '24 hour' then 0 else 1 end ) over (partition by customer_id order by log_date) as rnk
from (select t.*,
lag(log_date) over (partition by customer_id order by log_date) as prev_log_date
from tmp_table t
) t;
Upvotes: 1