Reputation: 696
I would like to count all unique customers who were active on 2019-01-01, on the condition that they were also active in the subsequent 3 days.
Main table
date customer_id time_spent_online_min
2019-01-01 1 5
2019-01-01 2 6
2019-01-01 3 4
2019-01-02 1 7
2019-01-02 2 5
2019-01-03 3 3
2019-01-04 1 4
2019-01-04 2 6
Output table
date total_active_customers
2019-01-01 2
This is what I have tried so far:
-- Counts the distinct customers that have a row dated 2019-01-01.
-- The CTE narrows main_table to 2019-01-01 .. 2019-01-04 and drops rows
-- without a customer_id; the outer query then keeps only 2019-01-01, so
-- activity on the three following days is not checked by this query.
WITH activity_window AS (
    SELECT
        customer_id,
        date,
        time_spent_online_min
    FROM main_table
    WHERE customer_id IS NOT NULL
      AND date >= DATE '2019-01-01'
      AND date <= DATE '2019-01-04'
)
SELECT
    date,
    COUNT(DISTINCT customer_id) AS total_active_customers
FROM activity_window
WHERE date = DATE '2019-01-01'
GROUP BY date
Upvotes: 0
Views: 325
Reputation: 6741
WITH
-- your input
input(dt,customer_id,time_spent_online_min) AS (
          SELECT DATE '2019-01-01',1,5
UNION ALL SELECT DATE '2019-01-01',2,6
UNION ALL SELECT DATE '2019-01-01',3,4
UNION ALL SELECT DATE '2019-01-02',1,7
UNION ALL SELECT DATE '2019-01-02',2,5
UNION ALL SELECT DATE '2019-01-03',3,3
UNION ALL SELECT DATE '2019-01-04',1,4
UNION ALL SELECT DATE '2019-01-04',2,6
)
,
-- one row per customer and active day, so that several rows for the same
-- customer on the same day count as a single active day
daily_activity AS (
  SELECT DISTINCT
    dt
  , customer_id
  FROM input
)
,
-- for every active day of a customer, count that customer's active days
-- within the window made of this day and the following 3 days
count_activity AS (
  SELECT
    dt
  , customer_id
  , COUNT(customer_id) OVER(
      PARTITION BY customer_id ORDER BY dt
      RANGE BETWEEN CURRENT ROW AND INTERVAL '3 DAY' FOLLOWING
    ) AS act_count
  FROM daily_activity
)
-- act_count includes 2019-01-01 itself, so "act_count > 2" keeps customers
-- that were also active on at least 2 of the 3 following days
SELECT
  dt
, COUNT(*) AS total_active_customers
FROM count_activity
WHERE dt = DATE '2019-01-01'
  AND act_count > 2
GROUP BY dt
;
-- out dt | total_active_customers
-- out ------------+------------------------
-- out 2019-01-01 | 2
Upvotes: 0
Reputation: 1269923
If you have only one record per customer per day, you can use lead():
-- Counts the customers with a row on 2019-01-01 whose third-next row
-- (ordered by date within the customer) is dated 2019-01-04.
-- Relies on at most one row per customer per day.
with next_activity as (
    select
        customer_id,
        date,
        lead(date, 3) over (partition by customer_id order by date) as date_3
    from main_table
)
select date, count(*)
from next_activity
where date = '2019-01-01'
  and date_3 = '2019-01-04'
group by date;
If you can have more than one record per customer per day, aggregate first and then use lead():
-- Same check for data with several rows per customer and day:
-- first collapse main_table to one row per customer and day, then look
-- three rows ahead within each customer.
with daily_activity as (
    select
        customer_id,
        date,
        sum(time_spent_online_min) as time_spent_online_min
    from main_table
    group by customer_id, date
),
next_activity as (
    select
        customer_id,
        date,
        lead(date, 3) over (partition by customer_id order by date) as date_3
    from daily_activity
)
select date, count(*)
from next_activity
where date = '2019-01-01'
  and date_3 = '2019-01-04'
group by date;
You can also easily extend this to every date:
-- For every date, counts the customers whose third-next row (ordered by
-- date within the customer) falls exactly 3 days later.
-- Reads main_table directly, so it relies on at most one row per customer per day.
with next_activity as (
    select
        customer_id,
        date,
        lead(date, 3) over (partition by customer_id order by date) as date_3
    from main_table
)
select date, count(*)
from next_activity
where date_3 = date + interval '3' day
group by date;
Upvotes: 1
Reputation: 521409
I would use EXISTS logic here:
-- Number of distinct customers active on 2019-01-01 that also have a row
-- on each of the three following days.
-- COUNT(DISTINCT ...) counts a customer once even when there are several
-- rows for that customer on 2019-01-01.
SELECT COUNT(DISTINCT t1.customer_id)
FROM main_table t1
WHERE
    t1.date = DATE '2019-01-01' AND
    EXISTS (SELECT 1 FROM main_table t2
            WHERE t2.customer_id = t1.customer_id AND t2.date = DATE '2019-01-02') AND
    EXISTS (SELECT 1 FROM main_table t2
            WHERE t2.customer_id = t1.customer_id AND t2.date = DATE '2019-01-03') AND
    EXISTS (SELECT 1 FROM main_table t2
            WHERE t2.customer_id = t1.customer_id AND t2.date = DATE '2019-01-04');
This answer assumes that a given customer would only have one record for one date of activity.
Upvotes: 0