Reputation: 1903
I have the below sample of my table, that has thousands of ids and a row for each day for each id, I also have a subscriber_gained and subscriber_lost for each day/id. Is there a way to calculate how many followers I have for each day with this amount if data?
metrics_date | id | subscriber_lost | subscriber_gained |
---|---|---|---|
2022-12-03 | 3343 | 54 | 37 |
2022-12-02 | 3343 | 29 | 27 |
2022-12-03 | 1223 | 44 | 26 |
2022-12-02 | 1223 | 21 | 36 |
I want to have a query that shows the running total for that day for that id:
metrics_date | id | subscriber_lost | subscriber_gained | number_of_visitors |
---|---|---|---|---|
2022-12-03 | 3343 | 54 | 37 | 1209 |
2022-12-02 | 3343 | 29 | 27 | 1226 |
2022-12-03 | 1223 | 44 | 26 | 3521 |
2022-12-02 | 1223 | 21 | 36 | 3539 |
I've tried this query but the total is off :
select
METRICS_DATE,
channel_id,
number_of_visitors,
case
when lag(number_of_visitors) over(order by METRICS_DATE) is null
then number_of_visitors
when lag(number_of_visitors) over(order by METRICS_DATE) < number_of_visitors
then number_of_visitors - lag(number_of_visitors) over(order by METRICS_DATE)
else 0
end subscribers_gained,
case when lag(number_of_visitors) over(order by METRICS_DATE) > number_of_visitors
then lag(number_of_visitors) over(order by METRICS_DATE) - number_of_visitors
else 0
end subscribers_lost
from (
select METRICS_DATE,
channel_id,
count(*) number_of_visitors
from you.p_content_owner_basic_a3_you
where channel_id = '3343'
group by METRICS_DATE,
channel_id
) t
order by METRICS_DATE desc;
Upvotes: 0
Views: 43
Reputation: 25928
so with some data in a CTE for the simplicity of it:
with data(metrics_date, id, subscriber_lost, subscriber_gained) as (
select * from values
('2022-12-03'::date, 3343, 54, 37),
('2022-12-02'::date, 3343, 29, 27),
('2022-12-03'::date, 1223, 44, 26),
('2022-12-02'::date, 1223, 21, 36),
('2022-12-01'::date, 9999, 0, 10),
('2022-12-02'::date, 9999, 5, 10),
('2022-12-03'::date, 9999, 15, 10),
('2022-12-04'::date, 9999, 10, 10)
)
what you are want is to subtract the two window frame SUM's of the two partials:
select
d.*
,sum(d.subscriber_gained) over ( partition by d.id order by d.metrics_date) -
sum(d.subscriber_lost) over ( partition by d.id order by d.metrics_date) as number_of_visitors
from data as d
order by 2,1;
which can also be express as the sum of the difference.
select
d.*
,sum(d.subscriber_gained - d.subscriber_lost) over ( partition by d.id order by d.metrics_date) as number_of_visitors
from data as d
order by 2,1;
METRICS_DATE | ID | SUBSCRIBER_LOST | SUBSCRIBER_GAINED | NUMBER_OF_VISITORS |
---|---|---|---|---|
2022-12-02 | 1223 | 21 | 36 | 15 |
2022-12-03 | 1223 | 44 | 26 | -3 |
2022-12-02 | 3343 | 29 | 27 | -2 |
2022-12-03 | 3343 | 54 | 37 | -19 |
2022-12-01 | 9999 | 0 | 10 | 10 |
2022-12-02 | 9999 | 5 | 10 | 15 |
2022-12-03 | 9999 | 15 | 10 | 10 |
2022-12-04 | 9999 | 10 | 10 | 10 |
select
d.*
,d.subscriber_gained - d.subscriber_lost as change
,sum(change) over ( partition by d.id order by d.metrics_date) as number_of_visitors
,round(div0(change, number_of_visitors+change) *100,1) as before_percent_change
,round(div0(change, number_of_visitors) *100,1) as after_percent_change
from data as d
order by 2,1;
gives:
METRICS_DATE | ID | SUBSCRIBER_LOST | SUBSCRIBER_GAINED | CHANGE | NUMBER_OF_VISITORS | BEFORE_PERCENT_CHANGE | AFTER_PERCENT_CHANGE |
---|---|---|---|---|---|---|---|
2022-12-02 | 1223 | 21 | 36 | 15 | 15 | 50 | 100 |
2022-12-03 | 1223 | 44 | 26 | -18 | -3 | 85.7 | 600 |
2022-12-02 | 3343 | 29 | 27 | -2 | -2 | 50 | 100 |
2022-12-03 | 3343 | 54 | 37 | -17 | -19 | 47.2 | 89.5 |
2022-12-01 | 9999 | 0 | 10 | 10 | 10 | 50 | 100 |
2022-12-02 | 9999 | 5 | 10 | 5 | 15 | 25 | 33.3 |
2022-12-03 | 9999 | 15 | 10 | -5 | 10 | -100 | -50 |
2022-12-04 | 9999 | 10 | 10 | 0 | 10 | 0 | 0 |
Upvotes: 1