Reputation: 2491
I have monthly expenditure data in BigQuery for some customers, with the following structure:
CREATE TABLE if not EXISTS monthly_spend (
user_id int,
transaction_month DATE,
spend float
);
INSERT INTO monthly_spend VALUES
(1, '2021-01-01', 0),
(1, '2021-02-01', 1),
(1, '2021-03-01', 1),
(1, '2021-04-01', 2),
(1, '2021-05-01', 5),
(2, '2021-01-01', 5),
(2, '2021-02-01', 0),
(2, '2021-03-01', 1),
(2, '2021-04-01', 2),
(2, '2021-05-01', 2);
I'm trying to calculate the rolling median of the monthly expenditure using the following query:
select
user_id,
transaction_month,
avg(spend) over(partition by user_id order by transaction_month rows between unbounded preceding and 1 preceding) as rolling_avg_spend,
percentile_cont(spend, 0.5) over(partition by user_id order by transaction_month rows between unbounded preceding and 1 preceding) as rolling_median_spend,
from monthly_spend
However, I get the following error:
Window ORDER BY is not allowed for analytic function percentile_cont at [69:63]
Is there a way to calculate a rolling median (without the current row) in BigQuery?
Thanks!
Upvotes: 1
Views: 1453
Reputation: 172944
Try below
select
user_id,
transaction_month,
rolling_avg_spend,
(select distinct percentile_cont(spend, 0.5) over()
from unnest(rolling_spends) spend
) as rolling_median_spend
from (
select
user_id,
transaction_month,
avg(spend) over(partition by user_id order by transaction_month rows between unbounded preceding and 1 preceding) as rolling_avg_spend,
array_agg(spend) over(partition by user_id order by transaction_month rows between unbounded preceding and 1 preceding) as rolling_spends,
from monthly_spend
)
Upvotes: 0