Reputation: 70406
Following data sample is given:
+---------------+--------+---------+------------+
| customer_id | city | spend | timestamp |
+---------------+--------+---------+------------+
| 1 | A | 0.7 | 2019-02-12 |
| 2 | B | 0.9 | 2019-02-12 |
| 3 | C | 0.8 | 2019-02-12 |
| 4 | B | 0.95 | 2019-02-12 |
+---------------+--------+---------+------------+
I want to answer following question: how much does a customer spend on average per month per city? The result should look like this:
+--------+---------+------------+
| city | avg | timestamp |
+--------+---------+------------+
| A | ... | 2019-02-12 |
| B | ... | 2019-02-12 |
| C | ... | 2019-02-12 |
+--------+---------+------------+
I tried to solve it with a moving average:
SELECT
city,
AVG(spend) OVER (PARTITION BY customer_id ORDER BY date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) avg_spend,
date
FROM (
SELECT
customer_id,
city,
AVG(spend) spend,
date
FROM `project.dataset.table`
GROUP BY customer_id, city, date
)
ORDER BY date DESC
I am getting (small) numbers for avg_spend that seem to be more like a daily instead of monthly average. Any idea what might be wrong with my query?
Upvotes: 0
Views: 1018
Reputation: 692
try one of these, depending on what result (grouped or not) you want
with
sample as (
select
*
from
unnest(
array[
struct(1 as customer_id, 'A' as city, 1000 as amount, timestamp'2019-02-12' as timestamp),
struct(1, 'A', 2000 , timestamp'2019-02-25'),
struct(1, 'A', 800, timestamp'2019-03-12'),
struct(1, 'B', 4500, timestamp'2019-03-10'),
struct(1, 'B', 500, timestamp'2019-03-14'),
struct(2, 'A', 1350, timestamp'2019-02-05'),
struct(2, 'A', 50, timestamp'2019-02-14'),
struct(3, 'B', 2000, timestamp'2019-04-02'),
struct(3, 'B', 4000, timestamp'2019-05-22')
]
)
)
select
customer_id,
city,
month,
avg(spent_by_day) as avg_amount_spent
from
( select
customer_id,
city,
date(timestamp) as date,
date_trunc(date(timestamp), month) as month,
sum(amount) as spent_by_day
from
sample
group by
1, 2, 3, 4)
group by
1, 2, 3
with
sample as (
select
*
from
unnest(
array[
struct(1 as customer_id, 'A' as city, 1000 as amount, timestamp'2019-02-12' as timestamp),
struct(1, 'A', 2000 , timestamp'2019-02-25'),
struct(1, 'A', 800, timestamp'2019-03-12'),
struct(1, 'B', 4500, timestamp'2019-03-10'),
struct(1, 'B', 500, timestamp'2019-03-14'),
struct(2, 'A', 1350, timestamp'2019-02-05'),
struct(2, 'A', 50, timestamp'2019-02-14'),
struct(3, 'B', 2000, timestamp'2019-04-02'),
struct(3, 'B', 4000, timestamp'2019-05-22')
]
)
)
select
customer_id,
city,
date,
avg(spent_by_day) over( partition by
customer_id,
city,
month) as avg_amount_spent
from
( select
customer_id,
city,
date(timestamp) as date,
date_trunc(date(timestamp), month) as month,
sum(amount) as spent_by_day
from
sample
group by
1, 2, 3, 4)on
Upvotes: 1