Reputation: 1117
My data looks like this:
-- Inline sample rows: the first STRUCT names the columns
-- (date, article_id, user_type, openmode, uid); the tuples that follow
-- reuse those names positionally.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- First aggregation level: one row per (date, article_id), with every
-- (user_type, openmode, uid) triple collected into the array "ut".
date_article AS (
  SELECT
    date,
    article_id,
    ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS ut
  FROM test
  GROUP BY date, article_id
)

-- Second level: for each row, re-group the "ut" array by user_type so that
-- "user_types" holds one struct per user_type, each carrying an array "op"
-- of its (openmode, uid) pairs.
SELECT
  date,
  article_id,
  ARRAY(
    SELECT AS STRUCT
      user_type,
      ARRAY_AGG(STRUCT(openmode, uid)) AS op
    FROM UNNEST(ut)
    GROUP BY user_type
  ) AS user_types
FROM date_article
My goal is to aggregate user_types.op.openmode and user_types.op.uid by user_types.user_type without creating any duplicates, as follows:
Upvotes: 0
Views: 1147
Reputation: 172954
I think you are looking for the query below:
#standardSQL
-- Inline sample rows. Note that DATE below is a column name (a string-valued
-- field), not the DATE type; its spelling is kept so the output schema is
-- unchanged.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS DATE,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Level 1: distinct-user count per (DATE, article_id, user_type, openmode).
users_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY DATE, article_id, user_type, openmode
),

-- Level 2: fold the per-openmode counts into an array for each user_type.
modes_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS modes
  FROM users_agg
  GROUP BY DATE, article_id, user_type
),

-- Level 3: fold the user types (with their modes) into an array per article.
types_agg AS (
  SELECT
    DATE,
    article_id,
    ARRAY_AGG(STRUCT(user_type, modes)) AS types
  FROM modes_agg
  GROUP BY DATE, article_id
),

-- Level 4: fold the articles (with their types) into an array per DATE.
article_agg AS (
  SELECT
    DATE,
    ARRAY_AGG(STRUCT(article_id, types)) AS articles
  FROM types_agg
  GROUP BY DATE
)

SELECT
  DATE,
  articles
FROM article_agg
with result
Upvotes: 1
Reputation: 3616
You were making it a bit more complicated than necessary. If possible, do your 'normal' SQL first and then format into arrays/structs afterwards.
-- Inline sample rows: the first STRUCT names the columns
-- (date, article_id, user_type, openmode, uid); the tuples that follow
-- reuse those names positionally.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Plain SQL step first: distinct-user count per
-- (date, article_id, user_type, openmode).
agg AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY date, article_id, user_type, openmode
),

-- Formatting step: pack the (openmode, uids) pairs of each
-- (date, article_id, user_type) into the array "subfields".
final AS (
  SELECT
    date,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS subfields
  FROM agg
  GROUP BY date, article_id, user_type
)

SELECT
  date,
  article_id,
  user_type,
  subfields
FROM final
Upvotes: 0