Reputation: 101
Imagine I have a table with two columns, a date DATE and an array of strings ITEMS.
I'd like to create a column ITEMS_AGG which contains an aggregate of all the arrays from previous rows, i.e. something like:
DATE ITEMS ITEMS_AGG
1 a, b a, b
2 a, c a, b, c
3 b, c a, b, c
4. a, d a, b, c, d
5. a, b, e a, b, c, d, e
etc.
Upvotes: 5
Views: 1257
Reputation: 175606
EDIT: Starting from version 9.1 it is possible to use cumulative ARRAY_AGG:
SELECT *
,ARRAY_AGG(ITEMS) OVER(PARTITION BY GRP ORDER BY DATE) AS arr_agg
,ARRAY_DISTINCT(ARRAY_FLATTEN(arr_agg)) AS result
FROM test;
Output:
Cumulative array_agg with distinct defined as JavaScript UDTF.
Sample data:
CREATE OR REPLACE TABLE test(grp TEXT, date INT, ITEMS ARRAY)
AS
SELECT 'X',1, ARRAY_CONSTRUCT('a', 'b')
UNION SELECT 'X',2, ARRAY_CONSTRUCT('a', 'c')
UNION SELECT 'X',3, ARRAY_CONSTRUCT('b', 'c')
UNION SELECT 'X',4, ARRAY_CONSTRUCT('a', 'd')
UNION SELECT 'X',5, ARRAY_CONSTRUCT('a', 'b', 'e')
UNION SELECT 'Y',1, ARRAY_CONSTRUCT('z')
UNION SELECT 'Y',2, ARRAY_CONSTRUCT('y','x')
UNION SELECT 'Y',3, ARRAY_CONSTRUCT('y');
Function:
CREATE OR REPLACE FUNCTION aggregate (TS ARRAY)
RETURNS table (output variant)
LANGUAGE JAVASCRIPT
STRICT
IMMUTABLE
AS '
{
initialize: function(argumentInfo, context) {
this.result = [];
},
processRow: function (row, rowWriter, context) {
this.result = [...new Set(this.result.concat(row.TS))];
rowWriter.writeRow({OUTPUT: this.result.sort()});
}
}
';
Query:
SELECT *
FROM test, TABLE(aggregate(ITEMS) OVER(PARTITION BY grp ORDER BY date))
ORDER BY grp, date;
Output:
Upvotes: 5
Reputation: 25903
Well not exactly what you seems to want, you can aggregate with double-ups with a recursive cte, as there is no ARRAY_CAT(DISTINCT) with this SQL:
WITH data AS (
SELECT column1 as date, split(column2, ',') as items FROM VALUES
(1, 'a,b'),
(2, 'a,c'),
(3, 'b,c'),
(4, 'a,d'),
(5, 'a,b,e')
), rec AS (
WITH RECURSIVE r_cte AS (
SELECT date as date, items
FROM data
WHERE date = 1
UNION ALL
SELECT r.date+1 as r_date, array_cat(r.items, d.items) as items
FROM r_cte r
JOIN data d
ON r.date + 1 = d.date
)
SELECT * from r_cte
)
SELECT *
FROM rec;
DATE | ITEMS |
---|---|
1 | [ "a", "b" ] |
2 | [ "a", "b", "a", "c" ] |
3 | [ "a", "b", "a", "c", "b", "c" ] |
4 | [ "a", "b", "a", "c", "b", "c", "a", "d" ] |
5 | [ "a", "b", "a", "c", "b", "c", "a", "d", "a", "b", "e" ] |
but really you should use Lukasz solution.
Upvotes: 1
Reputation: 5803
Late to the party, but if you're stopping by, we can use a lateral join
with array_union_agg
with cte (grp, dt, items) as
(select 'x', 1, ['a', 'b'] union all
select 'x', 2, ['a', 'c'] union all
select 'x', 3, ['b', 'c'] union all
select 'x', 4, ['a', 'd'] union all
select 'x', 5, ['a', 'b', 'e'] union all
select 'y', 1, ['z'] union all
select 'y', 2, ['y','x'] union all
select 'y', 3, ['y'])
select *
from cte a, lateral(select array_union_agg(b.items) as items_agg
from cte b
where a.grp=b.grp and b.dt<=a.dt) t2
order by a.grp, a.dt
Upvotes: 0