Reputation: 1335
Let's say in each row I have an id
and two arrays array_1
and array_2
that looks like following
SELECT 'a' id, [1,2,3,4,5] array_1, [2,2,2,3,6] array_2 UNION ALL
SELECT 'b', [2,3,4,5,6], [7,7,8,6,9] UNION ALL
SELECT 'c', [], [1,4,5]
I want concatenate these two arrays and only keep the unique elements in the new array. My desired output would look like the following
+----+-----------+-----------+-----------------------------+
| id | array_1 | array_2 | concatenated_array_distinct |
+----+-----------+-----------+-----------------------------+
| a | 1,2,3,4,5 | 2,2,2,3,6 | 1,2,3,4,5,6 |
| b | 2,3,4,5,6 | 7,7,8,6,9 | 2,3,4,5,6,7,8,9 |
| c | | 1,4,5 | 1,4,5 |
+----+-----------+-----------+-----------------------------+
I was trying to use array_concat
function but I could not find a way to keep distinct elements using the array_concat
function.
Is there anyway I can get the desired output?
Upvotes: 7
Views: 12124
Reputation: 1917
simple, readable and maintainable solution:
#Declare the function once
#standardSQL
CREATE TEMP FUNCTION dedup(val ANY TYPE) AS ((
SELECT ARRAY_AGG(t)
FROM (SELECT DISTINCT * FROM UNNEST(val) v) t
));
with t as (
select 'a' id, [1,2,3,4,5] array_1, [2,2,2,3,6] array_2 UNION ALL
select 'b', [2,3,4,5,6], [7,7,8,6,9] UNION ALL
select 'c', [], [1,4,5]
)
select t.*,
dedup(array_1 || array_2) array_unique
from t
Upvotes: 1
Reputation: 173191
Below is for BigQuery Standard SQL
... I was trying to use array_concat function but I could not find a way to keep distinct elements using the array_concat function. ...
You were on right track :o)
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'a' id, [1,2,3,4,5] array_1, [2,2,2,3,6] array_2 UNION ALL
SELECT 'b', [2,3,4,5,6], [7,7,8,6,9] UNION ALL
SELECT 'c', [], [1,4,5]
)
SELECT *,
ARRAY(SELECT DISTINCT x
FROM UNNEST(ARRAY_CONCAT(array_1, array_2)) x
ORDER BY x
) concatenated_array_distinct
FROM `project.dataset.table`
Upvotes: 12
Reputation: 1270993
You can use unnest()
and union distinct
:
with t as (
select 'a' id, [1,2,3,4,5] array_1, [2,2,2,3,6] array_2 UNION ALL
select 'b', [2,3,4,5,6], [7,7,8,6,9] UNION ALL
select 'c', [], [1,4,5]
)
select t.*,
(select array_agg( e.el)
from (select el
from unnest(array_1) el
union distinct
select el
from unnest(array_2) el
) e
) array_unique
from t
Upvotes: 3