Reputation: 107
I am trying converting Hive QL to Bigquery SQL
Hive Query
create table d_bu as
select
itapp_id,
it_acronym,
concat_ws(',',sort_array(split(concat_ws(":", collect_set(it_unit)),':'))) as merged_BU,
case when lower(domain) in ('csms','ucmdb') then 'yes' when lower(line_of_business) in ('psbms','retail','hosted') then 'yes' else 'no' end as heritage
from db.table_bkp
group by itapp_id,
it_acronym,
domain,
line_of_business;
BigQuery converted
CREATE TABLE db.d_bu
AS
SELECT
table_bkp.itapp_id,
table_bkp.it_acronym,
array_to_string(ARRAY (
SELECT
v
FROM
UNNEST(split(array_to_string(array_agg(DISTINCT table_bkp.it_unit), ':'), ':')) AS v
ORDER BY
v
), ',') AS merged_bu,
CASE
WHEN lower(table_bkp.domain) IN(
'csms', 'ucmdb'
) THEN 'yes'
WHEN lower(table_bkp.line_of_business) IN(
'psbms', 'retail', 'hosted'
) THEN 'yes'
ELSE 'no'
END AS heritage
FROM
db.table_bkp
GROUP BY 1, 2, table_bkp.domain, table_bkp.line_of_business
;
I tried multiple ways but, I am getting this error running the BQ SQL stating, Aggregate function array_agg is not allowed under Unnest
Can someone please look into it if the conversion is correct or not?
Upvotes: 0
Views: 833
Reputation: 12254
Aggregation for merged_bu
column seems too complicated.
If you want a sorted, de-duplicated CSV string, you can simple write it like below.
-- sample data
WITH table_bkp AS (
SELECT * FROM UNNEST(SPLIT('badfbkzjdidwwkdefjdfka', '')) it_unit
)
-- aggregation query starts here
SELECT STRING_AGG(DISTINCT it_unit ORDER BY it_unit) AS merged_bu
FROM table_bkp;
-- query result
+-----------------------+
| merged_bu |
+-----------------------+
| "a,b,d,e,f,i,j,k,w,z" |
+-----------------------+
Upvotes: 1