TheDataGuy
TheDataGuy

Reputation: 3108

BigQuery - Group by on arrays

I want to group by an array column and sum `cost` for each group.

sample query:

#standardSQL
-- Sample rows defined inline under the name `project.dataset.table`:
-- a description, a JSON-array string of {key, value} labels, and a cost.
WITH `project.dataset.table` AS (
  SELECT
    'compute' AS description,
    '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' AS labels,
    0.323316 AS cost
  UNION ALL
  SELECT
    'compute' AS description,
    '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' AS labels,
    0.342825 AS cost
)
-- One output row per input row; the labels string is expanded into an
-- ARRAY<STRUCT<key, value>>. No aggregation happens here.
SELECT
  src.description,
  ARRAY(
    SELECT AS STRUCT
      JSON_EXTRACT_SCALAR(label_json, '$.key') AS key,
      JSON_EXTRACT_SCALAR(label_json, '$.value') AS value
    -- Cut the string at every '},{' boundary between elements ...
    FROM UNNEST(SPLIT(src.labels, '},{')) AS fragment
    -- ... then strip the leading '[{' / trailing '}]' and re-wrap the
    -- fragment in braces so it is a standalone JSON object again.
    CROSS JOIN UNNEST([CONCAT('{', REGEXP_REPLACE(fragment, r'^\[{|}]$', ''), '}')]) AS label_json
  ) AS labels,
  src.cost
FROM `project.dataset.table` AS src

Result of the above query:

Row description labels.key      labels.value    cost     
1   compute         application     scaled-server   0.323316     
                    department      hrd      
2   compute         application    scaled-server  0.342825     
                    department     hrd

I want a result like the one below:

Row description labels.key      labels.value    cost     
1   compute         application     scaled-server   0.666141     
                    department      hrd   

Upvotes: 1

Views: 2576

Answers (1)

Mikhail Berlyant
Mikhail Berlyant

Reputation: 172994

#standardSQL
-- Sample rows defined inline under the name `project.dataset.table`:
-- a description, a JSON-array string of {key, value} labels, and a cost.
WITH `project.dataset.table` AS (
  SELECT
    'compute' AS description,
    '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' AS labels,
    0.323316 AS cost
  UNION ALL
  SELECT
    'compute' AS description,
    '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' AS labels,
    0.342825 AS cost
),
-- Sum cost per (description, labels) while labels is still the raw JSON
-- string, so rows with identical label strings collapse into one row.
-- The string is only expanded into an array after this aggregation.
temp AS (
  SELECT
    description,
    labels,
    SUM(cost) AS cost
  FROM `project.dataset.table`
  GROUP BY
    description,
    labels
)
SELECT
  description,
  ARRAY(
    SELECT AS STRUCT
      JSON_EXTRACT_SCALAR(kv, '$.key') AS key,
      JSON_EXTRACT_SCALAR(kv, '$.value') AS value
    -- JSON_EXTRACT_ARRAY parses the JSON array itself, so element
    -- boundaries are found correctly even with whitespace between
    -- elements or '},{' inside a value (string splitting on '},{'
    -- would mis-cut those). An empty array '[]' yields no elements.
    FROM UNNEST(JSON_EXTRACT_ARRAY(labels)) AS kv WITH OFFSET AS pos
    -- Keep the labels in their original JSON order.
    ORDER BY pos
  ) AS labels,
  cost
FROM temp

Upvotes: 2

Related Questions