Tom
Tom

Reputation: 34356

BigQuery - Extract multiple nested child keys inside JSON document

I have a JSON structure in a field that looks like this. I'm trying to extract every task in every category; there could be any number of tasks or categories.

I've got part of the way there by extracting a single category, but can't seem to do it for every task in every category.

 "tasks": {
    "category-business": [
        {
            "dateCompleted": {
                "_seconds": 1653672655,
                "_nanoseconds": 791000000
            },
            "slug": "task-alpha",
            "status": "completed"
        },
        {
            "dateCompleted": {
                "_seconds": 1654516259,
                "_nanoseconds": 796000000
            },
            "slug": "task-bravo",
            "status": "completed"
        }
       ],"category-community": [
        {
            "dateCompleted": {
                "_seconds": 1654709063,
                "_nanoseconds": 474000000
            },
            "slug": "task-papa",
            "status": "completed"
        },
        {
            "dateCompleted": {
                "_seconds": 1654709841,
                "_nanoseconds": 764000000
            },
            "slug": "task-zebra",
            "status": "completed"
        }
    ]}

Here's the query so far

-- One output row per row of `table`, with a single column `items`: an array of
-- (dateCompleted, task_slug, status) structs, one per element of the
-- "category-business" array under $.tasks. No other category key is read, and
-- dateCompleted._nanoseconds is not used.
SELECT
    ARRAY(
        SELECT
            STRUCT(
                -- _seconds is extracted as text, cast to INT64, then passed to TIMESTAMP_SECONDS.
                TIMESTAMP_SECONDS(
                    CAST(JSON_EXTRACT_SCALAR(task, '$.dateCompleted._seconds') AS INT64)
                ) AS dateCompleted,
                JSON_EXTRACT_SCALAR(task, '$.slug') AS task_slug,
                JSON_EXTRACT_SCALAR(task, '$.status') AS status
            )
        FROM UNNEST(JSON_EXTRACT_ARRAY(DATA, '$.tasks.category-business')) AS task
    ) AS items
FROM `table`

This extracts just the information in the category business.

What I'm trying to do is expand category-community and any other children underneath the tasks key. The real data has at least 10 categories and 50 tasks.

I think I need to do another round of UNNEST and json_extract_array, but I can't quite work out the correct order.

Upvotes: 0

Views: 246

Answers (3)

Ísis Santos Costa
Ísis Santos Costa

Reputation: 391

new version:

-- Regex-based flattening of everything under $.tasks, regardless of category.
-- The tasks JSON is rendered to text once; three regexes pull out all
-- "_seconds", "slug" and "status" values; the three lists are then read back
-- side by side by position, one output row per extracted slug.
-- Notes grounded in the patterns below:
--   * the slug/status patterns accept only lowercase letters and hyphens;
--   * the lists are paired purely by position, so a task that is missing one
--     of the three fields (or whose slug/status does not match the pattern)
--     shifts the pairing for the tasks that follow it.
select
  timestamp_seconds(cast(m.seconds_list[offset(a)] as int64)) dateCompleted
  , m.slug_list[offset(a)] task_slug
  , m.status_list[offset(a)] status
from (
  select
    regexp_extract_all(tasks_json, r'"_seconds":(\d*)') as seconds_list
    , regexp_extract_all(tasks_json, r'"slug":"([a-z\-]*)"') as slug_list
    , regexp_extract_all(tasks_json, r'"status":"([a-z\-]*)"') as status_list
  from (
    select to_json_string(json_extract(DATA,'$.tasks')) as tasks_json
    from table
  )
) m
-- one index per extracted slug: 0 .. array_length(slug_list) - 1
join unnest(generate_array(0,-1+array_length(m.slug_list))) a

https://console.cloud.google.com/bigquery?sq=1013309549723:9f43bd653ba14589b31a1f5673adcda7

Upvotes: 1

Ísis Santos Costa
Ísis Santos Costa

Reputation: 391

SQL only (no UDFs):

-- Pure-SQL variant (no UDFs), in two steps:
--   1. category_level: for every integer position 0..100 at which
--      DATA.tasks[position] is not NULL, take the first non-NULL array found
--      under a fixed list of category keys.
--   2. Final select: one row per element of that array, exposing its
--      completion timestamp, slug and status.
-- NOTE(review): DATA.tasks is subscripted with integers, which looks like it
-- expects `tasks` to be a JSON array of per-category objects — confirm against
-- the real data shape. The category key list and the 0..100 range are both
-- hard-coded and must be extended by hand for other categories / more entries.
WITH category_level AS (
  SELECT
    COALESCE(
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-business'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-community'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-3'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-4'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-5'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-6'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-7'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-8'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-9'),
      JSON_QUERY_ARRAY(DATA.tasks[pos], '$.category-10')
    ) AS category_array
  FROM table
  LEFT JOIN UNNEST(GENERATE_ARRAY(0, 100)) AS pos
  WHERE DATA.tasks[pos] IS NOT NULL
)

SELECT
  TIMESTAMP_SECONDS(
    CAST(JSON_EXTRACT_SCALAR(task.dateCompleted._seconds) AS INT64)
  ) AS dateCompleted,
  JSON_EXTRACT_SCALAR(task.slug) AS slug,
  JSON_EXTRACT_SCALAR(task.status) AS status
FROM category_level
-- LEFT JOIN keeps a category_level row even when its array yields no elements.
LEFT JOIN UNNEST(category_array) AS task

https://console.cloud.google.com/bigquery?sq=1013309549723:fe8b75122e5b4b549e8081df99584c81

Upvotes: 1

Mikhail Berlyant
Mikhail Berlyant

Reputation: 172944

Consider the approach below:

-- get_keys: parses `input` as JSON and returns the property names of the
-- parsed value, in the order Object.keys reports them.
create temp function get_keys(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  return Object.keys(parsed);
""";
-- get_values: parses `input` as JSON and returns the property values of the
-- parsed value, in the order Object.values reports them.
create temp function get_values(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  return Object.values(parsed);
""";
-- get_leaves: parses `input` as JSON and flattens it into a single-level
-- object, returned as a JSON string. Each non-object leaf is stored under its
-- dot-joined path from the root (array positions appear as path segments,
-- e.g. "a.0.b"). Values whose typeof is 'object' are descended into rather
-- than stored.
create temp function get_leaves(input string) returns string language js as '''
  const leaves = {};
  function collect(node, prefix) {
    for (const name in node) {
      // The root level has an empty prefix, so its keys are used as-is.
      const path = prefix ? prefix + '.' + name : name;
      const child = node[name];
      if (typeof child == 'object') {
        collect(child, path);
      } else {
        leaves[path] = child;
      }
    }
  }
  collect(JSON.parse(input), '');
  return JSON.stringify(leaves);
  ''';
-- Builds temp_table with one row per leaf value found under $.tasks.
-- Each flattened key is a dot-separated path and is split into:
--   category : first path segment
--   offset   : second path segment (for the sample data, the task's position
--              inside its category array)
--   key      : third segment, concatenated with the fourth when one exists
--              (e.g. "dateCompleted" || "_seconds"); no separator is inserted
--   val      : the leaf value paired with that key
--   row_id   : text rendering of the whole source row `t`
-- NOTE(review): splitting on '.' assumes no category or field name contains a
-- dot, and offset(2) assumes every path has at least three segments — confirm.
create temp table temp_table as (
  select 
    split(key, '.')[offset(0)] as category, 
    split(key, '.')[offset(1)] as offset, 
    split(key, '.')[offset(2)] || ifnull(split(key, '.')[safe_offset(3)], '') as key,
    val, format('%t', t) row_id
  -- The one-element array of struct exposes the flattened JSON string as a
  -- column named `leaves`, so both UDF calls below read the same value.
  from your_table t, unnest([struct(get_leaves(json_extract(data, '$.tasks')) as leaves)]),
  -- Keys and values come from the same `leaves` string and are paired by
  -- their array position.
  unnest(get_keys(leaves)) key with offset
  join unnest(get_values(leaves)) val with offset using(offset)
);

-- Builds a PIVOT statement as text and runs it. The distinct `key` values in
-- temp_table are joined into a quoted, comma-separated list and spliced into
-- the pivot's IN (...) list, so every distinct key becomes an output column
-- holding any_value(val); row_id is removed from the final output.
-- NOTE(review): key values are concatenated into the statement unescaped — a
-- key containing a double quote would presumably break the generated SQL, and
-- an empty temp_table would presumably leave `keys` NULL; confirm both cases
-- cannot occur for the real data.
execute immediate (
  select '''
    select * except(row_id) from temp_table
    pivot (any_value(val) for key in ("''' || keys || '"))'
  from (
    -- Inner '","' separators plus the outer quotes added above yield "k1","k2",...
    select string_agg(key, '","') keys 
    from (select distinct key from temp_table)
  )
);      

If applied to the sample data in your question, the output is:

enter image description here

Upvotes: 2

Related Questions