Reputation: 5
Good day, I'm working with BigQuery to construct an output in the format of
{"selectionListId":"SelectionListName","keys":["EVENT_TYPE_CODE","EVENT_TYPE_DESCRIPTION","METADATA"],"fileType":"dataTopicSelectionList"}
I have the following query, but I'm not sure if my METADATA struct is formatted correctly to produce the right output.
#standardSQL
-- Create the temporary table
CREATE OR REPLACE TEMP TABLE temp_table (
selectionListId STRING,
keys ARRAY<STRUCT<EVENT_TYPE_CODE STRING, EVENT_TYPE_DESCRIPTION STRING, METADATA STRUCT<subKeyName STRING, subKeyType STRING>>>,
fileType STRING
);
-- Insert data into the temporary table
INSERT INTO temp_table (
selectionListId,
keys,
fileType
)
SELECT
'SelectionListName' AS selectionListId,
ARRAY<STRUCT<
EVENT_TYPE_CODE STRING,
EVENT_TYPE_DESCRIPTION STRING,
METADATA STRUCT<CountedFrom DATE, To_ DATE, Count INT64>>>
[
STRUCT('EVENT_TYPE_CODE', 'STRING', []),
STRUCT('EVENT_TYPE_DESCRIPTION', 'STRING', []),
STRUCT('METADATA', 'STRUCT',
[STRUCT('CountedFrom', 'DATE', []),
STRUCT('To_', 'DATE', []),
STRUCT('Count', 'INT64', [])])
] AS keys,
'dataTopicSelectionList' AS fileType
FROM (
SELECT
Column1 AS EVENT_TYPE_CODE,
Column2 AS EVENT_TYPE_DESCRIPTION,
MIN(DATE(TransferLoc.EVENT_DTM)) AS CountedFrom,
MAX(DATE(TransferLoc.EVENT_DTM)) AS To_,
COUNT(TransferLoc.EVENT_DTM) AS Count
FROM `projectID.TransferLoc` TransferLoc
INNER JOIN `projectID.TransferEventType` TransferEventType
ON TransferLoc.EVENT_TYPE_DK = TransferEventType.EVENT_TYPE_DK
WHERE
TransferLoc.EVENT_DTM > '1900-01-01 00:00:00' AND
TransferLoc.EVENT_DTM < CURRENT_DATE()
GROUP BY
TransferEventType.EVENT_TYPE_CODE,
TransferEventType.EVENT_TYPE_DESCRIPTION
);
After the temp table is created, the schema is as follows:
However, on the insert I get this error:
Query error: Array element type STRUCT<STRING, STRING, ARRAY<STRUCT<STRING, STRING, ARRAY<INT64>>>> does not coerce to STRUCT<EVENT_TYPE_CODE STRING, EVENT_TYPE_DESCRIPTION STRING, METADATA STRUCT<CountedFrom DATE, To_ DATE, Count INT64>>
Any help/suggestion would be appreciated
I tried changing the METADATA structs. Placing the METADATA as an ARRAY<STRUCT<...>>
Upvotes: 0
Views: 69
Reputation: 5
I was able to get the correct schema to output the JSON representation correctly. I had to create 2 temp tables; along the way ran into some aggregation issues which was resolved by using the 2 tables and then join on temp. Here's my approach:
Step 1: Create temp table to prevent aggregation error
CREATE OR REPLACE TEMP TABLE aggregated_data AS (
SELECT
dataset1.EVENT_TYPE_DK,
MIN(DATE(dataset1.EVENT_DTM)) AS `from`,
MAX(DATE(dataset1.EVENT_DTM)) AS `to`,
COUNT(dataset1.EVENT_DTM) AS `count`
FROM `project1.dataset1` dataset1
WHERE dataset1.EVENT_DTM > '1900-01-01 00:00:00'
AND dataset1.EVENT_DTM < CURRENT_DATE()
GROUP BY dataset1.EVENT_TYPE_DK
);
Step 2: Query the aggregated_data, then join
CREATE OR REPLACE TEMP TABLE temp_table AS (
SELECT
'SelectionListName' AS selectionListId,
ARRAY_AGG(
STRUCT(
dataset2.EVENT_TYPE_CODE,
dataset2.EVENT_TYPE_DESCRIPTION,
METADATA
)
) AS keys,
'dataTopicSelectionList' AS fileType
FROM `project1.dataset2` dataset2
JOIN (
SELECT EVENT_TYPE_DK, ARRAY_AGG(STRUCT(`from`, `to`, `count`)) AS METADATA
FROM aggregated_data
GROUP BY EVENT_TYPE_DK
) AS aggregated_data_arr
ON dataset2.EVENT_TYPE_DK = aggregated_data_arr.EVENT_TYPE_DK
GROUP BY 1,3
);
Upvotes: 0