Reputation: 476
I have the following SQL to insert data into a JSON type column in BigQuery
DROP TABLE MMP.tmpJourneys;
CREATE TABLE MMP.tmpJourneys (
id INT64 NOT NULL,
data JSON
);
INSERT INTO MMP.tmpJourneys (id, data)
VALUES
(1, JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}'),
(2, JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}'),
(3, JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}');
SELECT * FROM MMP.tmpJourneys;
The data represents public transport journeys with a nested group of intermediate locations.
How can I query the data in the intermediate_stops repeating group?
SELECT JSON_EXTRACT_SCALAR(data, '$.journey_id') AS journey_id,
JSON_EXTRACT_SCALAR(data, '$.transport_type') AS transport_type,
JSON_EXTRACT_SCALAR(data, '$.origin') AS transport_type,
JSON_EXTRACT_SCALAR(data, '$.destination') AS transport_type,
JSON_EXTRACT_SCALAR(data, '$.origin_time') AS transport_type,
JSON_EXTRACT_SCALAR(data, '$.duration_mins') AS transport_type,
JSON_EXTRACT(data, '$.intermediate_stops') AS intermediate_stops,
JSON_EXTRACT_SCALAR(stops, '$.station') AS station1,
-- intermediate_stops
-- ARRAY(SELECT AS STRUCT
-- location,
-- time
-- FROM UNNEST(JSON_EXTRACT(data, '$.intermediate_stops'))
-- ) AS intermediate_stops
FROM MMP.tmpJourneys,
UNNEST(JSON_QUERY_ARRAY(data.intermediate_stops)) AS stops;
Upvotes: 0
Views: 134
Reputation: 12274
It looks you're almost close to the answer. Consider below query.
WITH tmpJourneys AS (
SELECT 1 id, JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}' data UNION ALL
SELECT 2, JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}' UNION ALL
SELECT 3, JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}'
)
SELECT JSON_VALUE(data, '$.journey_id') AS journey_id,
JSON_VALUE(data, '$.transport_type') AS transport_type,
-- ...
ARRAY(
SELECT AS STRUCT
JSON_VALUE(e, '$.station') AS station,
JSON_VALUE(e, '$.arrival_time') AS arrival_time,
JSON_VALUE(e, '$.departure_time') AS departure_time
FROM UNNEST(JSON_QUERY_ARRAY(data, '$.intermediate_stops')) e
) AS intermediate_stops
FROM tmpJourneys t;
Query results
Upvotes: 1