Reputation: 211
I'm inserting JSON into Google Big Query. At the bottom of the question is the schema of the JSON.
Below is an example JSON:
{
"_index":"data",
"_type":"collection_v1",
"_id":"548d035f23r8987b768a5e60",
"_score":1,
"_source":{
"fullName":"Mike Smith",
"networks":[
{
"id":[
"12923449"
],
"network":"facebook",
"link":"https://www.facebook.com/127654449"
}
],
"sex":{
"network":"facebook",
"value":"male"
},
"interests":{
},
"score":1.045,
"merged_by":"548f899444v5t4v45te9a4cc"
}
}
as you can see there's a "_source.fullName" field with "Mike Smith".
When I try to create a table with it, it errors out:
Array specified for non-repeated field: _source.fullName.
I believe this field is a one-time field for _source. How do I overcome this error?
here's the Schema:
[
{
"name": "_index",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "_id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "_type",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "score",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "header",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "src",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "avatar",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "merged_by",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "cover",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "sex",
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "_source",
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "links",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "birthday",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "phones",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "pictures",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "url",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tab",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "contacts",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "groups",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "Name",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "skills",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "relations",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "about",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "emails",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "languages",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "places",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "type",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "education",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "school",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "experience",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "start",
"type": "NUMERIC",
"mode": "NULLABLE"
},
{
"name": "company",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "title",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "networks",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "link",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "id",
"type": "STRING",
"mode": "REPEATED"
}
]
},
{
"name": "network",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "others",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "books",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "music",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "games",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "spotify",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
}
]
}
]
}
]
Upvotes: 0
Views: 2760
Reputation: 59165
You could import the full json row as if it was a CSV - basically a one column BigQuery table of json objects. Then you can parse the JSON at will inside BigQuery, with queries like this:
WITH j AS (
SELECT """{"_index":"data","_type":"collection_v1","_id":"548d035f23r8987b768a5e60","_score":1,"_source":{"fullName":"Mike Smith","networks":[{"id":["12923449"],"network":"facebook","link":"https://www.facebook.com/127654449"}],"sex":{"network":"facebook","value":"male"},"interests":{},"score":1.045,"merged_by":"548f899444v5t4v45te9a4cc"}}""" j
)
SELECT index
, STRUCT(
JSON_EXTRACT_SCALAR(source, '$.fullName') AS fullName
, [
STRUCT(
JSON_EXTRACT_SCALAR(source, '$.networks[0].id[0]') AS id
, JSON_EXTRACT_SCALAR(source, '$.networks[0].network') AS network
, JSON_EXTRACT_SCALAR(source, '$.networks[0].link') AS link)
] AS networks
) source
FROM (
SELECT JSON_EXTRACT_SCALAR(j.j, '$._index') index
, JSON_EXTRACT(j.j, '$._source') source
FROM j
)
See:
Upvotes: 1