Reputation: 21
I'm trying to insert dummy data into BigQuery using a Python Script seen below.
I'm getting this error and not sure why/how to fix it:
ERROR: Error while reading data, error message: JSON parsing error in row starting at position 0: Expected key
def insert_into_bq(cloud_event):
values = {
'person_id': raw_data_to_insert['PersonID'],
'first_name': raw_data_to_insert['FirstName']
}
table_schema = {
'name': 'PersonID',
'type': 'INTEGER',
'mode': 'REQUIRED'
},{
'name': 'FirstName',
'type': 'STRING',
'mode': 'NULLABLE'
}
project_id = 'PROJECT'
dataset_id = 'tests'
table_id = 'persons'
client = bigquery.Client(project = project_id)
dataset = client.dataset(dataset_id)
table = dataset.table(table_id)
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = format_schema(table_schema)
with open('data_to_insert.json', 'w') as fp:
json.dump(values, fp, ensure_ascii=False, indent=4)
with open('data_to_insert.json', 'rb') as source_file:
job = client.load_table_from_file(
source_file,
table,
job_config=job_config
)
try:
print(job.result())
except BadRequest as e:
for e in job.errors:
print('ERROR: {}'.format(e['message']))
Upvotes: 0
Views: 1388
Reputation: 1201
For the benefit of the community, this approach works as well:
from google.cloud import bigquery
from google.api_core.exceptions import BadRequest
import json
def format_schema(schema):
formatted_schema = []
for row in schema:
formatted_schema.append(bigquery.SchemaField(row['name'], row['type'], row['mode']))
return formatted_schema
raw_data_to_insert = {'idno': 1, 'name': 'Joe'}
raw_data_to_insert_1 = {'idno': 2, 'name': 'Jane'}
values = [{'person_id': raw_data_to_insert['idno'], 'first_name': raw_data_to_insert['name']}, {'person_id': raw_data_to_insert_1['idno'], 'first_name': raw_data_to_insert_1['name']}]
result = [json.dumps(record) for record in values]
#print('\n'.join(result))
result_2 = ('\n'.join(result))
table_schema = {
'name': 'person_id',
'type': 'INTEGER',
'mode': 'REQUIRED'
},{
'name': 'first_name',
'type': 'STRING',
'mode': 'NULLABLE'
}
project_id = '<your-project>'
dataset_id = '<your-dataset>'
table_id = 'persons'
client = bigquery.Client(project = project_id)
dataset = client.dataset(dataset_id)
table = dataset.table(table_id)
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = format_schema(table_schema)
with open('data_to_insert.json', 'w') as fp:
fp.write(result_2)
with open('data_to_insert.json', 'rb') as source_file:
job = client.load_table_from_file(
source_file,
table,
job_config=job_config
)
try:
print(job.result())
except BadRequest as e:
for e in job.errors:
print('ERROR: {}'.format(e['message']))
Upvotes: 1