mj8701

Reputation: 21

Insert JSON Data into BigQuery ERROR: Error while reading data, error message: JSON parsing error in row starting at position 0: Expected key

I'm trying to insert dummy data into BigQuery using the Python script below.

I'm getting this error and I'm not sure why, or how to fix it:

ERROR: Error while reading data, error message: JSON parsing error in row starting at position 0: Expected key

def insert_into_bq(cloud_event):
    values = {
        'person_id': raw_data_to_insert['PersonID'],
        'first_name': raw_data_to_insert['FirstName']
    }


table_schema = (
    {
        'name': 'PersonID',
        'type': 'INTEGER',
        'mode': 'REQUIRED'
    },
    {
        'name': 'FirstName',
        'type': 'STRING',
        'mode': 'NULLABLE'
    }
)
        

project_id = 'PROJECT'
dataset_id = 'tests'
table_id = 'persons'

client = bigquery.Client(project=project_id)
dataset = client.dataset(dataset_id)
table = dataset.table(table_id)

job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = format_schema(table_schema)

with open('data_to_insert.json', 'w') as fp:
    json.dump(values, fp, ensure_ascii=False, indent=4)



with open('data_to_insert.json', 'rb') as source_file:
    job = client.load_table_from_file(
        source_file,
        table,
        job_config=job_config
    )
    try:
        print(job.result())
    except BadRequest as e:
        for e in job.errors:
            print('ERROR: {}'.format(e['message']))

Upvotes: 0

Views: 1388

Answers (1)

Anjela B

Reputation: 1201

For the benefit of the community, this approach works as well:

from google.cloud import bigquery
from google.api_core.exceptions import BadRequest
import json


def format_schema(schema):
    formatted_schema = []
    for row in schema:
        formatted_schema.append(bigquery.SchemaField(row['name'], row['type'], row['mode']))
    return formatted_schema

raw_data_to_insert = {'idno': 1, 'name': 'Joe'}
raw_data_to_insert_1 = {'idno': 2, 'name': 'Jane'}



values = [
    {'person_id': raw_data_to_insert['idno'], 'first_name': raw_data_to_insert['name']},
    {'person_id': raw_data_to_insert_1['idno'], 'first_name': raw_data_to_insert_1['name']}
]

# Serialize each record as a single line of JSON (newline-delimited JSON).
result = [json.dumps(record) for record in values]
result_2 = '\n'.join(result)


table_schema = (
    {
        'name': 'person_id',
        'type': 'INTEGER',
        'mode': 'REQUIRED'
    },
    {
        'name': 'first_name',
        'type': 'STRING',
        'mode': 'NULLABLE'
    }
)


project_id = '<your-project>'
dataset_id = '<your-dataset>'
table_id = 'persons'

client = bigquery.Client(project=project_id)
dataset = client.dataset(dataset_id)
table = dataset.table(table_id)

job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = format_schema(table_schema)

# Write the newline-delimited JSON to a file for the load job to read.
with open('data_to_insert.json', 'w') as fp:
    fp.write(result_2)


with open('data_to_insert.json', 'rb') as source_file:
    job = client.load_table_from_file(
        source_file,
        table,
        job_config=job_config
    )
    try:
        print(job.result())
    except BadRequest:
        for error in job.errors:
            print('ERROR: {}'.format(error['message']))
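
What makes this version load cleanly is the serialization step: BigQuery's NEWLINE_DELIMITED_JSON source format expects one complete JSON object per line, so each record goes through json.dumps and the lines are joined with '\n'. The original script instead wrote a single pretty-printed object with json.dump(values, fp, indent=4), which spreads the object over several lines and is what triggers the "Expected key" error at position 0. A minimal standalone sketch of the difference (illustration only, not part of the load script):

import json

record = {'person_id': 1, 'first_name': 'Joe'}

# What BigQuery expects: one JSON object per line.
ndjson_line = json.dumps(record)
# -> {"person_id": 1, "first_name": "Joe"}

# What the original script wrote: one object spread over several lines,
# which the newline-delimited parser rejects with "Expected key".
pretty = json.dumps(record, indent=4)
# -> {
#        "person_id": 1,
#        "first_name": "Joe"
#    }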

Output: the load job completes and both rows are written to the persons table.
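
If the installed google-cloud-bigquery version provides Client.load_table_from_json (recent releases do), the temporary file can be skipped entirely, since that method serializes a list of dicts to newline-delimited JSON itself. A sketch under that assumption, reusing values, table and job_config from the script above:

# Assumes Client.load_table_from_json exists in the installed
# google-cloud-bigquery version; it handles the NDJSON serialization itself.
job = client.load_table_from_json(
    values,              # list of dicts, one per row
    table,
    job_config=job_config
)
job.result()             # wait for the load job to finish
print('Loaded {} rows.'.format(job.output_rows))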

Upvotes: 1
