agonen
agonen

Reputation: 311

creating Google BigQuery table from GoogleSheet

I'm trying to create using BQ API a table from an external source (google sheet).

project_id = 'XXX'
dataset_name= f'{project_id}.brodmann_dev'
table_name = f'{dataset_name}.test2'
from google.cloud import bigquery
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.external_config import GoogleSheetsOptions

source_uri="https://docs.google.com/spreadsheets/d/XXXX"
external_config = bigquery.ExternalConfig(source_format=bigquery.ExternalSourceFormat.GOOGLE_SHEETS)
external_config.source_uri=source_uri
schema = [SchemaField('id', 'STRING', 'NULLABLE', None, (), None)]
table = bigquery.Table(table_name,schema=schema)
table.external_config=external_config
client.delete_table(table, not_found_ok=True) 
client.create_table(table)

The table is created but without any reference to the source uri

Upvotes: 0

Views: 1101

Answers (1)

agonen
agonen

Reputation: 311

client = bigquery.Client( project=project_id)
project_id = 'XXX'
dataset_id= f'{project_id}.brodmann_dev'
dataset = client.get_dataset(dataset_id)
table_id = "test2"
schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
    bigquery.SchemaField("bad_record", "INTEGER"),
]

table = bigquery.Table(dataset.table(table_id), schema=schema)
client.delete_table(table, not_found_ok=True)
external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")

external_config.source_uris = ['https://docs.google.com/spreadsheets/d/XXX']
external_config.options.skip_leading_rows = 1  
external_config.options.range = ("cat_highway")

# if `schema=None` it works fine.
external_config.autodetect = True   

# if the value is `1` then it throws an error as in the table cell `C3` and `C5` has character. 
external_config.max_bad_records = 2    

# Verify with the csv and json it works (through the error if it's value is False and csv has an extra column).
external_config.ignore_unknown_values = True   
table.external_data_configuration = external_config

table = client.create_table(table)


sql = "SELECT * FROM `{}.{}`".format(dataset_id, table_id)
query_job = client.query(sql)  

Upvotes: 1

Related Questions