Reputation: 311
I'm trying to create using BQ API a table from an external source (google sheet).
project_id = 'XXX'
dataset_name= f'{project_id}.brodmann_dev'
table_name = f'{dataset_name}.test2'
from google.cloud import bigquery
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.external_config import GoogleSheetsOptions
source_uri="https://docs.google.com/spreadsheets/d/XXXX"
external_config = bigquery.ExternalConfig(source_format=bigquery.ExternalSourceFormat.GOOGLE_SHEETS)
external_config.source_uri=source_uri
schema = [SchemaField('id', 'STRING', 'NULLABLE', None, (), None)]
table = bigquery.Table(table_name,schema=schema)
table.external_config=external_config
client.delete_table(table, not_found_ok=True)
client.create_table(table)
The table is created but without any reference to the source uri
Upvotes: 0
Views: 1101
Reputation: 311
client = bigquery.Client( project=project_id)
project_id = 'XXX'
dataset_id= f'{project_id}.brodmann_dev'
dataset = client.get_dataset(dataset_id)
table_id = "test2"
schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
bigquery.SchemaField("bad_record", "INTEGER"),
]
table = bigquery.Table(dataset.table(table_id), schema=schema)
client.delete_table(table, not_found_ok=True)
external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
external_config.source_uris = ['https://docs.google.com/spreadsheets/d/XXX']
external_config.options.skip_leading_rows = 1
external_config.options.range = ("cat_highway")
# if `schema=None` it works fine.
external_config.autodetect = True
# if the value is `1` then it throws an error as in the table cell `C3` and `C5` has character.
external_config.max_bad_records = 2
# Verify with the csv and json it works (through the error if it's value is False and csv has an extra column).
external_config.ignore_unknown_values = True
table.external_data_configuration = external_config
table = client.create_table(table)
sql = "SELECT * FROM `{}.{}`".format(dataset_id, table_id)
query_job = client.query(sql)
Upvotes: 1