Reputation: 99
I have an external table which is populated from a csv file. In the csv file there is a field which has an escape character in it followed by a coma. eg "a\,b" which should read just "a,b". when i load the csv file it it separates it into 2 columns "a", "b" but should read "a,b" in one column. I've tried using the option = '' without any luck.
CREATE OR REPLACE EXTERNAL TABLE TEST
(A STRING,
B STRING)
OPTIONS (
format = 'CSV',
quote = '\'
)
Could someone help ?
Upvotes: 0
Views: 758
Reputation: 12254
You may try below workaround.
CREATE OR REPLACE EXTERNAL TABLE `your-project.your-dataset.so_test` (
raw STRING
) OPTIONS (
uris=['gs://your-bucket/so/test2.csv'],
format = 'CSV',
field_delimiter = CHR(1)
);
CREATE TEMP TABLE sample_table AS
SELECT csv[SAFE_OFFSET(0)] col1,
REPLACE(csv[SAFE_OFFSET(1)], '|', ',') col2,
csv[SAFE_OFFSET(2)] col3,
FROM `bigdata-cubig-data.bdc_cubig_temp.so_test`,
UNNEST ([STRUCT(SPLIT(REPLACE(raw, '\\,', '|')) AS csv)]);
SELECT * FROM sample_table;
Sample csv file
blah,a\,b,blah
Query results
Or, using PIVOT query
CREATE TEMP TABLE sample_table (
col1 STRING, col2 STRING, col3 STRING,
) AS
SELECT * REPLACE(REPLACE(col_1, '|', ',') AS col_1) FROM (
SELECT col, offset
FROM `your-project.your-dataset.so_test`,
UNNEST (SPLIT(REPLACE(csv, '\\,', '|'))) col WITH offset
) PIVOT (ANY_VALUE(col) col FOR offset IN (0, 1, 2));
SELECT * FROM sample_table;
Upvotes: 1