Reputation: 5147
It seems quite new, but just hoping someone here has been able to use nodejs to write directly to BigQuery storage using @google-cloud/bigquery-storage.
There is an explanation of how the overall backend API works and how to write a collection of rows atomically using BigQuery Write API but no such documentation for nodejs yet. A recent release 2.7.0 documents the addition of said feature but there is no documentation, and the code is not easily understood.
There is an open issue requesting an example but thought I'd try my luck to see if anyone has been able to use this API yet.
Upvotes: 4
Views: 2213
Reputation: 108
Suppose you have a BigQuery table called student with three columns id,name and age. Following steps will get you to load data into the table with nodejs storage write api.
Define student.proto file as follows
syntax = "proto2";
message Student {
required int64 id = 1;
optional string name = 2;
optional int64 age = 3;
}
Run the following at the command prompt
protoc --js_out=import_style=commonjs,binary:. student.proto
It should generate student_pb.js file in the current directory.
Write the following js code in the current directory and run it
const {BigQueryWriteClient} = require('@google-cloud/bigquery-storage').v1;
const st = require('./student_pb.js')
const type = require('@google-cloud/bigquery-storage').protos.google.protobuf.FieldDescriptorProto.Type
const mode = require('@google-cloud/bigquery-storage').protos.google.cloud.bigquery.storage.v1.WriteStream.Type
const storageClient = new BigQueryWriteClient();
const parent = `projects/${project}/datasets/${dataset}/tables/student`
var writeStream = {type: mode.PENDING}
var student = new st.Student()
var protoDescriptor = {}
protoDescriptor.name = 'student'
protoDescriptor.field = [{'name':'id','number':1,'type':type.TYPE_INT64},{'name':'name','number':2,'type':type.TYPE_STRING},{'name':'age','number':3,'type':type.TYPE_INT64}]
async function run() {
try {
var request = {
parent,
writeStream
}
var response = await storageClient.createWriteStream(request);
writeStream = response[0].name
var serializedRows = []
//Row 1
student.setId(1)
student.setName('st1')
student.setAge(15)
serializedRows.push(student.serializeBinary())
//Row 2
student.setId(2)
student.setName('st2')
student.setAge(15)
serializedRows.push(student.serializeBinary())
var protoRows = {
serializedRows
}
var proto_data = {
writerSchema: {protoDescriptor},
rows: protoRows
}
// Construct request
request = {
writeStream,
protoRows: proto_data
};
// Insert rows
const stream = await storageClient.appendRows();
stream.on('data', response => {
console.log(response);
});
stream.on('error', err => {
throw err;
});
stream.on('end', async () => {
/* API call completed */
try {
var response = await storageClient.finalizeWriteStream({name: writeStream})
response = await storageClient.batchCommitWriteStreams({parent,writeStreams: [writeStream]})
}
catch(err) {
console.log(err)
}
});
stream.write(request);
stream.end();
}
catch(err) {
console.log(err)
}
}
run();
Make sure your environment variables are set correctly to point to the file containing google cloud credentials.
Change project and dataset values accordingly.
Upvotes: 7