Reputation: 853
I would like to use Athena to run queries on data in an S3 bucket in another AWS account. I am using Javascript SDK. Reading through the documentation, I understand that I must first create a data catalog that will point Athena to the correct S3 location.
I think that I have to call the createDataCatalog method. Most of the arguments for this method are self-explanatory, except for the "parameters" argument, which seems to contain information about how the data catalog will be created. But I am unable to find anywhere how these parameters should look.
So my questions are:
Upvotes: 1
Views: 393
Reputation: 10333
For a simple use case with static S3 data,
Here is an example to create Glue Database and Table
const AWS = require("aws-sdk");
AWS.config.update({ region: "us-east-1" });
const glue = new AWS.Glue();
const dbName = "test-db";
glue.createDatabase(
{
DatabaseInput: {
Name: dbName,
},
},
function (dbCrtErr, dbRsp) {
if (dbCrtErr.message === "Database already exists." || dbRsp) {
console.log("dbRsp", dbRsp);
glue.createTable(
{
DatabaseName: dbName,
TableInput: {
Name: "my-table",
Parameters: {
classification: "json",
compressionType: "none",
},
TableType: "EXTERNAL_TABLE",
StorageDescriptor: {
Location: "s3://my-s3-bucket-with-events/",
InputFormat:
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
OutputFormat:
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
Columns: [
{
Name: "id",
Type: "string",
},
{
Name: "name",
Type: "string",
},
],
},
},
},
function (error, response) {
console.log("error", error, "response", response);
}
);
} else {
console.log("dbCrtErr", dbCrtErr);
}
}
);
Upvotes: 1