Reputation: 1675
I'd like to backup (and later import) a dynamodb table to S3. The dynamodb table exists in us-east-2, but this is an unsupported region for aws data pipelines. AWS docs seem to indicate that that shouldn't be a problem, but I can't seem to get the data pipeline to look for the table in us-east-2.
Here's an export of my data pipeline. When I run this, I get a 'resource not found error' when looking up the dynamodb table. If I temporarily create a table with the same name in us-west-2 where this data pipeline is running, the job works, but pulls the data from the table in us-west-2 instead of us-east-2. Any way to get this job to pull from the region specified in the configuration?
{
"objects": [
{
"readThroughputPercent": "#{myDDBReadThroughputRatio}",
"name": "DDBSourceTable",
"id": "DDBSourceTable",
"type": "DynamoDBDataNode",
"region": "#{myDDBRegion}",
"tableName": "#{myDDBTableName}"
},
{
"period": "6 Hours",
"name": "Every 6 hours",
"id": "DefaultSchedule",
"type": "Schedule",
"startAt": "FIRST_ACTIVATION_DATE_TIME"
},
{
"bootstrapAction": "s3://us-west-2.elasticmapreduce/bootstrap-actions/configure-hadoop, --yarn-key-value,yarn.nodemanager.resource.memory-mb=11520,--yarn-key-value,yarn.scheduler.maximum-allocation-mb=11520,--yarn-key-value,yarn.scheduler.minimum-allocation-mb=1440,--yarn-key-value,yarn.app.mapreduce.am.resource.mb=2880,--mapred-key-value,mapreduce.map.memory.mb=5760,--mapred-key-value,mapreduce.map.java.opts=-Xmx4608M,--mapred-key-value,mapreduce.reduce.memory.mb=2880,--mapred-key-value,mapreduce.reduce.java.opts=-Xmx2304m,--mapred-key-value,mapreduce.map.speculative=false",
"name": "EmrClusterForBackup",
"coreInstanceCount": "1",
"coreInstanceType": "m3.xlarge",
"amiVersion": "3.9.0",
"masterInstanceType": "m3.xlarge",
"id": "EmrClusterForBackup",
"region": "us-west-2",
"type": "EmrCluster",
"terminateAfter": "1 Hour"
},
{
"directoryPath": "#{myOutputS3Loc}/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}",
"name": "S3BackupLocation",
"id": "S3BackupLocation",
"type": "S3DataNode"
},
{
"output": {
"ref": "S3BackupLocation"
},
"input": {
"ref": "DDBSourceTable"
},
"maximumRetries": "2",
"name": "TableBackupActivity",
"step": "s3://dynamodb-emr-us-west-2/emr-ddb-storage-handler/2.1.0/emr-ddb-2.1.0.jar,org.apache.hadoop.dynamodb.tools.DynamoDbExport,#{output.directoryPath},#{input.tableName},#{input.readThroughputPercent}",
"id": "TableBackupActivity",
"runsOn": {
"ref": "EmrClusterForBackup"
},
"type": "EmrActivity",
"resizeClusterBeforeRunning": "true"
},
{
"failureAndRerunMode": "CASCADE",
"schedule": {
"ref": "DefaultSchedule"
},
"resourceRole": "data_pipeline_etl_role",
"pipelineLogUri": "s3://MY_S3_BUCKET/",
"role": "data_pipeline_pipeline_role",
"scheduleType": "cron",
"name": "Default",
"id": "Default"
}
],
"parameters": [
{
"description": "Output S3 folder",
"id": "myOutputS3Loc",
"type": "AWS::S3::ObjectKey"
},
{
"description": "Source DynamoDB table name",
"id": "myDDBTableName",
"type": "String"
},
{
"default": "0.25",
"watermark": "Enter value between 0.1-1.0",
"description": "DynamoDB read throughput ratio",
"id": "myDDBReadThroughputRatio",
"type": "Double"
},
{
"default": "us-east-1",
"watermark": "us-east-1",
"description": "Region of the DynamoDB table",
"id": "myDDBRegion",
"type": "String"
}
],
"values": {
"myDDBRegion": "us-east-2",
"myDDBTableName": "prod--users",
"myDDBReadThroughputRatio": "0.25",
"myOutputS3Loc": "s3://MY_S3_BUCKET"
}
}
Upvotes: 1
Views: 813
Reputation: 14799
Is it a one-off or something you want to do continuously? Could you use DynamoDB global tables to replicate the table in a supported region, then just remove the region once your backup is done?
Global table replication is free. You should just pay for the capacity on your replicated table whilst it's up and running.
https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GlobalTables.html
Upvotes: 1