Reputation: 10828
Using Lambda (node.js) - how to delete all the items in the Dynamodb table?
There are 500K rows in the table
I have tried using scan method and then loop through each item and then using delete method. It only allow up to 3000 rows only.
Code
exports.handler = function(context, callback) {
getRecords().then((data) => {
data.Items.forEach(function(item) {
deleteItem(item.Id).then((data1) => {
});
});
});
};
var deleteItem = function(id) {
var params = {
TableName: "TableName",
Key: {
"Id": id
},
};
return new Promise(function(resolve, reject) {
client.delete(params, function(err, data) {
if (err) {
reject(err);
} else {
resolve();
}
});
});
}
function getRecords() {
var params = {
TableName: 'TableName',
IndexName: 'Type-index',
KeyConditionExpression: 'Type = :ty',
ExpressionAttributeValues: {
':ty': "1"
},
ProjectionExpression: "Id",
};
return new Promise(function(resolve, reject) {
client.query(params, function(err, data) {
if (err) {
reject(err);
} else {
resolve(data);
}
});
});
}
Upvotes: 3
Views: 15144
Reputation: 3784
A Scan operation consumes Read capacity. Each Read returns up to 4 kb of data. When this limit is reached, the Scan returns only what it has found until there. If you need more, you need to issue another Scan request.
This, you'll need two loops: 1) loop to delete all records returned at each Scan; 2) loop to keep scanning multiple times, until you reach the end of the table
Make sure you use consistent Reads or wait 1 or 2 second(s) before issuing another Scan, otherwise you may get repeated items in different Scans.
exports.handler = function(context, callback) {
clearRecords();
};
clearRecords = function() {
getRecords().then((data) => {
data.Items.forEach(function(item) {
deleteItem(item.Id).then((data1) => {});
});
clearRecords(); // Will call the same function over and over
});
}
Observe that Lambda has a timeout limit of 15 minutes. Since you have 500K items in your table, it's likely that your Lambda will timeout and you'll need to trigger it more than once. You could also make your Lambda call itself after 14:50, for example, just take a look at the AWS SDK documentation for triggering Lambda functions. For this matter, you might also want to check the
getRemainingTimeInMillis()
method from the context object.
Upvotes: 3
Reputation: 101
There is already one right answer, but here is another code snippet to delete all records from Dynamo DB.
const AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
const docClient = new AWS.DynamoDB.DocumentClient();
const getAllRecords = async (table) => {
let params = {
TableName: table,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != "undefined") {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
return items;
};
const deleteItem = (table, id) => {
var params = {
TableName: table,
Key: {
id: id,
},
};
return new Promise(function (resolve, reject) {
docClient.delete(params, function (err, data) {
if (err) {
console.log("Error Deleting ", id,err);
reject(err);
} else {
console.log("Success Deleting ", id,err);
resolve();
}
});
});
};
exports.handler = async function (event, context, callback) {
try {
const tableName = "<table>";
// scan and get all items
const allRecords = await getAllRecords(tableName);
// delete one by one
for (const item of allRecords) {
await deleteItem(tableName, item.id);
}
callback(null, {
msg: "All records are deleted.",
});
} catch (e) {
callback(null, JSON.stringify(e, null, 2));
}
};
Upvotes: 7