Reputation: 794
I'm trying to implement a function which is called by API Gateway. It gets passed a email address+password, then checks if the Email address is already in use. If thats not the case it should be put in my dynamo DB table.
When testing it with an Email address which is already in use, the put operation is still executed, eventhough the boolean should be set to true.
'use strict';
var AWS = require('aws-sdk'),
uuid = require('uuid'),
documentClient = new AWS.DynamoDB.DocumentClient();
exports.handler = function(event, context, callback) {
if (event.body !== null && event.body !== undefined) {
let body = JSON.parse(event.body);
let eMailAddress = body.mail;
let password = body.password;
var EmailInUse = Boolean(false);
var paramsScan = {
TableName: "accounts"
};
documentClient.scan(paramsScan, function(err, data) {
for (var i in data.Items) {
i = data.Items;
if (i.EmailAddress == eMailAddress) {
console.log("already used");
callback(err, "Email Address already in Use!");
EmailInUse = true;
}
}
});
console.log(EmailInUse);
if (EmailInUse == false) {
console.log("should not enter if email used");
var params = {
Item: {
"AccountID": uuid.v1(),
"Password": password,
"EmailAddress": eMailAddress
},
TableName: "accounts"
};
documentClient.put(params, function(err, data) {
if (err) {
callback(err, null);
} else {
const response = {
statusCode: "200",
"headers": {},
body: JSON.stringify(params),
"isBase64Encoded": "false"
};
callback(null, response);
}
});
}
}
};
this is my Cloudwatch log for calling it 2 times with the same parameters:
12:54:01
START RequestId: 281b0eda-950b-40fc-a2e2-d326cd04f8a4 Version: $LATEST
12:54:01
2019-02-26T12:54:01.434Z 281b0eda-950b-40fc-a2e2-d326cd04f8a4 false
12:54:01
2019-02-26T12:54:01.471Z 281b0eda-950b-40fc-a2e2-d326cd04f8a4 should not enter if email used
12:54:01
END RequestId: 281b0eda-950b-40fc-a2e2-d326cd04f8a4
12:54:01
REPORT RequestId: 281b0eda-950b-40fc-a2e2-d326cd04f8a4 Duration: 320.98 ms Billed Duration: 400 ms Memory Size: 128 MB Max Memory Used: 31 MB
12:54:47
START RequestId: b9df94ce-0d59-4dfb-8b61-8098db566431 Version: $LATEST
12:54:47
2019-02-26T12:54:47.591Z b9df94ce-0d59-4dfb-8b61-8098db566431 false
12:54:47
2019-02-26T12:54:47.591Z b9df94ce-0d59-4dfb-8b61-8098db566431 should not enter if email used
12:54:47
2019-02-26T12:54:47.812Z b9df94ce-0d59-4dfb-8b61-8098db566431 already used
12:54:47
END RequestId: b9df94ce-0d59-4dfb-8b61-8098db566431
12:54:47
REPORT RequestId: b9df94ce-0d59-4dfb-8b61-8098db566431 Duration: 311.87 ms Billed Duration: 400 ms Memory Size: 128 MB Max Memory Used: 31 MB
Looking at this i notice that the last log output "already used" is called after the check if the Email address is already in use. Can somebody tell me how to resolve this problem? Many thanks in advance.
Upvotes: 0
Views: 419
Reputation: 7215
@ttulka's answer is very accurate.
I'd like to add something on top of his answer, though:
Your code can still fail even after having the callbacks - or async/await - sorted out. And why is that?
DynamoDB is a distributed system. Distributed systems, by nature, tend to use eventual consistency at its core, and that's exactly what DynamoDB, by default, does.
This means that after you fix your code with @ttulka's snippet, you can still fall under eventual consistency issues. If you want to be absolutely sure that you read the most up-to-date values from your Tables, you must use the ConsistentRead attribute in your queries.
Keep in mind that these replications that DynamoDB run are usually lightning fast (they will take only a couple of hundreds of milliseconds most of the time), but you may eventually fall into some grey area and then you'd wonder why your code didn't work.
For your use case (checking existing e-mails) it shouldn't matter, because it's very unlikely two people will want to register with the same e-mail at nearly the same time. But make sure when working with critical data (like bank accounts), you should always favour ConsistentReads. They cost twice as much when compared to EventualConsistentReads though.
Also, pay attention to Thomas Edwards's answer: Scan operations are extremely expensive (both performance and cost wise). You should avoid them at all costs and use Global Secondary Indexes instead.
Hope this helps!
EDIT: Fixed ttulka's nickname after he pointed it out :)
Upvotes: 2
Reputation: 10882
The problem is simply in synchronization.
The function documentClient.scan
uses a callback in your case. It means, the following code (console.log(EmailInUse);
and so on) is called before the callback is executed.
You can put everything into the callback, or use async/await
as AWS Lambda supports Node.js 8.10:
var AWS = require('aws-sdk'),
uuid = require('uuid'),
documentClient = new AWS.DynamoDB.DocumentClient();
exports.handler = async event => {
if (!event.body) return httpResponse(400, 'body is missing!');
try {
let body = JSON.parse(event.body);
let eMailAddress = body.mail;
let password = body.password;
var EmailInUse = Boolean(false);
var paramsScan = {
TableName: "accounts"
};
const data = await documentClient.scan(paramsScan).promise();
for (var i in data.Items) {
i = data.Items;
if (i.EmailAddress == eMailAddress) {
console.log("already used");
// you can just return here:
//return httpResponse(200, "Email Address already in Use!");
EmailInUse = true;
}
}
console.log(EmailInUse);
if (EmailInUse == false) {
console.log("should not enter if email used");
var params = {
Item: {
"AccountID": uuid.v1(),
"Password": password,
"EmailAddress": eMailAddress
},
TableName: "accounts"
};
await documentClient.put(params).promise();
return httpResponse(200, JSON.stringify(params));
}
} catch (err) {
return httpResponse(500, JSON.stringify(err));
}
};
function httpResponse(statusCode, body) {
return {
statusCode,
body,
"isBase64Encoded": "false"
};
}
You can just finish the process when the email address is found, then you can get rid of the EmailInUse
variable whatsoever - it makes your code shorter, simplier and easier to reason about.
Upvotes: 4
Reputation: 13667
Scans are incredibly expensive, and as your site grows this will be very inefficent.
Also bear in mind that it can take some time for DynamoDB to save the record, which is why you might be able to get through.
You should use an index on EmailAddress
in DynamoDB if you want to search by it often and quickly, or find a different way of checking for duplications. I have a seperate cached index of registered emails to check against for speed.
Upvotes: 1