Reputation: 1672
Working on a project that is taking in 5 similar SQL databases, and I need to detect and filter out duplicates. I think I'm on the right track, but I'm not quite there yet. I am attempting to follow these steps to accomplish this:
start a .forEach()
for the main array passing in an item
object.
create a filtered array via let filtered = Array.filter(x => x.id !== item.id);
to keep from checking against itself.
start a .forEach()
for the filtered array passing in comparison
as the parameter.
initialize variables for similarity in the Name, Phone, and Email fields (i.e. nameSimilarity
, phoneSimilarity
, and emailSimilarity
)
If item.email
and comparison.email
aren't empty, compare the strings and store the similarity percentage in emailSimilarity
else emailSimilarity=0
.
If item.phone
and comparison.phone
aren't empty, compare the strings and store the similarity percentage in phoneSimilarity
else phoneSimilarity=0
.
Combine item.firstName
and item.lastName
into a variable called itemFullName
and combine comparison.firstName
and comparison.lastName
into a variable called comparisonFullName
.
If itemFullName
and comparisonFullName
aren't empty, compare the strings and store the similarity percentage in nameSimilarity
else nameSimilarity=0
.
if any of the percentages in emailSimilarity
, nameSimilarity
, or phoneSimilarity
exceeds the similarity threshold (0.89 in the code below), add item
plus the similarity variables and comparison.id
to the duplicates array, and splice it out of the original array.
This is the code that I've written to follow these steps, but it appears that I'm getting duplicate entries in the duplicates array. I'm not sure why it's not working as expected, but I have a hunch that I can't really expect the original array to mutate inside the forEach()
operation.
// Asker's attempt, kept exactly as posted: compare every record against every
// other record and push suspected duplicates into `dupes`.
fullArray.forEach(item => {
// Every record whose externalId differs from the current item's.
let filtered = fullArray.filter(x => x.externalId !== item.externalId);
filtered.forEach(comparison => {
let emailSimilarity, phoneSimilarity, nameSimilarity;
// Email score: strcmp.jaro() when both emails are not '', otherwise 0.
if ((item.email !== '') && (comparison.email !== '')) {
emailSimilarity = strcmp.jaro(item.email, comparison.email);
} else {
emailSimilarity = 0;
}
// Phone score: same rule as email.
if ((item.phone !== '') && (comparison.phone !== '')) {
phoneSimilarity = strcmp.jaro(item.phone, comparison.phone);
} else {
phoneSimilarity = 0;
}
// NOTE(review): reads `LastName` (capital L) while the description above talks
// about `lastName`; if the property does not exist, the text "undefined" ends up
// in the full name and the emptiness check below never fires. Confirm the field name.
let itemFullName = `${item.firstName} ${item.LastName}`.trim() || '';
let comparisonFullName = `${comparison.firstName} ${comparison.LastName}`.trim();
// NOTE(review): `)! > 0` and `!>0` are not JavaScript operators, so this line does
// not parse as plain JavaScript. In TypeScript the `!` would be a non-null assertion,
// leaving `indexOf('Group') > 0`. Confirm which was meant.
if (((itemFullName !== '') && (comparisonFullName !== '')) || ((itemFullName.indexOf('Group')! > 0) && (comparisonFullName.indexOf('Group') !>0))) {
nameSimilarity = strcmp.jaro(itemFullName, comparisonFullName);
} else {
nameSimilarity = 0;
}
// NOTE(review): `a || b || c` evaluates to the first truthy operand, so only that
// single score is compared with 0.89, not each of the three.
if ((emailSimilarity || phoneSimilarity || nameSimilarity) > 0.89) {
// Copy of the item plus the three scores and the externalId of the record it resembles.
let dupesOutput = Object.assign({}, item, { similarName: nameSimilarity, similarEmail: emailSimilarity, similarPhone: phoneSimilarity, similarTo: comparison.externalId });
dupes.push(dupesOutput);
// Rebinds the `fullArray` variable to a new array; the outer forEach was started
// on the previous array and keeps iterating that one.
fullArray = fullArray.filter(x => x.externalId !== item.externalId);
}
});
});
Where's the issue?
Upvotes: 0
Views: 82
Reputation: 6540
Assuming the similarity check is working, the problem is that you're reassigning a new array to fullArray
while still being in the forEach
loop of the old one.
I'd suggest you use Array.filter
:
// Similarity score above which two records are treated as duplicates.
const DUPLICATE_THRESHOLD = 0.89;

/**
 * Scores two field values with strcmp.jaro(), or returns 0 when either value
 * is missing or empty so that blank fields never count as a match.
 *
 * @param {string} a - Field value from the record being tested.
 * @param {string} b - Field value from the record it is compared with.
 * @returns {number} Similarity score, 0 when a comparison is not possible.
 */
const similarityOrZero = (a, b) => (a && b ? strcmp.jaro(a, b) : 0);

/**
 * Builds "firstName lastName" for a record, skipping missing parts so the
 * literal text "undefined" can never leak into the name.
 *
 * NOTE(review): the original snippet read `LastName` (capital L) while the
 * question's description uses `lastName`; this follows the description.
 * Confirm against the real column name.
 *
 * @param {{firstName?: string, lastName?: string}} record
 * @returns {string} Trimmed full name, '' when both parts are missing.
 */
const fullName = (record) =>
  [record.firstName, record.lastName].filter(Boolean).join(' ').trim();

/**
 * Records from fullArray with later duplicates removed. Each removed record is
 * also pushed onto `dupes` together with its similarity scores and the
 * externalId of the earlier record it resembles.
 */
const filteredArray = fullArray.filter((item, itemIndex) => {
  const itemFullName = fullName(item);

  return !fullArray.some((comparison, comparisonIndex) => {
    // Compare only against earlier records: the first occurrence of a
    // duplicate pair is kept and only the later one is dropped and logged.
    // Comparing against every record would remove both members of the pair.
    if (comparisonIndex >= itemIndex || comparison.externalId === item.externalId) {
      return false;
    }

    const emailSimilarity = similarityOrZero(item.email, comparison.email);
    const phoneSimilarity = similarityOrZero(item.phone, comparison.phone);
    // The original also had a "Group" clause here that was not valid JavaScript;
    // read as `indexOf('Group') > 0` it is already implied by both names being
    // non-empty, so the non-empty check alone is kept.
    const nameSimilarity = similarityOrZero(itemFullName, fullName(comparison));

    // A record is a duplicate when ANY score clears the threshold.
    // `(a || b || c) > t` would only test the first non-zero score.
    const isDuplicate =
      Math.max(emailSimilarity, phoneSimilarity, nameSimilarity) > DUPLICATE_THRESHOLD;
    if (!isDuplicate) {
      return false;
    }

    dupes.push(
      Object.assign({}, item, {
        similarName: nameSimilarity,
        similarEmail: emailSimilarity,
        similarPhone: phoneSimilarity,
        similarTo: comparison.externalId,
      })
    );
    return true;
  });
});
Upvotes: 2