Chris Rutherford
Chris Rutherford

Reputation: 1672

DeDuplicating array of Objects javascript

Working on a project that is taking in 5 similar SQL databases, and I need to detect and filter out duplicates. I think I'm on the right track, but I'm not quite there yet. I am attempting to follow these steps to accomplish this:

  1. start a .forEach() for the main array passing in an item object.

  2. create a filtered array via let filtered = Array.filter(x => x.id !== item.id); to keep from checking against itself.

  3. start a .forEach() for the filtered array passing in comparison as the parameter.

  4. Initialize variables for similarity in the Name, Phone, and Email fields (i.e., nameSimilarity, phoneSimilarity, and emailSimilarity).

  5. If item.email and comparison.email aren't empty, compare the strings and store the similarity percentage in emailSimilarity else emailSimilarity=0.

  6. If item.phone and comparison.phone aren't empty, compare the strings and store the similarity percentage in phoneSimilarity else phoneSimilarity=0.

  7. Combine item.firstName and item.lastName into a variable called itemFullName and combine comparison.firstName and comparison.lastName into a variable called comparisonFullName.

  8. If itemFullName and comparisonFullName aren't empty, compare the strings and store the similarity percentage in nameSimilarity else nameSimilarity=0.

  9. If any of the percentages in emailSimilarity, nameSimilarity, or phoneSimilarity exceeds the threshold (0.89 in the code below), add item plus the similarity variables and comparison.id to the duplicates array, and splice it out of the original array.

    This is the code that I've written to follow these steps, but it appears that I'm getting duplicate entries in the duplicates array. I'm not sure why it's not working as expected, but I have a hunch that I can't really expect the original array to mutate inside the forEach() operation.

    // For every record in fullArray, compare it with every other record (by externalId)
    // and push a copy into `dupes` for each comparison whose score passes the 0.89 test.
    // Reads fullArray, strcmp.jaro and dupes from the enclosing scope.
    fullArray.forEach(item => {
        // All records except the current one, taken from whatever fullArray is bound to right now.
        let filtered = fullArray.filter(x => x.externalId !== item.externalId);
        // forEach cannot be stopped early: an item that matches several comparisons
        // reaches dupes.push once per match.
        filtered.forEach(comparison => {
            let emailSimilarity, phoneSimilarity, nameSimilarity;
            // Only the empty string is excluded here; undefined/null would still be passed to strcmp.jaro.
            if ((item.email !== '') && (comparison.email !== '')) {
                emailSimilarity = strcmp.jaro(item.email, comparison.email);
            } else {
                emailSimilarity = 0;
            }
            if ((item.phone !== '') && (comparison.phone !== '')) {
                phoneSimilarity = strcmp.jaro(item.phone, comparison.phone);
            } else {
                phoneSimilarity = 0;
            }
            // NOTE(review): reads `LastName` (capital L) next to `firstName` -- confirm the real field name.
            // A missing field is interpolated as the text "undefined", so the result is then never ''.
            let itemFullName = `${item.firstName} ${item.LastName}`.trim() || '';
            let comparisonFullName = `${comparison.firstName} ${comparison.LastName}`.trim();
            // NOTE(review): `! >` and `!>` are not JavaScript operators; this line does not parse as written.
            if (((itemFullName !== '') && (comparisonFullName !== '')) || ((itemFullName.indexOf('Group')! > 0) && (comparisonFullName.indexOf('Group') !>0))) {
                nameSimilarity = strcmp.jaro(itemFullName, comparisonFullName);
            } else {
                nameSimilarity = 0;
            }
            // NOTE(review): `a || b || c` evaluates to the first truthy operand, so only that
            // one score is compared with 0.89; the remaining scores are never tested.
            if ((emailSimilarity || phoneSimilarity || nameSimilarity) > 0.89) {
    
            // Copy of item plus the three scores and the externalId of the record it matched.
            let dupesOutput = Object.assign({}, item, { similarName: nameSimilarity, similarEmail: emailSimilarity, similarPhone: phoneSimilarity, similarTo: comparison.externalId });
            dupes.push(dupesOutput);
            // Rebinds fullArray to a new array without item. The outer forEach keeps walking the
            // array it was called on; only the filter() at the top of the callback sees the new binding.
            fullArray = fullArray.filter(x => x.externalId !== item.externalId);
        }
    });
    

    });

Where's the issue?

Upvotes: 0

Views: 82

Answers (1)

Manuel Otto
Manuel Otto

Reputation: 6540

Assuming the similarity check is working, the problem is that you're reassigning a new array to fullArray while still being in the forEach loop of the old one.

I'd suggest you use Array.filter:

// De-duplicate fullArray: the first record of each group of look-alikes is kept in
// filteredArray; every later look-alike is logged once in `dupes` and left out.
//
// Reads:  fullArray  - records with externalId, email, phone, firstName, lastName
//         strcmp.jaro(a, b) - string similarity score (assumed to be in 0..1 -- TODO confirm)
// Writes: dupes      - copy of each rejected record plus similarName / similarEmail /
//                      similarPhone and similarTo (externalId of the record it matched)
//         filteredArray - the surviving records, in their original order

// A score strictly above this value marks two field values as the same.
const SIMILARITY_THRESHOLD = 0.89;

// Full name with missing parts dropped, so an absent field never turns into the text
// "undefined". `LastName` is read as a fallback because the earlier code used that spelling.
const fullNameOf = (record) =>
    [record.firstName, record.lastName || record.LastName]
        .filter(Boolean)
        .join(' ')
        .trim();

// Score for one pair of field values; 0 when either side is missing or empty,
// so strcmp.jaro is never handed undefined/null/''.
const similarityOf = (a, b) => (a && b ? strcmp.jaro(a, b) : 0);

// Names are compared only when both are non-empty. The earlier condition also OR-ed in
// a 'Group' test written with `!>`, which is not a JavaScript operator; that clause is
// left out here -- NOTE(review): restore it explicitly if group names need special handling.
const scorePair = (item, comparison) => ({
    similarName: similarityOf(fullNameOf(item), fullNameOf(comparison)),
    similarEmail: similarityOf(item.email, comparison.email),
    similarPhone: similarityOf(item.phone, comparison.phone),
});

// Each score is tested on its own. `(a || b || c) > t` would only test the first truthy one.
const isDuplicate = (scores) =>
    [scores.similarName, scores.similarEmail, scores.similarPhone]
        .some((score) => score > SIMILARITY_THRESHOLD);

const filteredArray = [];
for (const item of fullArray) {
    let matched = null;
    let matchedScores = null;

    // Compare only against records already accepted. Comparing against the whole input
    // would make both members of a duplicate pair reject each other, leaving neither.
    for (const kept of filteredArray) {
        // Records sharing an externalId are treated as the same entry and never compared.
        if (kept.externalId === item.externalId) {
            continue;
        }
        const scores = scorePair(item, kept);
        if (isDuplicate(scores)) {
            matched = kept;
            matchedScores = scores;
            break; // first match is enough; one dupes entry per rejected record
        }
    }

    if (matched === null) {
        filteredArray.push(item);
    } else {
        dupes.push(Object.assign({}, item, matchedScores, { similarTo: matched.externalId }));
    }
}

Upvotes: 2

Related Questions