iamjustcoder
iamjustcoder

Reputation: 4852

Get Percentage Matched property in percentage using MongoDB

I have a use case which i need to input few list of properties which gives the result searches in mongodb which matches 90% property with all data. For example: I have data like

[{
    id: 1,
    properties: {
        'property1': 'value1',
        'property2': 'value2',
        'property3': 'value3',
        'property4': 'value4',
        'property5': 'value5'
    }
},
{
    id: 2,
    properties: {
        'property1': 'value1',
        'property2': 'value2',
        'property6': 'value6',
        'property7': 'value7',
        'property8': 'value8',
        'property9': 'value9'
    }
},
{
    id: 3,
    properties: {
        'property9': 'value9'
    }
}]

I would like to search for couple of property and i am looking for atleast 50% matches which is atleast 1 property should match with any of the two input Input Query

find
{
    'property3', 'value3',
    'property7', 'value7'
}

From the above input, it matches with 2 data out of 3 data in DB. Is that possible write query with MongoDB?

Upvotes: 0

Views: 1006

Answers (1)

dnickless
dnickless

Reputation: 10918

Here is something that should get you going (you'll need a newer version of MongoDB, though, at least v3.4.4):

db.collection.aggregate({
    $addFields: { // we add a helper field
        "propertiesAsArray": { // called "propertiesAsArray"
            $objectToArray: "$properties" // which will be the array representation of the "properties" field
        }
    }
}, {
    $addFields: {
        "matchRatio": { // the value we are looking for is...
            $divide: [ { // ...the ratio between...
                $size: { // ...the number of items...
                    $filter: {
                        "input": "$propertiesAsArray", // ...in our magic property array...
                        "as": "this",
                        "cond": { // ...that match any of our conditions...
                            $or: [
                                { $eq: [ "$$this", { "k": "property3", "v": "value3" } ] },
                                { $eq: [ "$$this", { "k": "property7", "v": "value7" } ] },
                                // ...of which we could have any number...
                            ] 
                        } 
                    }
                }
            }, 2 ] // ...and the number of properties in your query
        }
    }
}, {
    $facet: { // create two separate stages
        "totalNumberOfDocs": [{
            $count: "count" // one just calculates the total number of documents
        }],
        matchingDocs: [{ // and the second stage first filters out the non-matching documents
            $match: {
                "matchRatio": { $gte: 0.50 } // we only want documents with a match ratio >= 50%
            }
        }, {
            $count: "count" // and then counts the remaining ones
        }]
    }
}, {
    $project: { // final calculation of the ratio ("2 out of 3" in your example --> 66%)
        "percentage": {
            $divide: [
                { $arrayElemAt: [ "$matchingDocs.count", 0 ] },
                { $arrayElemAt: [ "$totalNumberOfDocs.count", 0 ] }
            ]
        }
    }
})

If you are just looking for the matching documents and don't care too much about the 66% figure then this should work (untested because I'm on the way).

db.collection.aggregate({
    $addFields: { // we add a helper field
        "propertiesAsArray": { // called "propertiesAsArray"
            $objectToArray: "$properties" // which will be the array representation of the "properties" field
        }
    }
}, {
    $addFields: {
        "matchRatio": { // the value we are looking for is...
            $divide: [ { // ...the ratio between...
                $size: { // ...the number of items...
                    $filter: {
                        "input": "$propertiesAsArray", // ...in our magic property array...
                        "as": "this",
                        "cond": { // ...that match any of our conditions...
                            $or: [
                                { $eq: [ "$$this", { "k": "property3", "v": "value3" } ] },
                                { $eq: [ "$$this", { "k": "property7", "v": "value7" } ] },
                                // ...of which we could have any number...
                            ] 
                        } 
                    }
                }
            }, 2 ] // ...and the number of properties in your query
        }
    }
}, {
    $match: {
        "matchRatio": { $gte: 0.50 } // we only want documents with a match ratio >= 50%
    }
})

Upvotes: 2

Related Questions