Reputation: 916
I have documents in the format of
{
"_id": <some_id>,
"code": <some_code>,
"manually_updated": {
"code": <some_code>
}
}
I would like to find duplicates (group documents) by looking into root code
value but also in manually_updated.code
field. So the following three documents would be seen as duplicates (second document code
was "overwritten" by adding the code to manually_updated
with the same code
as the first and third document):
{
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code": 'ABCD',
"manually_updated": {}
},
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code": 'EFGH',
"manually_updated": {
"code": "ABCD"
}
},
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code": 'ABCD',
"manually_updated": {}
}
}
Thank you.
Upvotes: 1
Views: 90
Reputation: 17915
Please try this :
db.getCollection('yourCollection').aggregate([{
$lookup:
{
from: "yourCollection",
let: { codeToBeCompared: "$code", manualCode: '$manually_updated.code' },
pipeline: [
{
$match:
{
$expr:
{
$or:
[
{ $eq: ["$code", "$$codeToBeCompared"] },
{ $eq: ["$manually_updated.code", "$$codeToBeCompared"] },
{ $and: [{ $gt: ['$manually_updated', {}] }, { $eq: ["$manually_updated.code", '$$manualCode'] }] }
]
}
}
}
],
as: "data"
}
}, { $group: { _id: '$code', manually_updated: { $push: '$manually_updated' }, finalData: { $first: '$$ROOT' } } }, { $match: { $expr: { $gt: [{ $size: "$finalData.data" }, 1] } } },
{ $project: { 'manually_updated': 1, 'data': '$finalData.data' } }])
Sample Docs :
/* 1 */
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 2 */
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code" : "EFGH",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 3 */
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 4 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd21"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 5 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd22"),
"code" : "APPPP",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 6 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd23"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 7 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd24"),
"code" : "deffffff",
"manually_updated" : {}
}
Output :
/* 1 */
{
"_id" : "APPPP",
"manually_updated" : [ // Preserving this to say we've passed thru these values
{},
{
"code": "ABCD"
},
{}
],
"data" : [
{
"_id": ObjectId("5d518a3ce8078d6134c4cd21"),
"code": "APPPP",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd23"),
"code": "APPPP",
"manually_updated": {}
}
]
}
/* 2 */
{
"_id" : "EFGH",
"manually_updated" : [
{
"code": "ABCD"
}
],
"data" : [
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
}
]
}
/* 3 */
{
"_id" : "ABCD",
"manually_updated" : [
{},
{}
],
"data" : [
{
"_id": ObjectId("5d2dc168651ce400a327b408"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d41374311981f0163779b79"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
}
]
}
Also this does scan on everything, you can have $match
as first stage to filter documents based on a particular code.
Upvotes: 1