Reputation: 850
I have a user collection with following data
[
{
"user_id": "5625c95ac2d34f27148b64fa",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "562744ccc2d34f27148b6eb7",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "56248eb9c2d34f2f148b5a18",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "561f3e06c2d34f27148b45f6"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "5620de97c2d34f2f148b578f"
},
{
"user_id": "56276b52c2d34f27148b7128",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "56276b52c2d34f27148b7128"
}
]
i need to fetch the documents in which combination of user_id
and friend_id
not repeated. i.e in the above example last two documents user_id
repeated in friend_id
of next document.
I tried with mongo aggrigate and group by but could not reduce it.
Upvotes: 3
Views: 1292
Reputation: 50416
In order to do this you basically need to combine both user_id
and friend_id
values in a uniquely sorted combination. This means creating an array for each document with those members and sorting that array so that the order is always the same.
Then you can $group
on that sorted array content to see which documents contain that same combination and then only return those that do not share that same combination.
This leads to this aggregate statement:
db.collection.aggregate([
{ "$project": {
"user_id": 1,
"friend_id": 1,
"combined": {
"$map": {
"input": ["A","B"],
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el", "A" ] },
"$user_id",
"$friend_id"
]
}
}
}
}},
{ "$unwind": "$combined" },
{ "$sort": { "combined": 1 } },
{ "$group": {
"_id": "$_id",
"combined": { "$push": "$combined" },
"user_id": { "$first": "$user_id" },
"friend_id": { "$first": "$friend_id" }
}},
{ "$group": {
"_id": "$combined",
"docs": { "$push": {
"_id": "$_id",
"user_id": "$user_id",
"friend_id": "$friend_id"
}}
}},
{ "$redact": {
"$cond": {
"if": { "$ne": [{ "$size": "$docs" }, 1] },
"then": "$$PRUNE",
"else": "$$KEEP"
}
}}
])
The PHP translation for laravel means to need to access the raw collection object from the manager, where "collection" is the actual name of the collection in MongoDB:
$result = DB::collection("collection")->raw(function($collection) {
return $collection->aggregate(
array(
array(
'$project' => array(
'user_id' => 1,
'friend_id' => 1,
'combined' => array(
'$map' => array(
'input' => array("A","B"),
'as' => 'el',
'in' => array(
'$cond' => array(
array( '$eq' => array( '$el', 'A' ) ),
'$user_id',
'$friend_id'
)
)
)
)
)
),
array( '$unwind' =>'$combined' ),
array( '$sort' => array( 'combined' => 1 ) ),
array(
'$group' => array(
'_id' => '$_id',
'combined' => array( '$push' => '$combined' ),
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' )
)
),
array(
'$group' => array(
'_id' => '$combined',
'docs' => array(
'$push' => array(
'_id' => '$_id',
'user_id' => '$user_id',
'friend_id' => 'friend_id'
)
)
)
),
array(
'$redact' => array(
'$cond' => array(
'if' => array( '$ne' => array( array( '$size' => '$docs'), 1) ),
'then' => '$$PRUNE',
'else' => '$$KEEP'
)
)
)
)
);
});
Or if your MongoDB version is less than 2.6, and you lack operators like $map
and $redact
, then you can still do this, but not as efficiently:
$result = DB::collection("collection")->raw(function($collection) {
return $collection->aggregate(
array(
array(
'$project' => array(
'user_id' => 1,
'friend_id' => 1,
'type' => array( '$const' => array( 'A', 'B' ) )
)
),
array( '$unwind' => '$type' ),
array(
'$group' => array(
'_id' => '$_id',
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' ),
'combined' => array(
'$push' => array(
'$cond' => array(
array( '$eq' => array( '$type', 'A' ) ),
'$user_id',
'$friend_id'
)
)
)
)
)
array( '$unwind' =>'$combined' ),
array( '$sort' => array( 'combined' => 1 ) ),
array(
'$group' => array(
'_id' => '$_id',
'combined' => array( '$push' => '$combined' ),
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' )
)
),
array(
'$group' => array(
'_id' => '$combined',
'docs' => array(
'$push' => array(
'_id' => '$_id',
'user_id' => '$user_id',
'friend_id' => 'friend_id'
)
),
'count' => array( '$sum' => 1 )
)
),
array( '$match' => array( 'count' => 1 ) )
)
);
});
Where the first three stages mimic what the first stage is doing in the first example listing by putting both values in a single array. Of course the last two stages by "counting" the array members while grouping and then filtering out anything that does not have a "count" of 1
.
In either case this leaves you with output that only lists the documents where that combination does not occur in either order:
{
"_id" : [ "561f40bac2d34f17148b462c", "5625c95ac2d34f27148b64fa" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0cc9"),
"user_id" : "5625c95ac2d34f27148b64fa",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
{
"_id" : [ "561f3e06c2d34f27148b45f6", "561f40bac2d34f17148b462c" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccc"),
"user_id" : "561f40bac2d34f17148b462c",
"friend_id" : "561f3e06c2d34f27148b45f6"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "56248eb9c2d34f2f148b5a18" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccb"),
"user_id" : "56248eb9c2d34f2f148b5a18",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "5620de97c2d34f2f148b578f" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccd"),
"user_id" : "561f40bac2d34f17148b462c",
"friend_id" : "5620de97c2d34f2f148b578f"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "562744ccc2d34f27148b6eb7" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0cca"),
"user_id" : "562744ccc2d34f27148b6eb7",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
You can pretty up the output, but this serves the purpose of showing the ordered combination used along with the original document data.
Upvotes: 3