Reputation: 1475
I have a collection with documents like this:
[
{
"user_id": 1,
"prefs": [
"item1",
"item2",
"item3",
"item4"
]
},
{
"user_id": 2,
"prefs": [
"item2",
"item5",
"item3"
]
},
{
"user_id": 3,
"prefs": [
"item4",
"item3",
"item7"
]
}
]
What I want is to write an aggregation which takes a user_id
and produces a list of all the other users, each mapped to the number of prefs
it has in common with that user. For example, if I run the aggregation for user_id = 1
, I have to get:
[
{
"user_id": 2,
"same": 2
},
{
"user_id": 3,
"same": 2
}
]
Upvotes: 4
Views: 613
Reputation: 50406
You cannot write a single query with input as simple as "user_id": 1
, but you can first retrieve the document for that user and then compare its data against the other documents you are retrieving:
// Fetch the reference user's document first; its "prefs" array is inlined
// into the pipeline below as a literal operand.
var doc = db.collection.findOne({ "user_id": 1 });

// For every other user, report how many prefs are shared with the reference user.
db.collection.aggregate([
    // Exclude the reference user so it is not compared with itself.
    { "$match": { "user_id": { "$ne": 1 } } },
    { "$project": {
        "_id": 0,
        "user_id": 1,
        // $setIntersection yields the elements common to both arrays;
        // $size turns that array into the count of shared prefs.
        "same": { "$size": { "$setIntersection": [ "$prefs", doc.prefs ] } }
    }}
])
Which is one approach, but also not that much different to comparing each document in the client:
/**
 * Returns the distinct elements that are present in both arrays.
 *
 * The result follows set semantics: an element repeated in an input array
 * appears only once in the output, so `.length` of the result is the number
 * of distinct shared values. Elements are compared with strict equality
 * (via Array.prototype.indexOf) and are returned in the order of their first
 * occurrence in the longer input (or in `a` when both have the same length).
 *
 * @param {Array} a - First array; not modified.
 * @param {Array} b - Second array; not modified.
 * @returns {Array} New array holding each shared element once.
 */
function intersect(a, b) {
    // Walk the longer array and probe the shorter one, keeping the same
    // orientation (and therefore result order) as the original swap did.
    var longer = a;
    var shorter = b;
    if (b.length > a.length) {
        longer = b;
        shorter = a;
    }

    return longer.filter(function(item, index) {
        // Keep only the first occurrence of each value so duplicates in the
        // longer array are not counted more than once.
        return shorter.indexOf(item) !== -1 && longer.indexOf(item) === index;
    });
}
// Client-side variant: load the reference user's document once, then walk
// every other user's document and print how many prefs they share with it.
var doc = db.collection.findOne({ "user_id": 1 });

db.collection.find({ "user_id": { "$ne": 1 } }).forEach(function(other) {
    // Work out the shared prefs for this user before building the output.
    var shared = intersect(other.prefs, doc.prefs);
    var summary = {
        "user_id": other.user_id,
        "same": shared.length
    };
    printjson(summary);
});
It's the same thing. You are not really "aggregating" anything here but just comparing one document's content against another's. Of course you can ask the aggregation framework to do something like "filter" out anything that does not have a similar match:
// Fetch the reference user's document first; its "prefs" array is inlined
// into the pipeline below as a literal operand.
var doc = db.collection.findOne({ "user_id": 1 });

// Report the shared-pref count per user, dropping users with nothing in common.
db.collection.aggregate([
    // Exclude the reference user so it is not compared with itself.
    { "$match": { "user_id": { "$ne": 1 } } },
    { "$project": {
        "_id": 0,
        "user_id": 1,
        // Count of elements common to this user's prefs and the reference prefs.
        "same": { "$size": { "$setIntersection": [ "$prefs", doc.prefs ] } }
    }},
    // Filter on the computed field: keep only users sharing at least one pref.
    { "$match": { "same": { "$gt": 0 } }}
])
Though actually it would be more efficient to remove any documents with a zero count before doing the projection:
// Fetch the reference user's document first; its "prefs" array is inlined
// into the pipeline below as a literal operand.
var doc = db.collection.findOne({ "user_id": 1 });

// Same result as filtering after the projection, but documents with no shared
// prefs are discarded before the $project stage runs.
db.collection.aggregate([
    // Exclude the reference user so it is not compared with itself.
    { "$match": { "user_id": { "$ne": 1 } } },
    // Keep a document only when it shares at least one pref with the reference user.
    { "$redact": {
        "$cond": {
            "if": { "$gt": [
                { "$size": { "$setIntersection": [ "$prefs", doc.prefs ] } },
                0
            ]},
            "then": "$$KEEP",
            "else": "$$PRUNE"
        }
    }},
    { "$project": {
        "_id": 0,
        "user_id": 1,
        // Count of elements common to this user's prefs and the reference prefs.
        "same": { "$size": { "$setIntersection": [ "$prefs", doc.prefs ] } }
    }}
])
And at least then that would make some sense to do the server processing.
But otherwise, it's all pretty much the same, with possibly a "little" more overhead on the client working out the "intersection" here.
Upvotes: 5