Reputation: 303
var UserSchema = Schema (
{
android_id: String,
created: {type: Date, default:Date.now},
interests: [{ type: Schema.Types.ObjectId, ref: 'Interests' }],
});
Users.aggregate([
{ $match: {android_id: {$ne: userID}, interests: {$elemMatch: {$in: ids}} }},
{ $group: { _id: { android_id: '$android_id'},count: {$sum: 1}}},
{ $sort: {count: -1}},
{ $limit: 5 }],
I need the to find the top 5 android_ids of the users with the most interests in common with me (ids array). I can work with the array of only matched elements from the interests array too.
Upvotes: 1
Views: 593
Reputation: 50416
You seemed to be going along the right lines here but you do need to consider that arrays have special considerations for comparisons.
Your basic start here is to find all users that are not the current user, and that you also need at least the "interests" array of the current user as well. You seem to be doing that already, but for here let us consider that you have the whole user
object for the current user which will be used in the listing.
This makes your "top 5" basically a product of "Not me, and the most interests in common", which means you basically need to count the "overlap" of interests on each user compared to the current user.
This is basically the $setIntersection
of the two arrays or "sets" where the elements in common are returned. In order to count how many are in common, there is also the $size
operator. So you apply like this:
Users.aggregate(
[
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
{ "$project": {
"android_id": 1,
"interests": 1,
"common": {
"$size": {
"$setIntersection": [ "$interests", user.interests ]
}
}
}},
{ "$sort": { "common": -1 } },
{ "$limit": 5 }
],
function(err,result) {
}
);
The result returned in "common" is the count of common interests between the current user and the user being examined in the data. This data is then processed by $sort
in order to put the largest number of common interests on top, and then $limit
returns only the top 5.
If for some reason your MongoDB version is presently lower than MongoDB 2.6 where both the $setIntersection
and $size
operators are introduced, then you can still do this, but it just takes a longer form of processing the arrays.
Mainly you need to $unwind
the arrays and process each match individually:
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
{ "$unwind": "$interests" },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"interests": { "$push": "$interests" },
"common": {
"$sum": {
"$add": [
{ "$cond": [{ "$eq": [ "$interests", user.interests[0] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[1] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[2] ] },1,0 ] }
]
}
}
}},
{ "$sort": { "common": -1 }},
{ "$limit": 5 }
Which is more practically coded to generate the condtional matches in the pipeline:
var pipeline = [
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
{ "$unwind": "$interests" }
];
var group =
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"interests": { "$push": "$interests" },
"common": {
"$sum": {
"$add": []
}
}
}};
user.interests.forEach(function(interest) {
group.$group.common.$sum.$add.push(
{ "$cond": [{ "$eq": [ "$interests", interest ] }, 1, 0 ] }
);
});
pipeline.push(group);
pipeline = pipeline.concat([
{ "$sort": { "common": -1 }},
{ "$limit": 5 }
])
User.aggregate(pipeline,function(err,result) {
});
The key elements there being that "both" the current user and the user being inspected have their "interests" separated out for comparison to see if they are "equal". The result from $cond
attributes a 1
where this is true or 0
where false.
Any returns ( and only ever expected to be 1
at best, per pair ) are passed to the $sum
accumulator which counts the matches in common. You can alternately $match
with an $in
condition again:
{ "$unwind": "$interests" },
{ "$match": { "interests": { "$in": user.interests } },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"common": { "$sum": 1 }
}}
But this is naturally destructive of the array content as non matches are filtered out. So it depends on what you would rather have in the response.
That is the basic process for getting the "common" counts for use in further processing like $sort
and $limit
in order to get your "top 5".
Just for fun, here is a basic node.js listing to show the effects of common matches: var async = require('async'), mongoose = require('mongoose'), Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/sample');
var interestSchema = new Schema({
name: String
});
var userSchema = new Schema({
name: String,
interests: [{ type: Schema.Types.ObjectId, ref: 'Interest' }]
});
var Interest = mongoose.model( 'Interest', interestSchema );
var User = mongoose.model( 'User', userSchema );
var interestHash = {};
async.series(
[
function(callback) {
async.each([Interest,User],function(model,callback) {
model.remove({},callback);
},callback);
},
function(callback) {
async.each(
[
"Tennis",
"Football",
"Gaming",
"Cooking",
"Yoga"
],
function(interest,callback) {
Interest.create({ name: interest},function(err,obj) {
if (err) callback(err);
interestHash[obj.name] = obj._id;
callback();
});
},
callback
);
},
function(callback) {
async.each(
[
{ name: "Bob", interests: ["Tennis","Football","Gaming"] },
{ name: "Tom", interests: ["Football","Cooking","Yoga"] },
{ name: "Sue", interests: ["Tennis","Gaming","Yoga","Cooking"] }
],
function(data,callback) {
data.interests = data.interests.map(function(interest) {
return interestHash[interest];
});
User.create(data,function(err,user) {
//console.log(user);
callback(err);
})
},
callback
);
},
function(callback) {
async.waterfall(
[
function(callback) {
User.findOne({ name: "Bob" },callback);
},
function(user,callback) {
console.log(user);
User.aggregate(
[
{ "$match": {
"_id": { "$ne": user._id },
"interests": { "$in": user.interests }
}},
{ "$project": {
"name": 1,
"interests": 1,
"common": {
"$size": {
"$setIntersection": [ "$interests", user.interests ]
}
}
}},
{ "$sort": { "common": -1 } }
],
function(err,result) {
if (err) callback(err);
Interest.populate(result,'interests',function(err,result) {
console.log(result);
callback(err);
});
}
);
}
],
callback
);
}
],
function(err) {
if (err) throw err;
//console.dir(interestHash);
mongoose.disconnect();
}
);
Which will output:
{ _id: 55dbd7be0e5516ac16ea62d1,
name: 'Bob',
__v: 0,
interests:
[ 55dbd7be0e5516ac16ea62cc,
55dbd7be0e5516ac16ea62cd,
55dbd7be0e5516ac16ea62ce ] }
[ { _id: 55dbd7be0e5516ac16ea62d3,
name: 'Sue',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cc, name: 'Tennis', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62ce, name: 'Gaming', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 } ],
common: 2 },
{ _id: 55dbd7be0e5516ac16ea62d2,
name: 'Tom',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cd, name: 'Football', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 } ],
common: 1 } ]
Upvotes: 3