Reputation: 768
I am working on a small application running on node.js, which connects to mongodb through Mongoose ORM. One of the models is a Person model. Model schema:
{
id : Number,
name : String,
friends : String
}
Example:
[
{
id : 1,
name : 'jerry'
friends : 'adam#peter#robert#steven'
},
{
id : 2,
name : 'tony'
friends : 'richard#robert#steven'
},
{
id : 3,
name : 'mike'
friends : 'henry#steven#jerry#adam#tony'
},
{
id : 4,
name : 'peter'
friends : 'jerry#bill#bobby#steven#mike#paul'
}
]
As you can see, friends field is basically a String containing names separated by '#'. There is an important reason why friends field exists as a string, not an array. So we cannot change its type or structure. This 'friends list' is actually much longer in real database. As you can see, most of these objects will have intersecting friends list (steven appears in multiple documents).
Goal: I need to figure out a way to efficiently split the friends field in each document, turn it into an array, and get a list of all distinct, populated friends of a subset of people. So basically, this is the result I want to get when asking for the persons 'tony' and 'mike':
[
{
name : jerry,
id : 1,
friends : 'adam#peter#robert#steven'
},
{
name : tony,
id : 2,
friends : 'richard#robert#steven'
},
{
richard ...
},
{
henry ...
},
{
steven ...
},
{
robert ...
},
{
adam ...
}
] // POPULATED friends of tony and mike
The thing is that the amount of data is HUGE, so I want to move as much computation as possible to the database side, carrying out as little data processing as possible on the server side. My solution so far looks like this:
// Collects the distinct friend names of the selected people in two steps:
//   1. mapReduce splits each person's '#'-separated `friends` string into an
//      array and writes the result to the 'friends_output' collection;
//   2. aggregate flattens those arrays and folds them into one set of names.
Person.mapReduce({
  map: function() {
    // Emit an object rather than a bare array so that reduce can return a
    // value of exactly the same shape as the emitted one.
    var names = this.friends ? this.friends.split('#') : [];
    emit(this.name, { friends: names });
  },
  reduce: function(key, values) {
    // Merge the lists emitted under the same name into a single value that
    // has the same shape as what map emits.
    var merged = [];
    values.forEach(function(value) {
      merged = merged.concat(value.friends);
    });
    return { friends: merged };
  },
  query: {
    name: {
      $in: ['tony', 'mike']
    }
  },
  out: 'friends_output'
}, // at this point we have docs with the friends String split into an array
function(err, mapReduceObject) {
  if (err) {
    console.error(err);
    return;
  }
  // The pipeline runs on the object handed to this callback.
  mapReduceObject.aggregate(
    { $unwind: '$value.friends' },
    {
      $group: { _id: '$value.friends' } // distinct friend docs
    },
    {
      // combining all distinct friends
      $group: {
        _id: null,
        allValues: { $addToSet: '$_id' }
      }
    },
    function(err, data) {
      if (err) {
        console.error(err);
        return;
      }
      // No matching people means no result document at all.
      var allValues = data.length > 0 ? data[0].allValues : [];
      console.log(allValues);
      // here I get the list of names, not populated docs
    });
});
This way I am partially fulfilling my goal: I am able to get all distinct friends of 'tony' and 'mike'. But I want those friends to be populated, and I can't find a good way to populate them during mapReduce. Of course, I can make another DB call inside function(err, data) and get the Persons using the names in a query:
...
},
function(err, data) {
// Look up every person whose name is one of the collected friend names.
// $in is required for that: a bare array would be compared as a single value.
// `Person` is the model name used by the mapReduce call.
Person.find({name : {$in: data[0].allValues}},
  function(err, friends){
    if (err) {
      console.error(err);
      return;
    }
    console.log(friends);
  }
);
});
but that adds up to a total of 3 DB calls for this procedure: mapReduce, aggregation, and a search query.
This last .find() call keeps bothering me. Do you see any way to populate friends inside/during mapReduce or aggregate? If you have a radically different solution to my problem, please share.
Upvotes: 0
Views: 1603
Reputation: 3503
Why not use an array? If you do, you can use all kinds of neat tricks in mongo to deal with your data (for example, finding a value in an array with "field":"value"). If you need the data in that hashed format, you can just join it on get, using a virtual getter to hash the values together, rather than the other way around, and your data will more closely reflect its model. Since this all defines a relationship, populate might also be appropriate, but might make things more obtuse. Here is an example, where "friend" is a 1-way relationship, like "following". I am using async so all the stuff is saved in the correct order.
var async = require('async');
// Array.prototype.filter predicate: an element is kept only at the index of
// its first occurrence, so the filtered array contains each value once.
var filterUnique = function (value, index, self) {
  var firstIndex = self.indexOf(value);
  return firstIndex === index;
};
// A person has a name and a list of references to other Person documents.
var PersonSchema = new mongoose.Schema({
  name: String,
  _friends: [
    {
      type: mongoose.Schema.Types.ObjectId,
      ref: 'Person'
    }
  ]
});
// Virtual `friends`: the `name` of every entry in `_friends`, joined with '#'.
// Each entry must expose a `name`; the demo script populates `_friends`
// before reading this property.
PersonSchema.virtual('friends').get(function () {
  var names = this._friends.map(function (friend) {
    return friend.name;
  });
  return names.join('#');
});
// Appends `friend` to this person's `_friends` and then drops repeated
// entries, keeping the first occurrence of each.
PersonSchema.methods.addFriend = function (friend) {
  // Fall back to an empty list when `_friends` is not set.
  this._friends = this._friends || [];
  this._friends.push(friend);
  this._friends = this._friends.filter(filterUnique);
};
// Register the schema as the 'Person' model; the name matches the `ref` declared on `_friends`.
var Person = mongoose.model('Person', PersonSchema);
/**
 * Creates and saves one Person document for each hard-coded name.
 *
 * @param {Function} cb - handed to async.map as its final callback.
 */
function generatePeople(cb) {
  var names = ['Paul', 'Peter', 'Mary', 'Emily', 'David', 'Christy'];

  // async.map iteratee: build a Person from the name and save it.
  function savePersonNamed(name, done) {
    var person = Person({"name": name});
    person.save(done);
  }

  async.map(names, savePersonNamed, cb);
}
/**
 * Makes Paul and every other person mutual friends.
 *
 * Each other person gets Paul added and is saved; Paul collects all of them
 * and is saved exactly once, after every other save has finished, so `cb`
 * only fires when all changes have been handed to save().
 *
 * @param {Function} cb - called with (err) on the first failure, otherwise
 *   with (null, results) where `results` is what async.map collected from
 *   the per-person saves.
 */
function addFriendsPaul(cb){
  Person.findOne({"name":"Paul"}, function(err, Paul){
    if (err) {
      return cb(err);
    }
    if (!Paul) {
      return cb(new Error('Person "Paul" not found'));
    }
    var addFriend = function(person, done){
      person.addFriend(Paul);
      // paul adds them back (in memory only; Paul is saved once below)
      Paul.addFriend(person);
      person.save(done);
    };
    Person.find({"name":{"$ne":"Paul"}}, function(err, people){
      if (err) {
        return cb(err);
      }
      async.map(people, addFriend, function(err, saved){
        if (err) {
          return cb(err);
        }
        // A single save avoids overlapping saves of the same document and
        // lets the caller wait for Paul's changes too.
        Paul.save(function(saveErr){
          if (saveErr) {
            return cb(saveErr);
          }
          cb(null, saved);
        });
      });
    });
  });
}
/**
 * Adds David as a friend of every other person (one-way) and saves each of
 * them.
 *
 * @param {Function} cb - called with (err) when a lookup fails or David does
 *   not exist; otherwise handed to async.map as its final callback.
 */
function addFriendsDavid(cb){
  Person.findOne({"name":"David"}, function(err, David){
    if (err) {
      return cb(err);
    }
    if (!David) {
      return cb(new Error('Person "David" not found'));
    }
    var addFriend = function(person, done){
      person.addFriend(David);
      person.save(done);
    };
    Person.find({"name":{"$ne":"David"}}, function(err, people){
      if (err) {
        return cb(err);
      }
      async.map(people, addFriend, cb);
    });
  });
}
// Demo script: create the people, wire up the friendships, then print Paul's
// '#'-joined friends string. The steps run one after another; a failure in
// any of them is reported by the final callback.
async.series([
  generatePeople,
  addFriendsPaul,
  addFriendsDavid,
  function(done){
    Person.findOne({"name":"Paul"})
      // `friends` reads the `name` of each entry, so load the referenced docs.
      .populate('_friends')
      .exec(function(err, Paul){
        if (err) {
          return done(err);
        }
        if (!Paul) {
          return done(new Error('Person "Paul" not found'));
        }
        console.log('Paul:', Paul.friends);
        done();
      });
  }
], function(err){
  if (err) {
    console.error(err);
  }
});
Upvotes: 0