Reputation: 900
I have a collection like this in MongoDB:
{
"_id" : ObjectId("56a5f47ed420cf0db5b70242"),
"tag" : "swift",
"values" : [
{
"word" : "osx",
"value" : 0.02
},
{
"word" : "compiler",
"value" : 0.01
}
]
},
{
"_id" : ObjectId("56a5f47ed420cf0db5b70243"),
"tag" : "c++",
"values" : [
{
"word" : "namespace",
"value" : 0.04
},
{
"word" : "compiler",
"value" : 0.05
}
]
}
I need to transform it into a collection like this:
{
"_id" : ObjectId("56a5f4e5d420cf0db5b70247"),
"word" : "namespace",
"values" : [
{
"tag" : "c++",
"value" : 0.04
}
]
},
{
"_id" : ObjectId("56a5f4e5d420cf0db5b70248"),
"word" : "compiler",
"values" : [
{
"tag" : "swift",
"value" : 0.01
},
{
"tag" : "c++",
"value" : 0.05
}
]
},
{
"_id" : ObjectId("56a5f4e5d420cf0db5b70249"),
"word" : "osx",
"values" : [
{
"tag" : "swift",
"value" : 0.02
}
]
}
I'm new to working with MongoDB and MapReduce and have some questions:
{"word": word, "values": {"tag":tag, "value": value} }
in the map stage and merge it in the reduce stage?
Upvotes: 3
Views: 1349
Reputation: 61263
Use the .aggregate() method.
You need to denormalize the "values" array using the $unwind operator. The last stage in the pipeline is the $group stage, where you group your documents by "values.word" and use the $push accumulator operator to return an array of sub-documents for each group.
From there, you can insert your documents into a new collection using "bulk" operations.
// Regroup the per-tag documents read from db.collection into one document per
// word, and insert the results into db.myCollection in batches.
var BATCH_SIZE = 1000; // number of queued inserts sent to the server per execute()
var bulk = db.myCollection.initializeOrderedBulkOp();
var count = 0;
db.collection.aggregate( [
    // Emit one document per element of the "values" array.
    { "$unwind": "$values" },
    // Group by word and collect the { tag, value } pair from every source document.
    { "$group": {
        "_id": "$values.word",
        "values": {
            "$push": { "tag": "$tag", "value": "$values.value" }
        }
    } }
]).forEach(function(doc) {
    // The group key becomes the "word" field; the server assigns a fresh _id.
    bulk.insert( { "word": doc._id, "values": doc.values } );
    count++;
    if ( count % BATCH_SIZE === 0 ) {
        // Send the full batch and start a new one; an executed bulk cannot be reused.
        bulk.execute();
        bulk = db.myCollection.initializeOrderedBulkOp();
    }
});
// Flush the last, partially filled batch. When count is an exact multiple of
// BATCH_SIZE (including 0) the current bulk is empty, and executing a bulk
// with no queued operations raises an error, so it must be skipped.
if ( count % BATCH_SIZE !== 0 ) {
    bulk.execute();
}
Upvotes: 4