Reputation: 13
I have an author collection that looks like this:
{ "_id" : ObjectId("332ddf"),
"authors" : "Mark Twain",
"publisher" : "NY",
"books" : [ "The Adventures of Tom Sawyer", "The Prince and the Pauper" ] }
{ "_id" : ObjectId("4ef342"),
"authors" : "F. Scott Fitzgerald",
"publisher" : "NY",
"books" : [ "The Adventures of Tom Sawyer",
"The Great Gatsby", "This Side of Paradise" ] }
I am trying to use the aggregation to create a new collection. For aggregation I tried:
db.author.aggregate([
{ "$unwind": "$books"},
{ $project:
{
book: "$books",
authors: ["$authors"]
}
}
])
but I get duplicate key error collection
for unwinding the books. How can I get the desired collection using aggregation ?
Upvotes: 1
Views: 80
Reputation: 61225
Actually you don't need the $project
stage. All you need to do is "denormalize" the "Books" array using the $unwind
operator then $group
your documents by "book" and use the $push
operator to return an array of "authors".
var cursor = db.getCollection('authors').aggregate([
{ "$unwind": "$books" },
{ "$group": {
"_id": "$books",
"authors": { "$push": "$authors" }
}}
])
The aggregation query yields something like this:
{ "_id" : "This Side of Paradise", "authors" : [ "F. Scott Fitzgerald" ] }
{ "_id" : "The Great Gatsby", "authors" : [ "F. Scott Fitzgerald" ] }
{ "_id" : "The Prince and the Pauper", "authors" : [ "Mark Twain" ] }
{
"_id" : "The Adventures of Tom Sawyer",
"authors" : [
"Mark Twain",
"F. Scott Fitzgerald"
]
}
BTW the _id
compound field in your expected result doesn't make much sense so I removed it but if you really feel you need it then simply replace "_id": "$books"
by "_id": { "book": "$books" }
in the group stage.
Now let see how we can insert into another collection. one way to do this as mention in the comment below is using the $out
operator which must be the last stage in the aggregation pipeline.
{ "$out": "newCollection" }
In case you need to process the result client-side before inserting the document into a new collection you should use "bulk" operation.
var requests = [];
var count = 0;
cursor.forEach(function(document) {
// Do something with the document and push a new operation to the stack
requests.push({ insertOne : document });
count++;
if(count % 1000 === 0) {
db.newCollection.bulkWrite(requests);
requests = [];
count = 0;
}
});
db.newCollection.bulkWrite(requests);
var bulk = db.newCollection.initializeUnorderedBulkOp();
var count = 0;
cursor.forEach(function(document) {
// Do something with the document and push a new operation to the stack
bulk.insert(document);
count++;
if (count % 1000 === 0) {
bulk.execute();
bulk = db.newCollection.initializeUnorderedBulkOp();
}
})
if (count > 0 ) {
bulk.execute();
}
Upvotes: 2