Reputation: 10611
I'm a newbie to MongoDB and basically from RDBMS. I'm using $addFields to retrieve a single column from a join table as follows,
Movies
collection structure:
{
"_id" : ObjectId("5cd30088ccbfc800173dcd42"),
"name" : "NGK",
"sourceId" : "5cd3007cccbfc800173dcd41"
}
Events
collection structure:
{
"_id" : ObjectId("5cfa8b20ee38ea10717b3e72"),
"movieId" : ObjectId("5cd720dda185d600170278fd"),
"type" : "news",
"description" : "Kathua rape, murder case verdict LIVE updates: Victim's family likely to challenge verdict giving life term to three main accused",
"resources" :{
"posterLinks" : [
{
"_v" : 1560167750,
"image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
},
{
"_v" : 1560167752,
"image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
}
],
"youtubeLinks" : [
"https://www.youtube.com/watch?v=ocnzgwjXdLw",
"https://www.youtube.com/watch?v=oLRHNYQ-Dno"
]
},
"createdDate" : ISODate("2019-06-07T16:04:48.980Z")
},
{
"_id" : ObjectId("5cfa8d03e8604410c74a65ab"),
"movieId" : ObjectId("5cd720dda185d600170278fd"),
"sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
"type" : "news",
"description" : "Former Pakistani president Asif Ali Zardari arrested on corruption charges",
"resources" : {
"posterLinks" : [
{
"_v" : 1560167915,
"image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
},
{
"_v" : 1560167917,
"image" : "xxxx/posters/r1oi172lpxdylnqaify5"
}
]
},
"createdDate" : ISODate("2019-06-07T16:12:51.927Z")
}
My Query:
db.getCollection('movies').aggregate([
{
$lookup: {
from: "events",
localField: "_id",
foreignField: "movieId",
as: "events"
}
},
{
$addFields: {
"resources": "$events.resources"
}
}
]);
The result is as follows,
{
"_id" : ObjectId("5cd720dda185d600170278fd"),
"action" : [
"5cf35c027807f0f65b5e9627"
],
"events" : [
{
"_id" : ObjectId("5cfa8b20ee38ea10717b3e72"),
"movieId" : ObjectId("5cd720dda185d600170278fd"),
"sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
"type" : "news",
"description" : "Kathua rape, murder case verdict LIVE updates: Victim's family likely to challenge verdict giving life term to three main accused",
"resources" :{
"posterLinks" : [
{
"_v" : 1560167750,
"image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
},
{
"_v" : 1560167752,
"image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
}
],
"youtubeLinks" : [
"https://www.youtube.com/watch?v=ocnzgwjXdLw",
"https://www.youtube.com/watch?v=oLRHNYQ-Dno"
]
},
"createdDate" : ISODate("2019-06-07T16:04:48.980Z")
},
{
"_id" : ObjectId("5cfa8d03e8604410c74a65ab"),
"movieId" : ObjectId("5cd720dda185d600170278fd"),
"sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
"type" : "news",
"description" : "Former Pakistani president Asif Ali Zardari arrested on corruption charges",
"resources" : {
"posterLinks" : [
{
"_v" : 1560167915,
"image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
},
{
"_v" : 1560167917,
"image" : "xxxx/posters/r1oi172lpxdylnqaify5"
}
]
},
"createdDate" : ISODate("2019-06-07T16:12:51.927Z")
},
"resources" : [
{
"posterLinks" : [
{
"_v" : 1560167750,
"image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
},
{
"_v" : 1560167752,
"image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
}
],
"youtubeLinks" : [
"https://www.youtube.com/watch?v=ocnzgwjXdLw",
"https://www.youtube.com/watch?v=oLRHNYQ-Dno"
]
},
{
"posterLinks" : [
{
"_v" : 1560167915,
"image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
},
{
"_v" : 1560167917,
"image" : "xxxx/posters/r1oi172lpxdylnqaify5"
}
]
}
]
}
I'm getting the expected result, but as you can see when there's more than events for a movie, there are multiple arrays in resources when a movie has more than one event.
I'm looking for a way to get single resources array for each movie with single posterLinks
and youtubeLinks
object, I'm aware that this can be achieved by using a loop after the values are retrieved. But is there a way to do this in the query itself?
Also I don't need the events array again in the result as the required resources are already available in the result. Any suggestions on this would greatly be appreciated.
Upvotes: 0
Views: 91
Reputation: 37048
For arbitrary resources you can use sub-query syntax of lookup
Something like this:
db.getCollection('movies').aggregate([
{
$lookup:
{
from: "events",
let: { movieId: "$_id" },
pipeline: [
{ $match: { $expr: { $eq: ["$movieId", "$$movieId"] } } },
{ $project: { resources: { $objectToArray: "$resources" } } },
{ $unwind: "$resources" },
{ $unwind: "$resources.v" },
{ $group: { _id:"$resources.k", "v": { $addToSet:"$resources.v" } } },
{ $group: { _id:null, resources: { $push: { k:"$_id", v:"$v" } } } },
{ $project: { resources: { $arrayToObject: "$resources" } } },
{ $replaceRoot: { newRoot:"$resources" } }
],
as: "events"
}
},
{ $project: { resources:0 } },
{ $unwind: "$events" }
]);
The pipeline within lookup groups resources by keys. It allows duplicate resources in the resulting array. If it is not intended the sub-query pipeline should be amended accordingly.
Bear in mind it's quite expensive query. If structure of resources is predefined and stable you can make the query more efficient by using array functions with hardcoded keys in the main pipeline instead.
Upvotes: 1