Stranger
Stranger

Reputation: 10611

How to have the values merged into a single array when using $addFields?

I'm a newbie to MongoDB and basically from RDBMS. I'm using $addFields to retrieve a single column from a join table as follows,

Movies collection structure:

{
    "_id" : ObjectId("5cd30088ccbfc800173dcd42"),
    "name" : "NGK",
    "sourceId" : "5cd3007cccbfc800173dcd41"
}

Events collection structure:

{
    "_id" : ObjectId("5cfa8b20ee38ea10717b3e72"),
    "movieId" : ObjectId("5cd720dda185d600170278fd"),
    "type" : "news",
    "description" : "Kathua rape, murder case verdict LIVE updates: Victim's family likely to challenge verdict giving life term to three main accused",
    "resources" :{
        "posterLinks" : [ 
            {
                "_v" : 1560167750,
                "image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
            }, 
            {
                "_v" : 1560167752,
                "image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
            }
        ],
        "youtubeLinks" : [ 
            "https://www.youtube.com/watch?v=ocnzgwjXdLw", 
            "https://www.youtube.com/watch?v=oLRHNYQ-Dno"
        ]
    },
    "createdDate" : ISODate("2019-06-07T16:04:48.980Z")
}, 
{
    "_id" : ObjectId("5cfa8d03e8604410c74a65ab"),
    "movieId" : ObjectId("5cd720dda185d600170278fd"),
    "sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
    "type" : "news",
    "description" : "Former Pakistani president Asif Ali Zardari arrested on corruption charges",
    "resources" : {
        "posterLinks" : [ 
            {
                "_v" : 1560167915,
                "image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
            }, 
            {
                "_v" : 1560167917,
                "image" : "xxxx/posters/r1oi172lpxdylnqaify5"
            }
        ]
    },
    "createdDate" : ISODate("2019-06-07T16:12:51.927Z")
}

My Query:

db.getCollection('movies').aggregate([
   {
        $lookup: {
            from: "events",
            localField: "_id",
            foreignField: "movieId",
            as: "events"
        }
   },
   { 
       $addFields: { 
           "resources": "$events.resources"
        }
   }
]);

The result is as follows,

{
    "_id" : ObjectId("5cd720dda185d600170278fd"),
    "action" : [ 
        "5cf35c027807f0f65b5e9627"
    ],
    "events" : [ 
        {
            "_id" : ObjectId("5cfa8b20ee38ea10717b3e72"),
            "movieId" : ObjectId("5cd720dda185d600170278fd"),
            "sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
            "type" : "news",
            "description" : "Kathua rape, murder case verdict LIVE updates: Victim's family likely to challenge verdict giving life term to three main accused",
            "resources" :{
                "posterLinks" : [ 
                    {
                        "_v" : 1560167750,
                        "image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
                    }, 
                    {
                        "_v" : 1560167752,
                        "image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
                    }
                ],
                "youtubeLinks" : [ 
                    "https://www.youtube.com/watch?v=ocnzgwjXdLw", 
                    "https://www.youtube.com/watch?v=oLRHNYQ-Dno"
                ]
            },
            "createdDate" : ISODate("2019-06-07T16:04:48.980Z")
        }, 
        {
            "_id" : ObjectId("5cfa8d03e8604410c74a65ab"),
            "movieId" : ObjectId("5cd720dda185d600170278fd"),
            "sourceId" : ObjectId("5cfa0bb8f5b21607b245eff3"),
            "type" : "news",
            "description" : "Former Pakistani president Asif Ali Zardari arrested on corruption charges",
            "resources" : {
                "posterLinks" : [ 
                    {
                        "_v" : 1560167915,
                        "image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
                    }, 
                    {
                        "_v" : 1560167917,
                        "image" : "xxxx/posters/r1oi172lpxdylnqaify5"
                    }
                ]
            },
            "createdDate" : ISODate("2019-06-07T16:12:51.927Z")
        }, 
    "resources" : [ 
        {
            "posterLinks" : [ 
                {
                    "_v" : 1560167750,
                    "image" : "xxxx/posters/orgomohvopr0ykxgmq2g"
                }, 
                {
                    "_v" : 1560167752,
                    "image" : "xxxx/posters/bnskv36qbdj9wnaomvre"
                }
            ],
            "youtubeLinks" : [ 
                "https://www.youtube.com/watch?v=ocnzgwjXdLw", 
                "https://www.youtube.com/watch?v=oLRHNYQ-Dno"
            ]
        }, 
        {
            "posterLinks" : [ 
                {
                    "_v" : 1560167915,
                    "image" : "xxxx/posters/ldu3iceqarnaaw1qeyhk"
                }, 
                {
                    "_v" : 1560167917,
                    "image" : "xxxx/posters/r1oi172lpxdylnqaify5"
                }
            ]
        }
    ]
}

I'm getting the expected result, but as you can see when there's more than events for a movie, there are multiple arrays in resources when a movie has more than one event.

I'm looking for a way to get single resources array for each movie with single posterLinks and youtubeLinks object, I'm aware that this can be achieved by using a loop after the values are retrieved. But is there a way to do this in the query itself?

Also I don't need the events array again in the result as the required resources are already available in the result. Any suggestions on this would greatly be appreciated.

Upvotes: 0

Views: 91

Answers (1)

Alex Blex
Alex Blex

Reputation: 37048

For arbitrary resources you can use sub-query syntax of lookup

Something like this:

db.getCollection('movies').aggregate([
    {
       $lookup:
         {
            from: "events",
            let: { movieId: "$_id" },
            pipeline: [ 
                { $match: { $expr: { $eq: ["$movieId", "$$movieId"] } } },
                { $project: { resources: { $objectToArray: "$resources" } } },
                { $unwind: "$resources" },
                { $unwind: "$resources.v" },
                { $group: { _id:"$resources.k", "v": { $addToSet:"$resources.v" } } },
                { $group: { _id:null, resources: { $push: { k:"$_id", v:"$v" } } } },
                { $project: { resources: { $arrayToObject: "$resources" } } },
                { $replaceRoot: { newRoot:"$resources" } }
            ],
            as: "events"
         }
    },
    { $project: { resources:0 } },
    { $unwind: "$events" }
]);

The pipeline within lookup groups resources by keys. It allows duplicate resources in the resulting array. If it is not intended the sub-query pipeline should be amended accordingly.

Bear in mind it's quite expensive query. If structure of resources is predefined and stable you can make the query more efficient by using array functions with hardcoded keys in the main pipeline instead.

Upvotes: 1

Related Questions