Cameron
Cameron

Reputation: 607

MongoDb: Getting $avg in aggregate for complex data

I'm trying to get an average rating in my Mongo aggregate and am having trouble accessing the nested array. I've gotten my aggregation to give the following array. I'm trying to have city_reviews return an array of averages.

[ 
  {
    "_id": "Dallas",
    "city_reviews": [
           //arrays of restaurant objects that include the rating
           //I would like to get an average of the rating in each review, so these arrays will be numbers (averages)
           [ {
              "_id": "5b7ead6d106f0553d8807276",
              "created": "2018-08-23T12:41:29.791Z",
              "text": "Crackin good place. ",
              "rating": 4,
              "store": "5b7d67d5356114089909e58d",
              "author": "5b7d675e356114089909e58b",
              "__v": 0
              }, {review2}, {review3}]
           [{review1}, {review2}, {review3}],
           [{review1}. {review2}],
           [{review1}, {review2}, {review3}, {review4}],
           []
      ]

   },
  {
    "_id": "Houston",
    "city_reviews": [
           // arrays of restaurants 
           [{review1}, {review2}, {review3}],
           [{review1}, {review2}, {review3}],
           [{review1}, {review2}, {review3}, {review4}],
           [],
           []
      ]
  }

]

I would like to do an aggregation on this that returns an array of averages within the city_reviews, like this:

{
    "_id": "Dallas",
    "city_reviews": [
           // arrays of rating averages
           [4.7],
           [4.3],
           [3.4],
           [],
           []
      ]
  }

Here's what I've tried. It's giving me back averageRating of null, because $city_reviews is an array of object and I'm not telling it to go deep enough to capture the rating key.

return this.aggregate([
    { $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as: 
      'reviews' }},
    {$group: {_id: '$city', city_reviews: { $push : '$reviews'}}},
    { $project: {
       averageRating: { $avg: '$city_reviews'}
    }}
 ])

Is there a way to work with this line so I can return arrays of averages instead of the full review objects.

averageRating: { $avg: '$city_reviews'}

EDIT: Was asked for entire pipeline.

return this.aggregate([
    { $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as: 'reviews' }},
    {$group: {
        _id: '$city', 
        city_reviews: { $push : '$reviews'}}
    },
    { $project: {
        photo: '$$ROOT.photo',
        name: '$$ROOT.name',
        reviews: '$$ROOT.reviews',
        slug: '$$ROOT.slug',
        city: '$$ROOT.city',
        "averageRatingIndex":{
            "$map":{
            "input":"$city_reviews",
            "in":[{"$avg":"$$this.rating"}]
            }
        },
     }
    },
    { $sort: { averageRating: -1 }},
    { $limit: 5 }
])

My first query was to connect two models together:

{ $lookup: { from: 'reviews', localField: '_id', foreignField: 'store', as: 'reviews' }},

Which resulted in this:

[ {
    "_id": "5b7d67d5356114089909e58d",
    "location": {},
    "tags": [],
    "created": "2018-08-22T13:23:23.224Z",
    "name": "Lucia",
    "description": "Great name",
    "city": "Dallas",
    "photo": "ab64b3e7-6207-41d8-a670-94315e4b23af.jpeg",
    "author": "5b7d675e356114089909e58b",
    "slug": "lucia",
    "__v": 0,
    "reviews": []
  },
  {..more object like above}
]

Then, I grouped them like this:

{$group: {
     _id: '$city', 
     city_reviews: { $push : '$reviews'}}
 }

This returned what my original question is about. Essentially, I just want to have a total average rating for each city. My accepted answer does answer my original question. I'm getting back this:

{
  "_id": "Dallas",
  "averageRatingIndex": [
     [ 4.2 ],
     [ 3.6666666666666665 ],
     [ null ],
     [ 3.2 ],
     [ 5 ],
     [ null ]
   ]
}

I've tried to use the $avg operator on this to return one, final average that I can display for each city, but I'm having trouble.

Upvotes: 0

Views: 682

Answers (2)

dnickless
dnickless

Reputation: 10918

With respect to your optimization request, I don't think there's a lot of room for improvement beyond the version that you already have. However, the following pipeline might be faster than your current solution because of the initial $group stage which should result in way less $lookups. I am not sure how MongoDB will optimize all of that internally so you might want to profile the two versions against a real data set.

db.getCollection('something').aggregate([{
    $group: {
        _id: '$city', // group by city
        "averageRating": { $push: "$_id" } // create array of all encountered "_id"s per "city" bucket - we use the target field name to avoid creation of superfluous fields which would need to be removed from the output later on
    }
}, {
    $lookup: {
        from: 'reviews',
        let: { "averageRating": "$averageRating" }, // create a variable called "$$ids" which will hold the previously created array of "_id"s
        pipeline: [{
            $match: { $expr: { $in: [ "$store", "$$averageRating" ] } } // do the usual "joining"
        }, {
            $group: {
                "_id": null, // group all found items into the same single bucket
                "rating": { $avg: "$rating" }, // calculate the avg on a per "store" basis
            }
        }],
        as: 'averageRating' 
    }
}, {
    $sort: { "averageRating.rating": -1 }
}, {
    $limit: 5
}, { 
    $addFields: { // beautification of the output only, technically not needed - we do this as the last stage in order to only do it for the max. of 5 documents that we're interested in
        "averageRating": { // this is where we reuse the field we created in the first stage
            $arrayElemAt: [ "$averageRating.rating", 0 ] // pull the first element inside the array outside of the array
        }
    }
}])

In fact, the "initial $group stage" approach could also be used in conjunction with @Veerams solution like this:

db.collection.aggregate([{
    $group: {
        _id: '$city', // group by city
        "averageRating": { $push: "$_id" } // create array of all encountered "_id"s per "city" bucket - we use the target field name to avoid creation of superfluous fields which would need to be removed from the output later on
    }
}, {
    $lookup: {
        from: 'reviews',
        localField: 'averageRating',
        foreignField: 'store',
        as: 'averageRating'
    },
}, {
    $project: {
        "averageRating": {
            $avg: {
                $map: {
                    input: "$averageRating",
                    in: { $avg: "$$this.rating" }
                }
            }
        }
    }
}, {
    $sort: { averageRating: -1 }
}, {
    $limit: 5
}])

Upvotes: 1

s7vr
s7vr

Reputation: 75964

You can use $map to with $avg to output avg.

{"$project":{
  "averageRating":{
     "$map":{
      "input":"$city_reviews",
      "in":[{"$avg":"$$this.rating"}]
    }
  }
}}

Upvotes: 1

Related Questions