Cédric Rémond
Cédric Rémond

Reputation: 1054

MongoDB: Reduce array of objects into a single object by computing the average of each field

I use the MongoDB aggregation API to aggregate some data daily. The result of this aggregation is of this format:

[
  {
    aggDate: '2019-05-23',
    results: [
      {
        foo: 0.58,
        bar: 0.42
      }, {
        foo: 0.32,
        bar: 0.98
      }
    ]
  }
]

The aggregation on the date is fine, but now I would like to aggregate the objects in the results array.

The result of this aggregation should be of the following format:

// Desired shape: one document per aggDate, with the `results` array collapsed
// into a single `result` object holding the per-key averages.
[
  {
    aggDate: '2019-05-23',
    result: {
      foo: 0.45, // avg of all the `foo`, here: (0.58 + 0.32) / 2
      bar: 0.7 // avg of all the `bar`, here: (0.42 + 0.98) / 2
    }
  }
]

My problem is that the keys foo and bar can change, and new fields can be added to the results objects. To avoid recoding the query each time that happens, I want a generic way to tell MongoDB:

Take this array of objects and reduce it into a single object where each value is the average of the same field in all objects.

I know the $reduce operator exists in MongoDB but I can't figure out how to use it and I am not even sure if it can help me here.

Upvotes: 6

Views: 12800

Answers (3)

Ashh
Ashh

Reputation: 46481

You do not need to use $reduce. Simply $sum can do the job.

db.collection.aggregate([
  // Average the known keys `foo` and `bar` across the `results` array.
  // "$results.foo" resolves to the array of every element's `foo` value.
  // NOTE: $divide raises an error if `results` is empty ($size is 0).
  { "$project": {
    "aggDate": 1, // keep the date so the output matches the requested shape
    "result": {
      "foo": { "$divide": [{ "$sum": "$results.foo" }, { "$size": "$results" }] },
      "bar": { "$divide": [{ "$sum": "$results.bar" }, { "$size": "$results" }] }
    }
  }}
])

Update: to handle dynamic keys inside the results array:

db.collection.aggregate([
  // Stage 1: turn every object in `results` into [{ k, v }, ...] pairs and
  // flatten all of them into one array.
  {
    "$project": {
      "aggDate": 1,
      "results": {
        "$reduce": {
          "input": {
            "$map": {
              "input": "$results",
              "as": "doc",
              "in": { "$objectToArray": "$$doc" }
            }
          },
          "initialValue": [],
          "in": { "$concatArrays": ["$$value", "$$this"] }
        }
      }
    }
  },
  // Stage 2: for each distinct key, average the values of its pairs and
  // rebuild a single object from the resulting { k, v } entries.
  {
    "$project": {
      "aggDate": 1,
      "result": {
        "$arrayToObject": {
          "$map": {
            // $setUnion over a single array removes duplicate keys
            "input": { "$setUnion": ["$results.k"] },
            "as": "key",
            "in": {
              "$let": {
                "vars": {
                  // all pairs whose key equals the current key
                  "matches": {
                    "$filter": {
                      "input": "$results",
                      "as": "pair",
                      "cond": { "$eq": ["$$pair.k", "$$key"] }
                    }
                  }
                },
                "in": {
                  "k": "$$key",
                  "v": {
                    "$divide": [
                      { "$sum": "$$matches.v" },
                      { "$size": "$$matches" }
                    ]
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])

MongoPlayground

A more simplified version, using a single $project stage:

db.collection.aggregate([
  // Single $project: flatten all { k, v } pairs into a $let variable, then
  // average the values per distinct key and rebuild one object.
  {
    "$project": {
      "aggDate": 1,
      "result": {
        "$let": {
          "vars": {
            // every { k, v } pair from every object in `results`, flattened
            "pairs": {
              "$reduce": {
                "input": {
                  "$map": {
                    "input": "$results",
                    "as": "doc",
                    "in": { "$objectToArray": "$$doc" }
                  }
                },
                "initialValue": [],
                "in": { "$concatArrays": ["$$value", "$$this"] }
              }
            }
          },
          "in": {
            "$arrayToObject": {
              "$map": {
                // $setUnion over a single array removes duplicate keys
                "input": { "$setUnion": ["$$pairs.k"] },
                "as": "key",
                "in": {
                  "$let": {
                    "vars": {
                      // all pairs whose key equals the current key
                      "matches": {
                        "$filter": {
                          "input": "$$pairs",
                          "as": "pair",
                          "cond": { "$eq": ["$$pair.k", "$$key"] }
                        }
                      }
                    },
                    "in": {
                      "k": "$$key",
                      "v": {
                        "$divide": [
                          { "$sum": "$$matches.v" },
                          { "$size": "$$matches" }
                        ]
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
])

MongoPlayground

Both produce the following output:

// Result document as posted with the answer.
// NOTE(review): the keys `foho` and `sdbar` do not appear in the question's
// sample data — presumably the linked playground input used different keys per
// object to demonstrate dynamic-key handling; confirm against the playground.
[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "aggDate": "2019-05-23",
    "result": {
      "bar": 0.7,
      "foho": 0.32,
      "foo": 0.58,
      "sdbar": 0.98
    }
  }
]

Upvotes: 8

Tom Slabbaert
Tom Slabbaert

Reputation: 22296

EDIT: not the most elegant but:

    { 
        "$unwind" : "$result"
    }, 
    { 
        "$addFields" : {
            "value" : {
                "$objectToArray" : "$result"
            }
        }
    }, 
    { 
        "$unwind" : "$value"
    }, 
    { 
        "$group" : {
            "_id" : null, 
            "length" : {
                "$sum" : 1.0
            }, 
            "total" : {
                "$sum" : "$value.v"
            }
        }
    }, 
    { 
        "$addFields" : {
            "avg" : {
                "$divide" : [
                    "$total", 
                    "$length"
                ]
            }
        }
    } 

Upvotes: 0

mickl
mickl

Reputation: 49975

You should run $unwind and aggregate the data using $group stages. You also need $arrayToObject and $objectToArray to work with dynamic keys. $reduce is not an option here since the keys are unknown.

db.col.aggregate([
    // Convert each object in `results` to an array of { k, v } pairs so the
    // keys can be handled without knowing them in advance.
    {
        $project: {
            aggDate: 1,
            results: {
                $map: { input: "$results", in: { $objectToArray: "$$this" } }
            }
        }
    },
    // First $unwind: one document per original object (an array of pairs).
    {
        $unwind: "$results"
    },
    // Second $unwind: one document per { k, v } pair.
    {
        $unwind: "$results"
    },
    // Sum and count the values for every (aggDate, key) combination.
    {
        $group: {
            _id: { aggDate: "$aggDate", k: "$results.k" },
            sum: { $sum: "$results.v" },
            count: { $sum: 1 }
        }
    },
    // Average = sum / count.
    {
        $project: {
            _id: 1,
            v: { $divide: [ "$sum", "$count" ] }
        }
    },
    // Collect the averaged { k, v } pairs back together per aggDate.
    {
        $group: {
            _id: "$_id.aggDate",
            results: { $push: { k: "$_id.k", v: "$v" } }
        }
    },
    // Rebuild a single object from the pairs; the output field is named
    // `result` (singular) to match the shape requested in the question.
    {
        $project: {
            _id: 0,
            aggDate: "$_id",
            result: { $arrayToObject: "$results" }
        }
    }
])

Mongo Playground

Upvotes: 5

Related Questions