Markus Müller
Markus Müller

Reputation: 188

Apply multistage grouping in MongoDb Aggregation Framework

lets's assume I have the following data:

[
    { name: "Clint", hairColor: "brown", shoeSize: 8, income: 20000 },
    { name: "Clint", hairColor: "blond", shoeSize: 9, income: 30000 },
    { name: "George", hairColor: "brown", shoeSize: 7, income: 30000 },
    { name: "George", hairColor: "blond", shoeSize: 8, income: 10000 },
    { name: "George", hairColor: "blond", shoeSize: 9, income: 20000 }
]

I want to have the following output:

[
    {
        name: "Clint",
        counts: 2,
        avgShoesize: 8.5,
        shoeSizeByHairColor: [
            { _id: "brown", counts: 1, avgShoesize: 8 },
            { _id: "blond", counts: 1, avgShoesize: 9 },
        ],
        incomeByHairColor: [
            { _id: "brown", counts: 1, avgIncome: 20000 },
            { _id: "blond", counts: 1, avgIncome: 30000 },
        ]
    },
    {
        name: "George",
        counts: 3,
        avgShoesize: 8,
        shoeSizeByHairColor: [
            { _id: "brown", counts: 1, avgShoesize: 8 },
            { _id: "blond", counts: 2, avgShoesize: 8.5 },
        ],
        incomeByHairColor: [
            { _id: "brown", counts: 1, avgIncome: 30000 },
            { _id: "blond", counts: 2, avgIncome: 15000 },
        ],
    }
]

Basically I want to group my dataset by some key and then I want to have multiple groups of the subset.

First I thought of applying a $group with the key name. and the to use $facet in order to have various aggregations. I guess this will ot work since $facet does not use the subset from the previous $group. If I use $facet first I would need to split the result in multiple documents.

Any ideas how to properly solve my problem?

Upvotes: 1

Views: 137

Answers (2)

Anurag Wagh
Anurag Wagh

Reputation: 1086

Phase 1: You can group by name and hairColor and accumulate count, avgShoeSize, avgIncome, hairColors

Phase 2: Push accumulated into an array of incomeByHairColor, incomeByHairColor using $map operator.

Phase 3: Finally, in phase 3 you accumulate group by name and accumulate, incomeByHairColor, incomeByHairColor and count

Pipeline:

db.users.aggregate([
    {
        $group :{
            _id: {
                name : "$name", 
                hairColor: "$hairColor"
            }, 
            count : {"$sum": 1},
            avgShoeSize: {$avg: "$shoeSize"},
            avgIncome : {$avg: "$income"},
            hairColors : {$addToSet:"$hairColor" }
        }
    }, 
    {
        $project: {
            _id:0,
            name : "$_id.name",
            hairColor: "$_id.hairColor",
            count : "$count",
            incomeByHairColor : {
                $map: {
                  input: "$hairColors",
                  as: "key",
                  in: { 
                      _id: "$$key", 
                      counts: "$count", 
                      avgIncome: "$avgIncome"
                  }
                }
                
            },
            shoeSizeByHairColor:{
                $map: {
                  input: "$hairColors",
                  as: "key",
                  in: { 
                      _id: "$$key", 
                      counts: "$count", 
                      avgShoeSize: "$avgShoeSize"
                  }
                }
                
            
            }
        }
    },
    {
        $group: {
            _id : "$name",
            count : {$sum:  "$count"},
            incomeByHairColor: {$push : "$incomeByHairColor"},
            shoeSizeByHairColor : {$push : "$shoeSizeByHairColor"}
        }
    }
            
]
)

Output:

/* 1 */
{
    "_id" : "Clint",
    "count" : 2,
    "incomeByHairColor" : [
        [
            {
                "_id" : "blond",
                "counts" : 1,
                "avgIncome" : 30000
            }
        ],
        [
            {
                "_id" : "brown",
                "counts" : 1,
                "avgIncome" : 20000
            }
        ]
    ],
    "shoeSizeByHairColor" : [
        [
            {
                "_id" : "blond",
                "counts" : 1,
                "avgShoeSize" : 9
            }
        ],
        [
            {
                "_id" : "brown",
                "counts" : 1,
                "avgShoeSize" : 8
            }
        ]
    ]
},

/* 2 */
{
    "_id" : "George",
    "count" : 3,
    "incomeByHairColor" : [
        [
            {
                "_id" : "blond",
                "counts" : 2,
                "avgIncome" : 15000
            }
        ],
        [
            {
                "_id" : "brown",
                "counts" : 1,
                "avgIncome" : 30000
            }
        ]
    ],
    "shoeSizeByHairColor" : [
        [
            {
                "_id" : "blond",
                "counts" : 2,
                "avgShoeSize" : 8.5
            }
        ],
        [
            {
                "_id" : "brown",
                "counts" : 1,
                "avgShoeSize" : 7
            }
        ]
    ]
}

Upvotes: 0

mickl
mickl

Reputation: 49945

You need double $group, first one should aggregate by name and hairColor. And the second one can build nested array:

db.collection.aggregate([
    {
        $group: {
            _id: { name: "$name", hairColor: "$hairColor" },
            count: { $sum: 1 },
            sumShoeSize: { $sum: "$shoeSize" },
            avgShoeSize: { $avg: "$shoeSize" },
            avgIncome: { $avg: "$income" },
            docs: { $push: "$$ROOT" }
        }
    },
    {
        $group: {
            _id: "$_id.name",
            count: { $sum: "$count" },
            sumShoeSize: { $sum: "$sumShoeSize" },
            shoeSizeByHairColor: {
                $push: {
                    _id: "$_id.hairColor", counts: "$count", avgShoeSize: "$avgShoeSize"
                }
            },
            incomeByHairColor: {
                $push: {
                    _id: "$_id.hairColor", counts: "$count", avgIncome: "$avgIncome"
                }
            }
        }
    },
    {
        $project: {
            _id: 1,
            count: 1,
            avgShoeSize: { $divide: [ "$sumShoeSize", "$count" ] },
            shoeSizeByHairColor: 1,
            incomeByHairColor: 1
        }
    }
])

Mongo Playground

Upvotes: 3

Related Questions