Reputation: 188
lets's assume I have the following data:
[
{ name: "Clint", hairColor: "brown", shoeSize: 8, income: 20000 },
{ name: "Clint", hairColor: "blond", shoeSize: 9, income: 30000 },
{ name: "George", hairColor: "brown", shoeSize: 7, income: 30000 },
{ name: "George", hairColor: "blond", shoeSize: 8, income: 10000 },
{ name: "George", hairColor: "blond", shoeSize: 9, income: 20000 }
]
I want to have the following output:
[
{
name: "Clint",
counts: 2,
avgShoesize: 8.5,
shoeSizeByHairColor: [
{ _id: "brown", counts: 1, avgShoesize: 8 },
{ _id: "blond", counts: 1, avgShoesize: 9 },
],
incomeByHairColor: [
{ _id: "brown", counts: 1, avgIncome: 20000 },
{ _id: "blond", counts: 1, avgIncome: 30000 },
]
},
{
name: "George",
counts: 3,
avgShoesize: 8,
shoeSizeByHairColor: [
{ _id: "brown", counts: 1, avgShoesize: 8 },
{ _id: "blond", counts: 2, avgShoesize: 8.5 },
],
incomeByHairColor: [
{ _id: "brown", counts: 1, avgIncome: 30000 },
{ _id: "blond", counts: 2, avgIncome: 15000 },
],
}
]
Basically I want to group my dataset by some key and then I want to have multiple groups of the subset.
First I thought of applying a $group
with the key name
. and the to use $facet
in order to have various aggregations. I guess this will ot work since $facet
does not use the subset from the previous $group
. If I use $facet
first I would need to split the result in multiple documents.
Any ideas how to properly solve my problem?
Upvotes: 1
Views: 137
Reputation: 1086
Phase 1: You can group by name
and hairColor
and accumulate count
, avgShoeSize
, avgIncome
, hairColors
Phase 2: Push accumulated into an array of incomeByHairColor
, incomeByHairColor
using $map
operator.
Phase 3: Finally, in phase 3 you accumulate group by name and accumulate,
incomeByHairColor
, incomeByHairColor
and count
Pipeline:
db.users.aggregate([
{
$group :{
_id: {
name : "$name",
hairColor: "$hairColor"
},
count : {"$sum": 1},
avgShoeSize: {$avg: "$shoeSize"},
avgIncome : {$avg: "$income"},
hairColors : {$addToSet:"$hairColor" }
}
},
{
$project: {
_id:0,
name : "$_id.name",
hairColor: "$_id.hairColor",
count : "$count",
incomeByHairColor : {
$map: {
input: "$hairColors",
as: "key",
in: {
_id: "$$key",
counts: "$count",
avgIncome: "$avgIncome"
}
}
},
shoeSizeByHairColor:{
$map: {
input: "$hairColors",
as: "key",
in: {
_id: "$$key",
counts: "$count",
avgShoeSize: "$avgShoeSize"
}
}
}
}
},
{
$group: {
_id : "$name",
count : {$sum: "$count"},
incomeByHairColor: {$push : "$incomeByHairColor"},
shoeSizeByHairColor : {$push : "$shoeSizeByHairColor"}
}
}
]
)
Output:
/* 1 */
{
"_id" : "Clint",
"count" : 2,
"incomeByHairColor" : [
[
{
"_id" : "blond",
"counts" : 1,
"avgIncome" : 30000
}
],
[
{
"_id" : "brown",
"counts" : 1,
"avgIncome" : 20000
}
]
],
"shoeSizeByHairColor" : [
[
{
"_id" : "blond",
"counts" : 1,
"avgShoeSize" : 9
}
],
[
{
"_id" : "brown",
"counts" : 1,
"avgShoeSize" : 8
}
]
]
},
/* 2 */
{
"_id" : "George",
"count" : 3,
"incomeByHairColor" : [
[
{
"_id" : "blond",
"counts" : 2,
"avgIncome" : 15000
}
],
[
{
"_id" : "brown",
"counts" : 1,
"avgIncome" : 30000
}
]
],
"shoeSizeByHairColor" : [
[
{
"_id" : "blond",
"counts" : 2,
"avgShoeSize" : 8.5
}
],
[
{
"_id" : "brown",
"counts" : 1,
"avgShoeSize" : 7
}
]
]
}
Upvotes: 0
Reputation: 49945
You need double $group, first one should aggregate by name
and hairColor
. And the second one can build nested array:
db.collection.aggregate([
{
$group: {
_id: { name: "$name", hairColor: "$hairColor" },
count: { $sum: 1 },
sumShoeSize: { $sum: "$shoeSize" },
avgShoeSize: { $avg: "$shoeSize" },
avgIncome: { $avg: "$income" },
docs: { $push: "$$ROOT" }
}
},
{
$group: {
_id: "$_id.name",
count: { $sum: "$count" },
sumShoeSize: { $sum: "$sumShoeSize" },
shoeSizeByHairColor: {
$push: {
_id: "$_id.hairColor", counts: "$count", avgShoeSize: "$avgShoeSize"
}
},
incomeByHairColor: {
$push: {
_id: "$_id.hairColor", counts: "$count", avgIncome: "$avgIncome"
}
}
}
},
{
$project: {
_id: 1,
count: 1,
avgShoeSize: { $divide: [ "$sumShoeSize", "$count" ] },
shoeSizeByHairColor: 1,
incomeByHairColor: 1
}
}
])
Upvotes: 3