Reputation: 1542
I'm trying to generate a hierarchical aggregation similar to the following:
┌ Product Category 1
│ ├ Category Aggregate Info
│ └ Products
│ ├ Product 1
│ │ ├ Aggregate Prices
│ │ ├ Aggregate Quantities Sold
│ │ └ etc.
│ └ Product 2
│ ├ Aggregate Prices
│ ├ Aggregate Quantities Sold
│ └ etc.
│
└ Product Category 2
├ Category Aggregate Info
└ Products
├ Product 4
│ ├ Aggregate Prices
│ ├ Aggregate Quantities Sold
│ └ etc.
└ Product 7
├ Aggregate Prices
├ Aggregate Quantities Sold
└ etc.
I don't control the product categories (eg. Fruits vs Drinks) or the products (eg. Plums vs Water). This prevents me from hardcoding these values as they are defined by the users.
I am only able to control the hierarchy of the values using the order cellguid
.
So far, I have been able to generate a list that has the structure shown below.
[
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Mango"
}
]
},
... //truncated for brevity
]
My current challenge is to convert this to nested aggregates that has the following information:
{
"records": [{
"cellguid": "N118M2J4",
"value": "Fruits",
"count": 5,
"records": [
{
"cellguid": "V671H8W7",
"value": "Mango",
"count": 2
},
{
"cellguid": "V671H8W7",
"value": "Orange",
"count": 1
},
{
"cellguid": "V671H8W7",
"value": "Plum",
"count": 1
},
{
"cellguid": "V671H8W7",
"value": "Apple",
"count": 1
}
]
},
{
"cellguid": "N118M2J4",
"value": "Drinks",
"count": 9,
"records": [
{
"cellguid": "V671H8W7",
"value": "Coca Cola",
"count": 3
},
{
"cellguid": "V671H8W7",
"value": "Pepsi",
"count": 3
},
{
"cellguid": "V671H8W7",
"value": "Water",
"count": 3
}
]
}
]
}
I'm using MongoDB 4.4.0 without much luck to get the nested groups. Any help would be much appreciated.
The full list is here:
[
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Coca Cola"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Mango"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Coca Cola"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Coca Cola"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Orange"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Pepsi"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Pepsi"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Pepsi"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Plum"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Apple"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Water"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Water"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Drinks"
},
{
"cellguid": "V671H8W7",
"value": "Water"
}
]
},
{
"record": [
{
"cellguid": "N118M2J4",
"value": "Fruits"
},
{
"cellguid": "V671H8W7",
"value": "Mango"
}
]
}
]
Upvotes: 1
Views: 54
Reputation: 5703
An approach may be to group by categ/itemId, then by categId
const groupByItemCateg = {
$group: {
_id: '$record',
categ: {
$first: {
$first: '$record'
}
},
item: {
$first: {
$last: '$record'
}
},
n: { $sum: 1 }
}
}
const groupByCateg = {
$group: {
_id: '$categ',
cellguid: {
$first: '$categ.cellguid'
},
value: {
$first: '$categ.value'
},
n: { $sum: '$n' },
records: {
$push: {
cellguid: '$item.cellguid',
value: '$item.value',
n: '$n'
}
}
}
}
const project = {
$project: { _id: 0 }
}
printjson(db.products.aggregate([
groupByItemCateg,
groupByCateg,
project
]).toArray())
Actually nerded for the k-nested hierarchy generalization:
{cellguid, value, records, n}
.Then we may forward the initial document (doc:$$ROOT
) along the different stages to pick the corresponding datum at depth k
.
_id
field as a string but anything is good as long as it is not an arrayThe field name _id is reserved for use as a primary key; its value must be unique in the collection, is immutable, and may be of any type other than an array.
(I actually had a go with _id: $slice: { ['$doc.record' 0, depth+1] }
but this gives inconsistent results on local/mongoplayground even though 4.4.3 on both part)
db.products.remove({})
data=[{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Mango"},{"cellguid":"color","value":"yellow"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"lemon"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Orange"},{"cellguid":"color","value":"orange"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"MAX"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"crystal"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Plum"},{"cellguid":"color","value":"yellow"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Apple"},{"cellguid":"color","value":"golden"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"beer"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"ale"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"ale"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Mango"},{"cellguid":"color","value":"yellow"}]}]
db.products.insert(data)
const id = (depth, arrField='$doc.record') => ({
$reduce: {
input: { $slice: [arrField, 0, depth+1] },
initialValue: '',
in: {
$concat: ['$$value', '$$this.cellguid', '$$this.value']
}
}
})
const initialize = depth => ({
$group: {
_id: id(depth, '$record'),
datum: {
$first: {
$last: '$record'
}
},
n: { $sum: 1 },
doc: {
$first: '$$ROOT'
}
}
})
const groupByItemCateg = depth => ({
$group: {
_id: id(depth),
datum: {
$first: {
$arrayElemAt: ['$doc.record', depth]
}
},
n: { $sum: '$n' },
records: {
$push: {
cellguid: '$datum.cellguid',
value: '$datum.value',
records: '$records',
n: '$n'
}
},
doc: { $first: '$doc' }
}
})
const project = {
$project: {
cellguid:'$datum.cellguid',
value: '$datum.value',
records: 1,
n: 1
}
}
const stages = [
initialize(2),
// here we are 3-level nested
// max array idx is 2, so we have group by on the 0th idx and 1st idx
// should we be 4-level nested, we would need to prepend/unshift groupByItemCateg(2)
groupByItemCateg(1),
groupByItemCateg(0), // the top level group by
project
]
printjson(db.products.aggregate(stages).toArray())
Upvotes: 2
Reputation: 4452
Try this query:
db.products.aggregate([
{
$addFields: {
"product_category": { $arrayElemAt: ["$record", 0] }
}
},
{
$addFields: {
"product": { $slice: ["$record", 1, { $size: "$record" }] }
}
},
{ $unwind: "$product" },
{
$group: {
_id: "$product",
count: { $sum: 1 },
product_category: { $first: "$product_category" }
}
},
{
$group: {
_id: "$product_category",
count: { $sum: "$count" },
records: {
$push: {
"cellguid": "$_id.cellguid",
"value": "$_id.value",
"count": "$count"
}
}
}
},
{
$project: {
"_id": 0,
"cellguid": "$_id.cellguid",
"value": "$_id.value",
"count": "$count",
"records": "$records"
}
}
]);
Output:
/* 1 */
{
"_id" : {
"cellguid" : "N118M2J4",
"value" : "Fruits"
},
"count" : 5,
"records" : [
{
"cellguid" : "V671H8W7",
"value" : "Mango",
"count" : 2
},
{
"cellguid" : "V671H8W7",
"value" : "Plum",
"count" : 1
},
{
"cellguid" : "V671H8W7",
"value" : "Apple",
"count" : 1
},
{
"cellguid" : "V671H8W7",
"value" : "Orange",
"count" : 1
}
]
},
/* 2 */
{
"_id" : {
"cellguid" : "N118M2J4",
"value" : "Drinks"
},
"count" : 9,
"records" : [
{
"cellguid" : "V671H8W7",
"value" : "Pepsi",
"count" : 3
},
{
"cellguid" : "V671H8W7",
"value" : "Water",
"count" : 3
},
{
"cellguid" : "V671H8W7",
"value" : "Coca Cola",
"count" : 3
}
]
}
Upvotes: 0