haroldcampbell
haroldcampbell

Reputation: 1542

Create nested aggregates in MongoDB

I'm trying to generate a hierarchical aggregation similar to the following:

 ┌ Product Category 1
 │ ├ Category Aggregate Info
 │ └ Products
 │   ├ Product 1
 │   │ ├ Aggregate Prices
 │   │ ├ Aggregate Quantities Sold
 │   │ └ etc.
 │   └ Product 2
 │     ├ Aggregate Prices
 │     ├ Aggregate Quantities Sold
 │     └ etc.
 │
 └ Product Category 2
   ├ Category Aggregate Info
   └ Products   
     ├ Product 4
     │ ├ Aggregate Prices
     │ ├ Aggregate Quantities Sold
     │ └ etc.
     └ Product 7
       ├ Aggregate Prices
       ├ Aggregate Quantities Sold
       └ etc.

I don't control the product categories (eg. Fruits vs Drinks) or the products (eg. Plums vs Water). This prevents me from hardcoding these values as they are defined by the users.

I am only able to control the hierarchy of the values using the order cellguid.

So far, I have been able to generate a list that has the structure shown below.

[
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Mango"
      }
    ]
  },
  ... //truncated for brevity
]

My current challenge is to convert this to nested aggregates that has the following information:

{
    "records": [{
            "cellguid": "N118M2J4",
            "value": "Fruits",
            "count": 5,
            "records": [
                {
                    "cellguid": "V671H8W7",
                    "value": "Mango",
                    "count": 2
                },
                {
                    "cellguid": "V671H8W7",
                    "value": "Orange",
                    "count": 1
                },
                {
                    "cellguid": "V671H8W7",
                    "value": "Plum",
                    "count": 1
                },
                {
                    "cellguid": "V671H8W7",
                    "value": "Apple",
                    "count": 1
                }
            ]
        },
        {
            "cellguid": "N118M2J4",
            "value": "Drinks",
            "count": 9,
            "records": [
                {
                    "cellguid": "V671H8W7",
                    "value": "Coca Cola",
                    "count": 3
                },
                {
                    "cellguid": "V671H8W7",
                    "value": "Pepsi",
                    "count": 3
                },
                {
                    "cellguid": "V671H8W7",
                    "value": "Water",
                    "count": 3
                }
            ]
        }
    ]
}

I'm using MongoDB 4.4.0 without much luck to get the nested groups. Any help would be much appreciated.

The full list is here:

[
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Coca Cola"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Mango"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Coca Cola"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Coca Cola"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Orange"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Pepsi"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Pepsi"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Pepsi"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Plum"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Apple"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Water"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Water"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Drinks"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Water"
      }
    ]
  },
  {
    "record": [
      {
        "cellguid": "N118M2J4",
        "value": "Fruits"
      },
      {
        "cellguid": "V671H8W7",
        "value": "Mango"
      }
    ]
  }
]

Upvotes: 1

Views: 54

Answers (2)

grodzi
grodzi

Reputation: 5703

An approach may be to group by categ/itemId, then by categId

const groupByItemCateg = {
  $group: {
    _id: '$record',
    categ: {
      $first: {
        $first: '$record'
      }
    },
    item: {
      $first: {
        $last: '$record'
      }
    },
    n: { $sum: 1 }
  }
}

const groupByCateg = {
  $group: {
    _id: '$categ',
    cellguid: {
      $first: '$categ.cellguid'
    },
    value: {
      $first: '$categ.value'
    },
    n: { $sum: '$n' },
    records: {
      $push: {
        cellguid: '$item.cellguid',
        value: '$item.value',
        n: '$n'
      }
    }
  }
}

const project = {
  $project: { _id: 0 }
}
printjson(db.products.aggregate([
  groupByItemCateg,
  groupByCateg,
  project
]).toArray())

playground


Actually nerded for the k-nested hierarchy generalization:

  1. We can consider the datum as {cellguid, value, records, n}.

Then we may forward the initial document (doc:$$ROOT) along the different stages to pick the corresponding datum at depth k.

  1. I am serializing the _id field as a string but anything is good as long as it is not an array

The field name _id is reserved for use as a primary key; its value must be unique in the collection, is immutable, and may be of any type other than an array.

(I actually had a go with _id: $slice: { ['$doc.record' 0, depth+1] } but this gives inconsistent results on local/mongoplayground even though 4.4.3 on both part)


db.products.remove({})
data=[{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Mango"},{"cellguid":"color","value":"yellow"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"lemon"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Coca Cola"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Orange"},{"cellguid":"color","value":"orange"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"light"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"MAX"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Pepsi"},{"cellguid":"savour","value":"crystal"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Plum"},{"cellguid":"color","value":"yellow"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Apple"},{"cellguid":"color","value":"golden"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"beer"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"ale"}]},{"record":[{"cellguid":"N118M2J4","value":"Drinks"},{"cellguid":"V671H8W7","value":"Water"},{"cellguid":"savour","value":"ale"}]},{"record":[{"cellguid":"N118M2J4","value":"Fruits"},{"cellguid":"V671H8W7","value":"Mango"},{"cellguid":"color","value":"yellow"}]}]
db.products.insert(data)

const id = (depth, arrField='$doc.record') => ({
  $reduce: {
    input: { $slice: [arrField, 0, depth+1] },
    initialValue: '',
    in: {
      $concat: ['$$value', '$$this.cellguid', '$$this.value']
    }
  }
})
const initialize = depth => ({
  $group: {
    _id: id(depth, '$record'),
    datum: {
      $first: {
        $last: '$record'
      }
    },
    n: { $sum: 1 },
    doc: {
      $first: '$$ROOT'
    }
  }
})

const groupByItemCateg = depth => ({
  $group: {
    _id: id(depth),
    datum: {
      $first: {
        $arrayElemAt: ['$doc.record', depth]
      }
    },
    n: { $sum: '$n' },
    records: {
      $push: {
        cellguid: '$datum.cellguid',
        value: '$datum.value',
        records: '$records',
        n: '$n'
      }
    },

    doc: { $first: '$doc' }
  }
})

const project = {
  $project: {
    cellguid:'$datum.cellguid',
    value: '$datum.value',
    records: 1,
    n: 1
  }
}
const stages = [
  initialize(2),
  // here we are 3-level nested
  // max array idx is 2, so we have group by on the 0th idx and 1st idx
  // should we be 4-level nested, we would need to prepend/unshift groupByItemCateg(2)
  groupByItemCateg(1),
  groupByItemCateg(0), // the top level group by
  project
]
printjson(db.products.aggregate(stages).toArray())

playground for 3-depth

Upvotes: 2

Dheemanth Bhat
Dheemanth Bhat

Reputation: 4452

Try this query:

db.products.aggregate([
    {
        $addFields: {
            "product_category": { $arrayElemAt: ["$record", 0] }
        }
    },
    {
        $addFields: {
            "product": { $slice: ["$record", 1, { $size: "$record" }] }
        }
    },
    { $unwind: "$product" },
    {
        $group: {
            _id: "$product",
            count: { $sum: 1 },
            product_category: { $first: "$product_category" }
        }
    },
    {
        $group: {
            _id: "$product_category",
            count: { $sum: "$count" },
            records: {
                $push: {
                    "cellguid": "$_id.cellguid",
                    "value": "$_id.value",
                    "count": "$count"
                }
            }
        }
    },
    {
        $project: {
            "_id": 0,
            "cellguid": "$_id.cellguid",
            "value": "$_id.value",
            "count": "$count",
            "records": "$records"
        }
    }
]);

Output:

/* 1 */
{
    "_id" : {
        "cellguid" : "N118M2J4",
        "value" : "Fruits"
    },
    "count" : 5,
    "records" : [
        {
            "cellguid" : "V671H8W7",
            "value" : "Mango",
            "count" : 2
        },
        {
            "cellguid" : "V671H8W7",
            "value" : "Plum",
            "count" : 1
        },
        {
            "cellguid" : "V671H8W7",
            "value" : "Apple",
            "count" : 1
        },
        {
            "cellguid" : "V671H8W7",
            "value" : "Orange",
            "count" : 1
        }
    ]
},

/* 2 */
{
    "_id" : {
        "cellguid" : "N118M2J4",
        "value" : "Drinks"
    },
    "count" : 9,
    "records" : [
        {
            "cellguid" : "V671H8W7",
            "value" : "Pepsi",
            "count" : 3
        },
        {
            "cellguid" : "V671H8W7",
            "value" : "Water",
            "count" : 3
        },
        {
            "cellguid" : "V671H8W7",
            "value" : "Coca Cola",
            "count" : 3
        }
    ]
}

Upvotes: 0

Related Questions