Reputation: 3395
I am stuck and confused with my current Aggregate expression and I was hoping on some input or a solution in Mongo itself.
The original data from Mongo (simplified to only the fields I need right now):
[{
'status': 'Cancelled',
'CIC Package': 'Test Gallery Cafe',
},
{
'status': 'Completed',
'CIC Package': 'Design Thinking workshop'
},
{
'status': 'Tentative',
'CIC Package': 'Design Thinking workshop'
},
{
'status': 'Confirmed',
'CIC Package': 'Product / solution demonstration'
},
....etc
]
In general...there are 1000s of records of probably 8 'CIC packages' with different statuses (Confirmed, Cancelled, Tentative, Completed) and other data that I have excluded for now.
The end result I am looking for is something like this:
[{
"_id" : "Test Gallery Café",
"package" : "Test Gallery Café",
"status" : [
{
"Cancelled": 1
},
{
"Completed": 1
}
]
},
{
"_id" : "Design Thinking workshop",
"package" : "Design Thinking workshop",
"status" : [
{
"Cancelled": 3
},
{
"Completed": 2
}
]
},
{
"_id" : "Product / solution demonstration",
"package" : "Product / solution demonstration",
"status" : [
{
"Completed": 10
},
{
"Cancelled": 3
},
{
"Confirmed": 1
}
]
}]
So per CIC package
which I renamed to package
in the $group
I want to have a count of each status that exists in the dataset. The statuses and packages are not under my control so in time new ones could be added. It needs to be a dynamic group.
I came as far as this:
db.reportData.aggregate([
{
$project:
{
'CIC package': 1,
'Status': 1
}
}
,
{
$group:
{
_id: '$CIC package',
package:
{
$first: '$CIC package'
}
,
status:
{
$push: '$Status'
}
}
}
]).toArray()
which resulted in something likes this:
[{
"_id" : "Test Gallery Café",
"package" : "Test Gallery Café",
"status" : [
"Cancelled",
"Completed"
]
},
{
"_id" : "Design Thinking workshop",
"package" : "Design Thinking workshop",
"status" : [
"Cancelled",
"Cancelled",
"Cancelled",
"Completed",
"Completed"
]
},
{
"_id" : "Product / solution demonstration",
"package" : "Product / solution demonstration",
"status" : [
"Completed",
"Completed",
"Cancelled",
"Processing",
"Cancelled",
"Cancelled",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Tentative"
]
}]
This is a small extraction of a much larger set, but it a good sample of the result so far.
I have tried unwind
after the last group
which does create new records that I possibly could group
again, but I am getting a bit confused right now. And maybe I am doing it inefficiently.
I think I am almost there but I would love some input.
Any ideas?
Upvotes: 1
Views: 4190
Reputation: 3395
Okay,
I have come to sort a solution with the help of Blakes Seven answer. The following query seems to work and is based on the start dataset posted in my first question. The addition is adding 2 groups at the end to create my desired result.
db.reportData.aggregate([
{
$project: {
'CIC package': 1,
'Start Date': 1,
'Status': 1
}
},
{
$group: {
_id: '$CIC package',
package: {
$first: '$CIC package'
},
status: {
$push: '$Status'
}
}
},
{
$unwind: '$status'
},
{
$group:
{
_id:
{
"_id": "$_id",
"package": "$package",
"status": "$status"
},
package: {
$first: '$package'
},
status: {
$first: '$status'
},
count:{
$sum: 1
}
}
},
{
$group:
{
_id: "$_id._id",
package: {
$first: "$_id.package"
},
status:
{
$push:
{
"status" : "$_id.status",
"count": '$count'
}
}
}
}
]).toArray()
It results in a dataset like this:
[
{
"_id" : "Studio Canal",
"package" : "Studio Canal",
"status" : [
{
"status" : "Completed",
"count" : 8
},
{
"status" : "Cancelled",
"count" : 2
}
]
},
{
"_id" : "Meeting / forum",
"package" : "Meeting / forum",
"status" : [
{
"status" : "Cancelled",
"count" : 254
},
{
"status" : "Completed",
"count" : 275
},
{
"status" : "Processing",
"count" : 6
},
{
"status" : "Tentative",
"count" : 1
},
{
"status" : "Confirmed",
"count" : 6
}
]
},
{
"_id" : "Design Thinking workshop",
"package" : "Design Thinking workshop",
"status" : [
{
"status" : "Cancelled",
"count" : 2
}
]
},
{
"_id" : "Test Gallery Café",
"package" : "Test Gallery Café",
"status" : [
{
"status" : "Cancelled",
"count" : 1
},
{
"status" : "Completed",
"count" : 1
}
]
},
{
"_id" : "Not specified",
"package" : "Not specified",
"status" : [
{
"status" : "Completed",
"count" : 124
},
{
"status" : "Tentative",
"count" : 1
},
{
"status" : "Cancelled",
"count" : 42
},
{
"status" : "Confirmed",
"count" : 4
},
{
"status" : "Processing",
"count" : 5
}
]
},
{
"_id" : "Customer / partner / special event",
"package" : "Customer / partner / special event",
"status" : [
{
"status" : "Tentative",
"count" : 1
},
{
"status" : "Cancelled",
"count" : 145
},
{
"status" : "Processing",
"count" : 3
},
{
"status" : "Completed",
"count" : 284
},
{
"status" : "Confirmed",
"count" : 8
}
]
},
{
"_id" : "Product / solution demonstration",
"package" : "Product / solution demonstration",
"status" : [
{
"status" : "Tentative",
"count" : 1
},
{
"status" : "Confirmed",
"count" : 4
},
{
"status" : "Cancelled",
"count" : 82
},
{
"status" : "Completed",
"count" : 130
},
{
"status" : "Processing",
"count" : 1
}
]
}
]
which is what I am looking for. I have to check now if the data is correct but it looks like it. Only question now is can/should I optimise it. Maybe something for tomorrow.
Upvotes: 1
Reputation: 50406
You basically seem to want the "count of status" for each type, which is a matter of grouping first to count those and then just by the primary _id
of the object:
db.reportData.aggregate([
{ "$unwind": "$status" },
{ "$group": {
"_id": {
"_id": "$_id",
"package": "$package",
"status": "$status"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id._id",
"package": { "$first": "$_id.package" },
"status": {
"$push": {
"$cond": [
{ "$eq": [ "$_id.status", "Completed" ] },
{ "Completed": "$count" },
{ "$cond": [
{ "$eq": [ "$_id.status", "Cancelled" ] },
{ "Cancelled": "$count" },
{ "$cond": [
{ "$eq": [ "$_id.status", "Processing" ] },
{ "Processing": "$count" },
{ "Tentative": "$count" }
]}
]}
]
}
}
}}
])
Or just keep it generic with a "type" field for each status in the results:
db.reportData.aggregate([
{ "$unwind": "$status" },
{ "$group": {
"_id": {
"_id": "$_id",
"package": "$package",
"status": "$status"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id._id",
"package": { "$first": "$_id.package" },
"status": {
"$push": {
"type": "$_id.status",
"count": "$count"
}
}
}}
])
Which will give you results like this:
{
"_id" : "Test Gallery Café",
"package" : "Test Gallery Café",
"status" : [
{
"type" : "Completed",
"count" : 1
},
{
"type" : "Cancelled",
"count" : 1
}
]
}
{
"_id" : "Design Thinking workshop",
"package" : "Design Thinking workshop",
"status" : [
{
"type" : "Completed",
"count" : 2
},
{
"type" : "Cancelled",
"count" : 3
}
]
}
{
"_id" : "Not specified",
"package" : "Not specified",
"status" : [
{
"type" : "Processing",
"count" : 1
},
{
"type" : "Tentative",
"count" : 1
},
{
"type" : "Cancelled",
"count" : 3
},
{
"type" : "Completed",
"count" : 11
}
]
}
This gets a bit better in future releases of MongoDB with $filter
:
db.reportData.aggregate([
{ "$project": {
"package": 1,
"statusComplete": {
"$size": {
"$filter": {
"input": "$status",
"as": "el",
"cond": {
"$eq": [ "$$el", "Completed" ]
}
}
}
},
"statusCancelled": {
"$size": {
"$filter": {
"input": "$status",
"as": "el",
"cond": {
"$eq": [ "$$el", "Cancelled" ]
}
}
}
}
}}
])
As it is basically about "counting the matched elements in the arrays", and the last could be expanded upon to actually produce the same array result as the former, with a lot less overhead due to no $unwind
. But of course that is yet to be released, but it's just a sample of what you will be able to do in the future.
Also for the record, the data presented in the original post before alteration was this:
{
"_id" : "Test Gallery Café",
"package" : "Test Gallery Café",
"status" : [
"Cancelled",
"Completed"
]
}
{
"_id" : "Design Thinking workshop",
"package" : "Design Thinking workshop",
"status" : [
"Cancelled",
"Cancelled",
"Cancelled",
"Completed",
"Completed"
]
}
{
"_id" : "Not specified",
"package" : "Not specified",
"status" : [
"Completed",
"Completed",
"Cancelled",
"Processing",
"Cancelled",
"Cancelled",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Completed",
"Tentative"
]
}
Upvotes: 3