jones
jones

Reputation: 1453

MongoDB group by multiple fields in a way to not affect each other result

I have the following query, what I want is to have a combined group of custom group field names, with field value.

db.getCollection('mycollection').aggregate([
  {"$match":{
     "expireDate":{"$gte":"2018-02-06T00:00:00.000Z"},
     "publishDate":{"$lte":"2018-02-06T00:00:00.000Z"},
     "isPublished":true,"isDrafted":false,
     "deletedAt":{"$eq":null},"deleted":false
  }},
  {"$group":{
     "twentyFourHourAgo":{
        "$sum":{
           "$cond":[
             {"$gt":["$publishDate","2018-02-04T08:48:16.892Z"]},1,0
           ]
        }
      },
      "fortyEightHourAgo":{
        "$sum":{
            "$cond":[
               {"$gt":["$publishDate","2018-02-01T08:48:16.892Z"]},1,0
            ]
        }
      },
      "thirtyDaysAgo":{
         "$sum":{
            "$cond":[
               {"$gt":["$publishDate","2017-12-31T08:48:16.892Z"]},1,0
            ]
         }
      },
      "_id":{
        "position":{"$ifNull":["$position","Unknown"]},
        "workType":{"$ifNull":["$workType","Unknown"]},
        "functionalArea":{"$ifNull":["$functionalArea","Unknown"]},
        "minimumEducation":{"$ifNull":["$minimumEducation","Unknown"]},
        "gender":{"$ifNull":["$gender","Unknown"]},
        "contractType":{"$ifNull":["$contractType","Unknown"]},
        "locations":{"$ifNull":["$locations","Unknown"]},
        "requiredLanguages":{"$ifNull":["$requiredLanguages","Unknown"]},
        "company":{"$ifNull":["$company.name","Unknown"]}},"count":{"$sum":1}
     }
 },
 {"$group":{
     "_id":null,
     "twentyFourHourAgo":{
        "$sum":"twentyFourHourAgo"
     },
     "fortyEightHourAgo":{
        "$sum":"$fortyEightHourAgo"
     },
     "thirtyDaysAgo":{
        "$sum":"$thirtyDaysAgo"
     },
     "position":{"$addToSet":{"Name":"$_id.position","Count":"$count"}},
     "workType":{"$addToSet":{"Name":"$_id.workType","Count":"$count"}},
     "functionalArea":{
        "$addToSet":{"Name":"$_id.functionalArea","Count":"$count"}
     },
     "minimumEducation":{
        "$addToSet":{"Name":"$_id.minimumEducation","Count":"$count"}
     },
     "gender":{"$addToSet":{"Name":"$_id.gender","Count":"$count"}},"contractType":{"$addToSet":{"Name":"$_id.contractType","Count":"$count"}},"locations":{"$addToSet":{"Name":"$_id.locations","Count":"$count"}},"requiredLanguages":{"$addToSet":{"Name":"$_id.requiredLanguages","Count":"$count"}},"company":{"$addToSet":{"Name":"$_id.company","Count":"$count"}}}}]  
)

my document inside collection schema is like:

/* 1 */
{
    "_id" : ObjectId("59e4540bf14f1607b90ffb81"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2018-01-02T00:00:00.000Z"),
    "expireDate" : ISODate("2018-05-29T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "companyFiles",
            "name" : "abbbff58cd3fda2c59ab2ee620ea5aa0",
            "mime" : ".png",
            "size" : 5806
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "canApplyOnline" : true,
    "skills" : [ 
        "Skill1", 
        "Skill2", 
        "Skill3", 
        "Skill4"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Key1", 
        "Key2"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "requiresTravel" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Full Time",
    "contractType" : "Permanent",
}

/* 2 */
{
    "_id" : ObjectId("59f9402e05d04ebe5653d98f"),
    "vacancyNumber" : "1",
    "position" : "Software Engineer",
    "publishDate" : ISODate("2018-01-03T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-10T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "Education",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },    
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Afghanistan", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Another Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Keyword", 
        "Key1"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "male",
    "nationalities" : [ 
        "afghan", 
        "iranian"
    ],
    "workType" : "Full Time",
    "contractType" : "Short-Term",
}

/* 3 */
{
    "_id" : ObjectId("5a03235234f7504f13970abd"),
    "vacancyNumber" : "1",
    "position" : "Software Tester",
    "publishDate" : ISODate("2017-10-10T00:00:00.000Z"),
    "expireDate" : ISODate("2018-11-25T00:00:00.000Z"),
    "yearsOfExperience" : 40,
    "minimumEducation" : "Doctorate",
    "functionalArea" : "IT Software",
    "company" : {
        "id" : ObjectId("59e453fbf14f1607b90ffb80"),
        "name" : "My First Company",
        "profile" : "profile",
        "logo" : {
            "container" : "logo container",
            "name" : "logo name",
            "mime" : "logo mime type",
            "size" : 1
        }
    },
    "durations" : {
        "years" : 3,
        "months" : 4
    },
    "probationPeriod" : {
        "duration" : 34,
        "unit" : "month"
    },
    "salary" : {
        "minSalary" : 1000,
        "maxSalary" : 2000,
        "currency" : "USD",
        "period" : "monthly",
        "isNegotiable" : true
    },
    "locations" : [ 
        "Germany", 
        "Itly", 
        "Iran"
    ],
    "skills" : [ 
        "Skill1", 
        "Test Skill"
    ],
    "requiredLanguages" : [ 
        "Arabic", 
        "English", 
        "Russian", 
        "Dari", 
        "French"
    ],
    "keywords" : [ 
        "Test Key", 
        "Keyword"
    ],
    "deleted" : false,
    "deletedAt" : null,
    "isDrafted" : false,
    "isPublished" : true,
    "gender" : "female",
    "nationalities" : [ 
        "afghan"
    ],
    "workType" : "Part Time",
    "contractType" : "Permanent",
}

Now I want to count the group of data by my custom expression check 'twentyFourHourAgo, fortyEightHourAgo, thirtyDaysAgo', and also by the value of a field (functionalArea, position, locations, keywords, workType).

My current query result is

{
    "_id" : null,
    "twentyFourHourAgo" : 0,
    "fortyEightHourAgo" : 0.0,
    "thirtyDaysAgo" : 2.0,
    "position" : [ 
        {
            "Name" : "Software Engineer",
            "Count" : 1.0
        }, 
        {
            "Name" : "Software Tester",
            "Count" : 1.0
        }
    ],
    "workType" : [ 
        {
            "Name" : "Full Time",
            "Count" : 1.0
        }, 
        {
            "Name" : "Part Time",
            "Count" : 1.0
        }
    ],
    "functionalArea" : [ 
        {
            "Name" : "Education",
            "Count" : 1.0
        }, 
        {
            "Name" : "IT Software",
            "Count" : 1.0
        }
    ],
    "minimumEducation" : [ 
        {
            "Name" : "Doctorate",
            "Count" : 1.0
        }
    ],
    "gender" : [ 
        {
            "Name" : "male",
            "Count" : 1.0
        }, 
        {
            "Name" : "female",
            "Count" : 1.0
        }
    ],
    "contractType" : [ 
        {
            "Name" : "Short-Term",
            "Count" : 1.0
        }, 
        {
            "Name" : "Permanent",
            "Count" : 1.0
        }
    ],
    "locations" : [ 
        {
            "Name" : [ 
                "Afghanistan", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }, 
        {
            "Name" : [ 
                "Germany", 
                "Itly", 
                "Iran"
            ],
            "Count" : 1.0
        }
    ],
    "requiredLanguages" : [ 
        {
            "Name" : [ 
                "Arabic", 
                "English", 
                "Russian", 
                "Dari", 
                "French"
            ],
            "Count" : 1.0
        }
    ],
    "company" : [ 
        {
            "Name" : "First Company",
            "Count" : 1.0
        }, 
        {
            "Name" : "My First Company",
            "Count" : 1.0
        }
    ]
}

As you see, I have three document that has following properties:

  1. Two document that has the same position Software Tester, but query return 1 Software Tester (It means if I have multiple documents that have some common values in specific columns, their count result is wrong). The same problem exists for other fields 'contractType, workType, etc...'.
  2. In array-type fields such as locations, my first document has Germany, Italy, Iran values in locations array, my second document has Afghanistan, Italy, Iran, and my third document has Germany, Italy, Iran. But query result is like this:

    "locations" : [ 
      {
        "Name" : [ 
          "Afghanistan", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }, 
      {
        "Name" : [ 
          "Germany", 
          "Itly", 
          "Iran"
        ],
        "Count" : 1.0
      }
     ],
    

This should be like: Germany => 2, Italy,Iran => 3, and Afghanistan => 1 The same problem exists for other array type fields.

Upvotes: 0

Views: 357

Answers (1)

user9251303
user9251303

Reputation:

Apologies I misunderstood your question earlier. To be able to $unwind the location array, but NOT effect your twentyFourHourAgo etc you could look at using $first.

You'll need to $unwind any array if you wish count/sum the individual elements.

Example of how using $first.

db.getCollection('foo').aggregate([
{ $unwind : "$locations" },
{ "$group" : { "_id" : "$_id", 
        "twentyFourHourAgo":{ $first : {
            "$sum" : { "$cond":[
                {"$gt":["$publishDate", ISODate("2016-10-10T00:00:00.000Z")]},1,0 ] } } },
        "fortyEightHourAgo" : { $first : {
            "$sum" : { "$cond" : [ 
                { "$gt" : [ "$publishDate","2018-01-02T00:00:00.000Z"]},1,0 ] } } },
        "thirtyDaysAgo" : { $first : { 
            "$sum" : { "$cond" : [
                { "$gt" : [ "$publishDate","2017-12-31T08:48:16.892Z"]},1,0 ] } } },
        } },
{ "$group" : { "_id" : null,
     "twentyFourHourAgo" : { "$sum" : "$twentyFourHourAgo" },
     "fortyEightHourAgo" : { "$sum" : "$fortyEightHourAgo" },
     "thirtyDaysAgo" : { "$sum" : "$thirtyDaysAgo" },
 }}
])

Output:

"_id" : null,
"twentyFourHourAgo" : 0,
"fortyEightHourAgo" : 3.0,
"thirtyDaysAgo" : 3.0,

Please see here $first for further information on why I think it might be of use. I've stuck an $unwind at the beginning to help prove that it will solve the problem in your OP.

Upvotes: 0

Related Questions