Horseman
Horseman

Reputation: 17

how to aggregate in mongoDB

I have a document called user.monthly, in that I have we used store 'day' : no. of clicks . Here I have given 2 samples for different date

For month January

{
    name : "devid",
    date : ISODate("2014-01-21T11:32:42.392Z"),
    daily: {'1':12,'9':13,'30':13}
} 

For month February

{
    name : "devid",
    date : ISODate("2014-02-21T11:32:42.392Z"),
    daily: {'3':12,'12':13,'25':13}
}

How can I aggregate this and get total clicks for January and February ? Please help me to resolve my problem.

Upvotes: 0

Views: 113

Answers (1)

Neil Lunn
Neil Lunn

Reputation: 151190

Your current schema is not helping you here as the "daily" field ( which we presume is your clicks per type or something like that ) is represented as a sub-document, which means that you need to explicitly name the path to each field in order to do something with it.

A better approach would be to put this information in an array:

{
    "name" : "devid",
    "date" : ISODate("2014-02-21T11:32:42.392Z"),
    "daily": [
        { "type": "3",  "clicks": 12 },
        { "type": "12", "clicks": 13 },
        { "type": "25", "clicks": 13 }
    ]
}

Then you have an aggregation statement that goes like this:

db.collection.aggregate([

    // Just match the dates in January and February
    { "$match": {
        "date": {
            "$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
        }
    }},

    // Unwind the "daily" array
    { "$unwind": "$daily" },

    // Group the values together by "type" on "January" and "February"
    { "$group": {
        "_id": {
            "year": { "$year": "$date" },
            "month": { "$month": "$date" },
            "type": "$daily.type"
        },
        "clicks": { "$sum": "$daily.clicks" }
    }},

    // Sort the result nicely
    { "$sort": { 
        "_id.year": 1,
        "_id.month": 1,
        "_id.type": 1
    }}
])

That form is pretty simple. Or even if you do not care about the type as a grouping and just want the month totals:

db.collection.aggregate([
    { "$match": {
        "date": {
            "$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
        }
    }},
    { "$unwind": "$daily" },
    { "$group": {
        "_id": {
            "year": { "$year": "$date" },
            "month": { "$month": "$date" },
        },
        "clicks": { "$sum": "$daily.clicks" }
    }},
    { "$sort": { "_id.year": 1, "_id.month": 1 }}

])

But with the current sub-document form you currently have this becomes ugly:

db.collection.aggregate([
    { "$match": {
        "date": {
            "$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
        }
    }},
    { "$group": {
        "_id": {
            "year": { "$year": "$date" },
            "month": { "$month": "$date" },
        },
        "clicks": { 
            "$sum": {
                "$add": [
                    { "$ifNull": ["$daily.1", 0] },
                    { "$ifNull": ["$daily.3", 0] },
                    { "$ifNull": ["$daily.9", 0] },
                    { "$ifNull": ["$daily.12", 0] },
                    { "$ifNull": ["$daily.25", 0] },
                    { "$ifNull": ["$daily.30", 0] },
                ]
            }
        }
    }}      
])

That shows that you have no other option here other than to specify what is essentially every possible field under daily ( so probably much larger ). Then we have to evaluate as that key may possibly not exist for a given document to return a default value.

For example, your first document has no key "daily.3" so without the $ifNull check the returned value would be null and invalidate the whole $sum process so that the total would be "0".

Grouping on those keys as in the first aggregate example gets even worse:

db.collection.aggregate([

    // Just match the dates in January and February
    { "$match": {
        "date": {
            "$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
        }
    }},

    // Project with an array to match all possible values
    { "$project": {
        "date": 1,
        "daily": 1,
        "type": { "$literal": ["1", "3", "9", "12", "25", "30" ] }
    }},

    // Unwind the "type" array
    { "$unwind": "$type" },

    // Project values onto the "type" while grouping
    { "$group" : {
         "_id": {
             "year": { "$year": "$date" },
             "month": { "$month": "$date" },
             "type": "$type"
         },
         "clicks": { "$sum": { "$cond": [
                     { "$eq": [ "$type", "1" ] },
                     "$daily.1",
                     { "$cond": [
                         { "$eq": [ "$type", "3" ] },
                         "$daily.3",
                         { "$cond": [
                             { "$eq": [ "$type", "9" ] },
                             "$daily.9",
                             { "$cond": [
                                 { "$eq": [ "$type", "12" ] },
                                 "$daily.12",
                                 { "$cond": [
                                     { "$eq": [ "$type", "25" ] },
                                     "$daily.25",
                                     "$daily.30"
                                 ]}
                             ]}
                         ]}
                     ]}
         ]}}
    }},
    { "$sort": { 
       "_id.year": 1,
       "_id.month": 1,
       "_id.type": 1
    }}
])

Which is creating one big conditional evaluation using $cond to match out the values to the "type" which we projected all possible values in an array using the $literal operator.

If you do not have MongoDB 2.6 or greater you can always do this in place of the $literal operator statement:

        "type": { "$cond": [1, ["1", "3", "9", "12", "25", "30" ], 0] }

Where essentially the true evaluation from $cond returns a "literal" declared value, which is how you specify an array. There is also the hidden $const operator that is not documented, but now exposed as $literal.

As you can see the structure here is doing you no favors, so the best option is to change it. But if you cannot and otherwise find the aggregation concept for this too hard to handle, then mapReduce offers an approach, but the processing will be much slower:

db.collection.mapReduce(
    function () {
        for ( var k in this.daily ) {
            emit(
                {
                    year: this.date.getFullYear(),
                    month: this.date.getMonth() + 1,
                    type: k
                },
                this.daily[k]
            );
        }
    },
    function(key,values) {
        return Array.sum( values );
    },
    { 
        "query": {
            "date": {
                "$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
            }
        },
        "out": { "inline": 1 } 
    }
)

The general lesson here is that you will get the cleanest and fastest results by altering the document format and using the aggregation framework. But all the ways to do this are listed here.

Upvotes: 1

Related Questions