Vishwas
Vishwas

Reputation: 7067

How to group data using mongodb?

I have following data with interval of 15 minutes.

[{
    "_id" : ObjectId("5500a5e6f37a84d0509526ba"),
    "runtimeMilliSeconds" : NumberLong("1426105802063"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 71.72000122070312,
        "currentMemoryUtilization" : 77.4000015258789
    }
}
{
    "_id" : ObjectId("5500a96af37a84d0509526f8"),
    "runtimeMilliSeconds" : NumberLong("1426106701622"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 70.30000305175781,
        "currentMemoryUtilization" : 77.4000015258789
    }
}
{
    "_id" : ObjectId("5500aceef37a84d050952739"),
    "runtimeMilliSeconds" : NumberLong("1426107601441"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 73.2300033569336,
        "currentMemoryUtilization" : 77.4000015258789
    }
}
{
    "_id" : ObjectId("5500b07ff37a84d050952776"),
    "runtimeMilliSeconds" : NumberLong("1426108501342"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 60.61000061035156,
        "currentMemoryUtilization" : 77.4000015258789
    }
}


{
    "_id" : ObjectId("5500b404f37a84d0509527b7"),
    "runtimeMilliSeconds" : NumberLong("1426109402199"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 60.060001373291016,
        "currentMemoryUtilization" : 77.41000366210938
    }
}
{
    "_id" : ObjectId("5500b788f25a6f9765950f65"),
    "runtimeMilliSeconds" : NumberLong("1426110301345"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 58.689998626708984,
        "currentMemoryUtilization" : 77.41000366210938
    }
}
{
    "_id" : ObjectId("5500bb0cf37a84d050952837"),
    "runtimeMilliSeconds" : NumberLong("1426111202063"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 70.69999694824219,
        "currentMemoryUtilization" : 77.41000366210938
    }
}
{
    "_id" : ObjectId("5500be83f25a6f9765950fde"),
    "runtimeMilliSeconds" : NumberLong("1426112101980"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 69.41000366210938,
        "currentMemoryUtilization" : 77.44000244140625
    }
}

{
    "_id" : ObjectId("5500c206f37a84d0509528ac"),
    "runtimeMilliSeconds" : NumberLong("1426113001781"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 70.63999938964844,
        "currentMemoryUtilization" : 77.44000244140625
    }
}
{
    "_id" : ObjectId("5500c58cf37a84d0509528ea"),
    "runtimeMilliSeconds" : NumberLong("1426113901510"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 68.38999938964844,
        "currentMemoryUtilization" : 77.44000244140625
    }
}
{
    "_id" : ObjectId("5500c911f25a6f97659510a0"),
    "runtimeMilliSeconds" : NumberLong("1426114801403"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 77.7300033569336,
        "currentMemoryUtilization" : 77.44999694824219
    }
}
{
    "_id" : ObjectId("5500cca0f37a84d050952968"),
    "runtimeMilliSeconds" : NumberLong("1426115702206"),
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 74.23999786376953,
        "currentMemoryUtilization" : 77.4800033569336
    }
}]

I want to group this data on hourly interval. This means I want to group 4 documents of every hour into single document such that values in 'cpuMemoryStats' keys will be average of all four. Also runtimeMilliSeconds will be average of 4 documents.

i.e. I want it like first to fourth, fifth to eighth doucment. I want four documents from above 12 documents with average of keys.

Sample output is :

[{
    "_id" : ObjectId("5500a5e6f37a84d0509526ba"),
    "runtimeMilliSeconds" : 1426107152000,
    "cpuMemoryStats" : {
        "currentCpuUtilization" : 68.96500206,
        "currentMemoryUtilization" : 77.400001526
    }
}
.
.
..
]

I tried out following:

db.collection.aggregate({"$match": { "hostId" : "1.1.1.1" , "customerId"   : "customerId" ,
"runtimeMilliSeconds" : { "$gte" : 1426104902206}}},

{"$group" : {"_id" : { "$subtract" :[ {"$divide" : ["$runtimeMilliSeconds", 3600 ]},

{ "$mod" : [{"$divide" : ["$runtimeMilliSeconds", 3600 ]},1] } ] },

"memoryUtilization":{"$avg":"$cpuMemoryStats.currentMemoryUtilization"},
  "runtime":{"$avg":"$runtimeMilliSeconds"}}})

How to group the data on hourly basis using mongo???

Upvotes: 2

Views: 118

Answers (2)

Vishwas
Vishwas

Reputation: 7067

I was pretty close to answer. I corrected my logic(math). Here is correct query-

db.collection.aggregate({
    "$match": {
    "hostId": "1.1.1.1",
    "customerId": "customerId",
    "runtimeMilliSeconds": {
        "$gte": 1426104902206
    }
    }
},
{
    "$group": {
    "_id": {
        "$subtract": [
            {
                "$divide": [
                    "$runtimeMilliSeconds",
                    3600*1000
                ]
            },
            {
                "$mod": [
                    {
                        "$divide": [
                            "$runtimeMilliSeconds",
                            3600*1000
                        ]
                    },
                    1
                ]
            }
        ]
    },
    "memoryUtilization": {
        "$avg": "$cpuMemoryStats.currentMemoryUtilization"
    },
    "runtime": {
        "$last": "$runtimeMilliSeconds"
    }
    }
},
{
    $sort: {
    runtime: 1
    }
})

This query will group all data hourly like 8.00 to 9.00, 9.00 to 10.00 etc

Upvotes: 0

Neil Lunn
Neil Lunn

Reputation: 151072

Date math seems to be the clear case on your storage format:

 db.collection.aggregate([
     { "$match": { 
         "hostId" : "1.1.1.1" , 
         "customerId" : "customerId" ,
         "runtimeMilliSeconds" : { "$gte" : 1426104902206 },
     }},
     { "$group" : {
         "_id" : { 
             "$subtract": [
                  "$runtimemilliSeconds",
                  { "$mod": [
                      "$runtimemilliSeconds",
                      1000 * 60 * 15 // 1000 ms x 60 sec * 15 mins     
                  ]}
             ]
         },
         "memoryUtilization": { "$avg": "$cpuMemoryStats.currentMemoryUtilization" },
         "runtime":{ "$avg": "$runtimeMilliSeconds" }
     }}
])

So for the record, apart from the general structure what you were looking for is a correct "constant" of 900000 as shown, being:

 1000 milliseconds
 x 60 seconds
 x 15 minutes

In order to actually come out to an hour intervals you just change the numbers

 1000 milliseconds
 x 60 seconds
 x 60 minutes

Which is one hour. All intervals are just done like that. But it's a modulo and not a division.

Upvotes: 4

Related Questions