sid0972
sid0972

Reputation: 53

Calculate an average using mapReduce in MongoDB

I have a collection of 10 million documents, each of which resembles this:

{
    "_id" : ObjectId("596dd10bbd1a6628ace1c14c"),
    "X" : 13212,
    "Z" : 173836,
    "userID" : 9354785
}

The userID is unique. I have to calculate the average of X and the sum of Z. I can calculate the sum of Z using the following mapReduce functions:

// Map step: called with the current document bound to `this`; emits that
// document's Z value under its userID as the key.
var mapFunction1 = function () {
    var doc = this;
    emit(doc.userID, doc.Z);
};

// Reduce step: receives a key and the array of values emitted for that key,
// and returns the sum of those values.
//
// key    - the key the values were emitted under (unused here).
// values - array of numbers emitted for `key`.
//
// The parameters must be declared: the reducer is handed the emitted values
// as its second argument, and there is no `Z` variable in scope inside it.
var reduceFunction1 = function (key, values) {
    // Array.sum is a helper provided by the mongo shell / server-side JS.
    return Array.sum(values);
};

// Run the map-reduce over the `transaction` collection and write the
// results to the `mapreduce` collection.
db.transaction.mapReduce(
    mapFunction1, // must match the declared name exactly; identifiers are case-sensitive
    reduceFunction1,
    { out: "mapreduce" }
);

How do I calculate the average of X?

I tried Array.avg(Z) but it returns the same output as sum(Z).

Upvotes: 4

Views: 3949

Answers (2)

Tejas Joshi
Tejas Joshi

Reputation: 100

You are not declaring the (key, values) parameters on reduceFunction1, so the reducer never receives the emitted values. Try this:

// Map step: emit one (userID, Z) pair for the document bound to `this`.
var mapFunction1 = function () {
    var key = this.userID;
    var value = this.Z;
    emit(key, value);
};

// Reduce step: given a key and the array of values emitted for it, return
// the arithmetic mean of those values.
//
// varKey - the key the values were emitted under (unused here).
// varZ   - array of numbers emitted for `varKey`.
//
// NOTE(review): MongoDB may invoke reduce more than once for the same key,
// feeding earlier outputs back in as inputs; a mean of partial means is not
// generally the overall mean. Confirm this is acceptable for the data, or
// carry {sum, count} and divide in a finalize step instead.
var reduceFunction1 = function (varKey, varZ) {
    // Array.avg is a helper provided by the mongo shell / server-side JS.
    var average = Array.avg(varZ);
    return average;
};

// Run the map-reduce over the `transaction` collection and write the
// results to the `mapreduce` collection.
db.transaction.mapReduce(
    mapFunction1, // must match the declared name exactly; identifiers are case-sensitive
    reduceFunction1,
    { out: "mapreduce" }
);

Upvotes: 1

Chris Nauroth
Chris Nauroth

Reputation: 9844

It looks like the requirements can be expressed more simply using the Aggregation Pipeline with the $avg and $sum operators.

Input

> db.transactions.find()
{ "_id" : ObjectId("5970e59e26507421fa20bee9"), "X" : 13212, "Z" : 173836, "userID" : 9354785 }
{ "_id" : ObjectId("5970e5a426507421fa20beea"), "X" : 1234, "Z" : 5678, "userID" : 1 }
{ "_id" : ObjectId("5970e5a826507421fa20beeb"), "X" : 100, "Z" : 200, "userID" : 2 }

Aggregation Pipeline

> db.transactions.aggregate([
    {
        $group : {
            _id: "aggregates",
            avgX: {
                $avg: "$X"
            },
            sumZ: {
                $sum: "$Z"
            }
        }
    }
])

Output

{ "_id" : "aggregates", "avgX" : 4848.666666666667, "sumZ" : 179714 }

Upvotes: 2

Related Questions