Mouette
Mouette

Reputation: 299

MongoDB Reduce Key with single value

I want to count the number of orders by clients, and thus, on the last year, last month and last week. I wrote a MapReduce program:

var mapOrders = function() {
    var v_order = {
        order_date : this.dt_order
        ...
    }; 

     emit(this.clientid, v_order);
};

var reduceOrders = function(p_clientid, p_orders) {
    // Initialization of the output format of the couters
    var r_result = { orders_count : {
        total: {
            1year: 0,
            1month: 0,
            7day: 0
        }
        ...
    }}

    for (var c_order = 0; c_order < p_orders.length; c_order++) {
        // Increment counters
    }

    return (r_result);
};

db.orders.mapReduce(
    mapOrders,
    reduceOrders,
    { 
        out: { merge:  "tmp_orders_indicators" }
    }
)

In my output collection, I have 2 types of records

{
    "_id" : 80320,
    "value" : {
        "order_date" : ISODate("2015-10-30T11:09:51.000Z")
        ...
    }
}

{
    "_id" : 80306,
    "value" : {
        "orders_count" : {
            "total" : {
                "count_1year" : 18,
                "count_1month" : 6,
                "count_7day" : 1
            }
            ...
        }
}

The clients with only 1 order don't go through the reduce function. I found this in the MongoDB doucmentation that explain that behaviour:

MongoDB will not call the reduce function for a key that has only a single value.

How can i do to have only 1 type of record in my output collection looking like this? Force all the record to go throught the reduce function?

{
    "_id" : 80306,
    "value" : {
        "orders_count" : {
            "total" : {
                "count_1year" : 18,
                "count_1month" : 6,
                "count_7day" : 1
            }
            ...
        }
}

Upvotes: 1

Views: 352

Answers (2)

Mouette
Mouette

Reputation: 299

Found a solution using the finalize utility.

var mapOrders = function() {
    var v_order = {
        order_date : this.dt_order
        ...
    }; 

     emit(this.clientid, v_order);
};

var reduceOrders = function(p_clientid, p_orders) {
    // Initialization of the output format of the couters
    var r_result = { orders_count : {
        total: {
            1year: 0,
            1month: 0,
            7day: 0
        }
        ...
    }}

    for (var c_order = 0; c_order < p_orders.length; c_order++) {
        // Increment counters
    }

    return (r_result);
};


var finalizeOrders = function(p_clientid, p_ReducedDrders) {

    if (typeof p_ReducedDrders.orders_count === 'undefined' )
        // Initialization of the output format of the couters
        var r_result = { orders_count : {
            total: {
                1year: 0,
                1month: 0,
                7day: 0
            }
        ...
        }}

        // do the same stuff as the for loop in the reducer
    }
    else {
        r_result = p_ReducedDrders
    }

    return (r_result);
};

db.orders.mapReduce(
    mapOrders,
    reduceOrders,
    { 
        out: { merge:  "tmp_orders_indicators" },
        finalize : finalizeOrders
    }
)

Upvotes: 1

chridam
chridam

Reputation: 103455

You can achieve this seamlessly with aggregation. Consider the following pipeline:

var dateSevenDaysAgo = new Date();
dateSevenDaysAgo.setDate(dateSevenDaysAgo.getDate()-7);

var dateMonthAgo = new Date();
dateMonthAgo.setMonth(dateMonthAgo.getMonth()-1);

var dateYearAgo = new Date();
dateYearAgo.setFullYear(dateYearAgo.getFullYear()-1);

var pipeline = [
    { "$match": { "$dt_order": { "$gte": dateYearAgo } } },
    {
        "$group": {
            "_id": "$id_client",
            "count_1year": {
                "$sum": {
                    "$cond": [ 
                        { "$gte": [ "$dt_order", dateYearAgo ] }, 
                        1, 0 
                    ]
                }
            },
            "count_1month": {
                "$sum": {
                    "$cond": [ 
                        { "$gte": [ "$dt_order", dateMonthAgo ] }, 
                        1, 0 
                    ]
                }
            },
            "count_7day": {
                "$sum": {
                    "$cond": [ 
                        { "$gte": [ "$dt_order", dateSevenDaysAgo ] }, 
                        1, 0 
                    ]
                }
            }
        }
    },
    { "$out": "tmp_indicators" }
];

db.orders.aggregate(pipeline);
db.tmp_indicators.find();

Upvotes: 1

Related Questions