rlib
rlib

Reputation: 7867

MongoDB3.2. aggregation, remove processed document

I have a collection tickets containing docs with the following schema:

{_id: ObjectID, date: ISODate, cost: Number}

The following aggregation is done on the collection:

// Per-date totals (summed cost and number of tickets) for all tickets
// dated on or after 2016-02-01.
db.tickets.aggregate([
  {$match: {date: {$gte: ISODate("2016-02-01")}}},
  {$group:{ _id:"$date", totalCost: {$sum: "$cost"}, totalTickets: {$sum: 1}}}
], function(err,result) {
// ...
});

I want to randomly remove 20% of the docs from the aggregation, i.e. to simulate a situation where the tickets collection contains 20% fewer docs than it really does.

How can I do it?

Upvotes: 1

Views: 54

Answers (1)

chridam
chridam

Reputation: 103335

To aggregate over a random 80% of the docs dated on or after 2016-02-01 (the date specified in the aggregation), you could use the async package to first run a task that counts the documents matching that date filter, and then use the $sample operator with a size derived from this count. Something like the following:

// Runs the per-date ticket aggregation over a random ~80% sample of the
// documents matching `filter`, simulating a collection with 20% fewer docs.
//
// Relies on names provided by the surrounding code: `async`, `db`, `ISODate`,
// and (in the final callback) `res` and `next`.
// NOTE(review): `ISODate` is a mongo shell helper; if this runs under the
// Node.js driver it presumably needs to be `new Date("2016-02-01")` — confirm.
var locals = {},
    filter = {"date": {"$gte": ISODate("2016-02-01")}};
async.series([
    // Task 1: count the documents matching the filter.
    function(callback) {
        db.collection("tickets").count(filter, function (err, result){
            if (err) return callback(err);
            locals.count = result; // Total matching docs, before sampling
            callback();
        });
    },
    // Task 2: run the aggregation on a random sample of the matching docs.
    function(callback) {
        // $sample needs a literal positive integer for "size"; it does not
        // evaluate aggregation expressions such as $multiply, so the 80%
        // figure is computed here in JavaScript.
        var sampleSize = Math.round(locals.count * 0.8);

        // $sample rejects a size of 0, so with nothing to sample skip the
        // aggregation and report an empty result.
        if (sampleSize < 1) {
            locals.docs = [];
            return callback();
        }

        var pipeline = [
            {"$match": filter},
            {"$sample": {"size": sampleSize}},
            {"$group":{ "_id": "$date", "totalCost": {"$sum": "$cost"}, "totalTickets": {"$sum": 1}}}
        ];
        db.collection("tickets").aggregate(pipeline, function(err, result) {
            if (err) return callback(err);
            locals.docs = result;
            callback();
        });
    }
], function(err) { // Called once both tasks have called their task callbacks, or as soon as one fails
    if (err) return next(err);
    // Here locals is populated with 'count' (pre-sampling total) and 'docs'
    res.json({
        count: locals.count,
        data: locals.docs
    });
});

Upvotes: 1

Related Questions