Reputation: 7867
I have a collection, tickets,
containing documents with the following schema:
{_id: ObjectID, date: ISODate, cost: Number}
The following aggregation is done on the collection:
// Group the tickets dated on or after 2016-02-01 by date, summing the cost
// and counting the tickets for each date.
db.tickets.aggregate([
    {$match: {date: {$gte: ISODate("2016-02-01")}}},
    {$group: {_id: "$date", totalCost: {$sum: "$cost"}, totalTickets: {$sum: 1}}}
], function(err, result) {
    // ...
});
I want to randomly exclude 20% of the documents from the aggregation, i.e. to simulate a situation in which the tickets collection contains 20% fewer documents than it really does.
How can I do it?
Upvotes: 1
Views: 54
Reputation: 103335
To get 80% of the documents dated on or after 2016-02-01
(the date specified in the aggregation), you could use the async package to first run a task that counts the documents matching that date filter,
and then use the $sample
operator with a size equal to 80% of that count. Something like the following:
// Simulates a tickets collection that holds 20% fewer documents: first count
// the documents matching the date filter, then run the aggregation over a
// random sample containing 80% of them.
var SAMPLE_RATIO = 0.8; // fraction of the matching documents to keep
var locals = {},
    // A plain Date is used because the ISODate helper only exists in the
    // mongo shell; a date-only ISO string is parsed as midnight UTC.
    filter = {"date": {"$gte": new Date("2016-02-01")}};

async.series([
    // Task 1: count the documents matching the filter.
    function(callback) {
        db.collection("tickets").count(filter, function(err, result) {
            if (err) return callback(err);
            locals.count = result; // Shared with the aggregation task below
            callback();
        });
    },
    // Task 2: aggregate over a random sample of the matching documents.
    function(callback) {
        // $sample only accepts a positive integer as its size (aggregation
        // expressions such as $multiply are rejected), so the sample size
        // is computed here in JavaScript.
        var sampleSize = Math.floor(locals.count * SAMPLE_RATIO);

        // $sample fails for a size of 0, so there is nothing to aggregate
        // when too few documents match.
        if (sampleSize < 1) {
            locals.docs = [];
            return callback();
        }

        var pipeline = [
            {"$match": filter},
            {"$sample": {"size": sampleSize}},
            {"$group": {"_id": "$date", "totalCost": {"$sum": "$cost"}, "totalTickets": {"$sum": 1}}}
        ];
        db.collection("tickets").aggregate(pipeline, function(err, result) {
            if (err) return callback(err);
            locals.docs = result;
            callback();
        });
    }
], function(err) { // Called once both tasks have invoked their callbacks, or as soon as one reports an error
    if (err) return next(err);
    // Here locals is populated with 'count' and 'docs'
    res.json({
        count: locals.count,
        data: locals.docs
    });
});
Upvotes: 1