awimley
awimley

Reputation: 712

MongoDB using mongoose driver: how to query on a subset of document's array property?

I have a collection of voter registration data. These take the form:

voter = {
  name: "Some name",
  registrationDate: "2015-10-21T15:41:36+00:00",
  votingHistory: ["2015-10-21T15:41:36+00:00", "2015-7-21T15:41:36+00:00"]
}

I need to be able to detect the size of a subset of the votingHistory array. For example, this is one of the queries I tried:

voters.find({
  votingHistory : { $all : {$size : { $gt : 8 }}, { $gt : "2015-7-21T15:41:36+00:00" }}
})

The intention of this query is to find all voters with at least 8 recorded votes after 2015-7-21. Is there a way with mongoose to query on the size of subsets of array properties?

For example, with the following three entries:

{
  name: "name1",
  VotingHistory: ["2015-10-21T15:41:36+00:00", "2013-7-21T15:41:36+00:00"]
}, 
{
  name: "name2",
  VotingHistory: ["2015-10-21T15:41:36+00:00", "2011-7-21T15:41:36+00:00"]
}, 
{
  name: "name3",
  VotingHistory: ["2013-10-21T15:41:36+00:00", "2011-7-21T15:41:36+00:00", "2009-10-21T15:41:36+00:00", "2010-7-21T15:41:36+00:00"]
}

I'd like to find where 2 or more elements in the VotingHistory array represent a date on or after 2013-7-21. Which is only name 1 in this example.

Upvotes: 1

Views: 633

Answers (2)

chridam
chridam

Reputation: 103335

For effective querying, I would suggest modifying your mongoose schema by changing the string literals that represent the dates to actual dates.

You can modify this at schema definition first, for example

var mongoose = require('mongoose');
var Schema = mongoose.Schema;

var voterSchema = new Schema({
    name: String,
    registrationDate: Date,
    votingHistory: [Date]
});

var Voter = mongoose.model("Voter", voterSchema, "voters" );

Once that's done, you would need to modify the existing collection by using the Bulk Operations API to leverage your updates. For any given mongoose model there exists a .collection accessor that essentially accesses the "collection object" from the underlying "node native driver" on which mongoose is implemented itself. With this you can do the following updates on documents where the registrationDate field is a string

mongoose.connection.on("open", function(err, conn) { 

    var bulk = Voter.collection.initializeOrderedBulkOp();
    var counter = 0;

    Voter.find({"registrationDate": {"$type": 2} }, function(err, docs) {
        async.each(docs, function(doc, callback) {
            var regDate = new Date(doc.registrationDate),
                history = doc.votingHistory.map(function (dt){
                    return new Date(dt);
                });
            bulk.find({"_id": doc._id}).updateOne({
                "$set": {
                    registrationDate: regDate,
                    votingHistory: history
                }
            });
            counter++;

            if (counter % 1000 == 0) {
                bulk.execute(function(err,result) {             
                    bulk = Voter.collection.initializeOrderedBulkOp();
                });
            } 
            else {
                callback();
            }
        },

        // When everything's done
        function(err) {
            if ( counter % 1000 != 0 ) 
                bulk.execute(function(err,result) {
                   console.log("more updates" );
                });        
            console.log("done now");
        }
    });
});

Once the updates are done you can then do either of the couple of approaches. One of them is to use the $where operator:

var voteDate = new Date(2015, 6, 21);
Voter.find({ 
    "$where": "this.votingHistory.length > 8",
    "votingHistory": { "$gt": voteDate  }
}).exec(callback);

The other is by using the dot notation to "trick" mongodb into looking for documents that have at least a 9th votingHistory array element:

var voteDate = new Date(2015, 6, 21);
Voter.find({ 
    "votingHistory.8": { "$exists": true },
    "votingHistory": { "$gt": voteDate }
}).exec(callback);

For a solution based on the aggregation framework (based on the assumptions that the dates are proper MongoDB dates), the following pipeline would give you the desired result:

var voteDate = new Date(2015, 6, 21),
    pipeline = [
        {
            "$project": {
                "name": 1, 
                "registrationDate": 1,
                "votingHistory": 1,
                "numberOfVotes": { "$size": "$votingHistory" }
            }
        },
        {
            "$match": {
                "numberOfVotes": { "$gt": 8 },
                "votingHistory": { "$gt": voteDate }
            }
        }

    ];
// you can then use the aggregate   
Voter.aggregate(pipeline)
     .exec(function (err, results){
        // access your results here
     });

// or using the aggregation builder
Voter.aggregate()
    .project({
        "name": 1, 
        "registrationDate": 1,
        "votingHistory": 1,
        "numberOfVotes": { "$size": "$votingHistory" }
    })
    .match({
        "numberOfVotes": { "$gt": 8 },
        "votingHistory": { "$gt": voteDate }
    })
    .exec(callback);

Upvotes: 1

Srini Kandula
Srini Kandula

Reputation: 1001

It is possible if you are willing to use mongo agrregation like below

db.voter.aggregate([
    {$unwind:"$votingHistory"},
{ $match: {votingHistory:{$gt:'2015-7-21T15:41:36+00:00'}}}, 
{$group:{_id: {voterId:'$_id',name:'$name',registrationDate:'$registrationDate'}, count:{$sum:1}}},
{$match:{count:{$gt:2}}},
{$project:{_id:"$_id.voterId", name:"$_id.name", registrationDate:"$_id.registrationDate"}}
])

Upvotes: 0

Related Questions