Reputation: 38529
This is an example to replicate my issue:
I populate my collection with 1million documents like this:
// Seed the collection with 1,000,000 documents; every 3rd one gets a signUpDate.
// `var i` avoids leaking an implicit global from the shell loop.
for (var i = 1; i <= 1000000; i++) {
  if (i % 3 === 0) {
    db.numbers.insert({_id: i, stuff: "Some data", signUpDate: new Date()});
  } else {
    db.numbers.insert({_id: i, stuff: "Some data"});
  }
}
So, every 3rd document has a signUpDate
I create the following index:
db.numbers.ensureIndex({"signUpDate" : 1});
Then, I have the following very small app using nodejs:
// Connect to a local MongoDB and print how many documents match the filter.
var Db = require('mongodb').Db
, Connection = require('mongodb').Connection
, Server = require('mongodb').Server
, format = require('util').format;
var host = 'localhost';
var port = Connection.DEFAULT_PORT;
console.log("Connecting to " + host + ":" + port);
Db.connect(format("mongodb://%s:%s/test?w=1", host, port), function(err, db) {
  // Bail out if the connection itself failed; the original swallowed this error.
  if (err) {
    console.error(err);
    return;
  }
  var collection = db.collection('numbers');
  // NOTE(review): the filter {'signedUp': true} does not match the inserted
  // documents (the stored field is signUpDate) — confirm the intended field,
  // e.g. {signUpDate: {$ne: null}}.
  collection.find({'signedUp': true}, {'_id':1}).limit(100000).toArray(function(err, docs){
    // Surface query errors instead of ignoring `err`.
    if (err) {
      console.error(err);
      return;
    }
    console.log(docs.length);
  });
});
This works fine.
However, if I remove the .limit(100000),
the server sits there and never responds.
In a nutshell, all I'm trying to do is return a list of _id values where signUpDate is not null (there should be around 333,000).
I'm pretty sure the issue is the way mongodb caches, but I'm not sure how I can work around this?
Upvotes: 2
Views: 1598
Reputation: 36777
You need to set the batch size and then stream or iterate the results; otherwise the mongo driver loads everything into memory.
Also, that {'_id':1}
smells fishy; it probably should be {fields: {'_id' : 1}}
So the result in your case would be:
// Iterate the cursor with a bounded batch size so the driver never buffers
// the whole result set. The placeholder line is commented out — as written
// in the original it was bare prose inside the function body (a syntax error).
collection.find({'signedUp': true}, {batchSize: 1000, fields: {'_id' : 1}}).each(function(err, item) {
  // do something with item
});
Upvotes: 6
Reputation: 312035
You shouldn't call toArray
on a large result set like this. Instead, either:
Iterate over the results using each
:
// Walk the cursor one document at a time; the driver passes a null doc
// once the cursor is exhausted.
collection.find({'signedUp': true}, {'_id':1}).each(function(err, doc){
  if (!doc) {
    console.log('All done!');
    return;
  }
  console.log(doc);
});
or stream the results:
// Consume the cursor as a Node stream; 'close' fires once it is drained.
var stream = collection.find({'signedUp': true}, {'_id':1}).stream();
stream.on('data', function(document) {
  // Each matching document arrives as a 'data' event.
  console.log(document);
});
stream.on('close', function() {
  console.log('All done!');
});
Upvotes: 7