rolandnsharp
rolandnsharp

Reputation: 281

What's the best way to modify every document in a large MongoDB collection without overloading memory?

At the moment I'm running this task:

// Paging state for the task below: `skip` is the offset of the next page,
// `limit` is the number of documents fetched per page.
var skip = 0;
var limit = 5;

// Gulp task: walks the Visit collection one page at a time and, for every
// visit that has a zone but lacks a lat/lng pair, copies the zone's
// coordinates onto the visit before saving it.
//
// `done` is gulp's completion callback: it is called with no arguments once
// the collection is exhausted, or with the error if a query or save fails.
gulp.task('add coordinates to visits', function(done) {

(function recurse() {

    Visit.find({})
        .skip(skip)
        .limit(limit)
        .populate('zone')

    .exec(function cb(err, visits) {
        if (err) {
            // Hand the failure to gulp rather than throwing from inside an
            // asynchronous callback, where nothing could catch it.
            return done(err);
        }
        if (visits.length === 0) {
            // An empty page means every document has been visited; without
            // this call gulp would never learn that the task finished.
            return done();
        }

        async.each(visits, function iterateEvents(visit, next) {
            if (!visit.zone) {
                // No zone to copy coordinates from: leave the visit untouched.
                return next();
            } else if (!visit.coordinates.lat || !visit.coordinates.lng) {
                visit.coordinates = {
                    lat: visit.zone.geo.coordinates.lat,
                    lng: visit.zone.geo.coordinates.lng
                };
            }
            visit.save(next);

        }, function cb(err) {
            if (err) {
                return done(err);
            }

            // Advance to the next page and pause for a second before fetching it.
            skip += limit;
            setTimeout(recurse, 1000);
        });

    });

})();

});

But I'm sure there must be a more elegant and optimal method than using skip, limit, and setTimeout. Is there some Mongo or Mongoose method for running update tasks like this?

Upvotes: 0

Views: 264

Answers (1)

Jason Cust
Jason Cust

Reputation: 10909

Based on our conversation in the comments, it seems like Mongoose's QueryStream might be what you are looking for:

// Stream the query results instead of paging through them with skip/limit.
// Each visit arrives through the 'data' event with its `zone` populated.
var stream = Visit.find().populate('zone').stream();

stream.on('data', function processDoc(visit) {
  // Inside the handler `this` is the stream; keep a reference for the
  // nested update callback below.
  var self = this;

  if (visit.zone && (!visit.coordinates.lat || !visit.coordinates.lng)) {
    // Hold back further 'data' events until this document's update finishes.
    self.pause();

    visit.update({
      coordinates: {
        lat: visit.zone.geo.coordinates.lat,
        lng: visit.zone.geo.coordinates.lng
      }
    }, function(err) {
      // A failed update is logged but does not stop the stream.
      if (err) { console.log(err); }
      self.resume();
    });
  }
});

stream.on('error', function(err) {
  console.log('error', err);
});

stream.on('close', function() {
  console.log('closed');
});

Upvotes: 1

Related Questions