Reputation: 18619
I have a MongoDB database with three huge collections, say 'A', 'B' and 'C'.
Each collection contains about 2 million documents.
Each document has certain properties, and each document needs to be updated based on the values of those properties, from which I can determine what the '$set' for that document should be.
Currently I am using the same approach for each collection: find all the documents in batches, collect them in memory (which I think is the culprit in the current approach), and then update them one by one.
For the first collection (which has data similar to the other collections), this takes 10 minutes to complete. The next two collections take approximately 2 hours each, or the MongoDB client crashes before finishing.
Something is clearly wrong and undesirable in the current approach.
Model.collection.find({}).batchSize(BATCH).toArray(function(err, docs) {
    if (err || !docs || !docs.length)
        return afterCompleteOneCollection(err);

    // Process one document at a time, recursing to the next index when done.
    var spec = function(index) {
        if (index % 1000 === 0) console.log('at index : ' + index);

        var toSet = { };
        var toUnset = { };

        var over = function() {
            var afterOver = function(err) {
                if (err) return afterCompleteOneCollection(err);
                if (index < docs.length - 1) spec(index + 1);
                else afterCompleteOneCollection(null);
            };

            var sb = Object.keys(toSet).length;
            var ub = Object.keys(toUnset).length;
            if (sb || ub) {
                // Only issue an update when there is something to change.
                var all = {};
                if (sb) all.$set = toSet;
                if (ub) all.$unset = toUnset;
                Model.collection.update({ _id: docs[index]._id }, all, {}, afterOver);
            } else afterOver(null);
        };

        // Inspect the document's properties, fill toSet / toUnset, then call over().
        forEachOfDocument(docs[index], toSet, toUnset, over);
    };

    spec(0);
});
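(forEachOfDocument is my helper that looks at one document's properties and fills toSet / toUnset before calling the callback. Roughly it does something like the sketch below; the property names here are just placeholders, not my real fields:)

// Rough sketch of the helper used above; the property names are placeholders.
// It inspects one document, decides which fields to $set or $unset,
// and then invokes the callback so the update can be issued.
var forEachOfDocument = function(doc, toSet, toUnset, callback) {
    // e.g. derive a normalized field from an existing property
    if (doc.status && doc.status !== doc.normalizedStatus) {
        toSet.normalizedStatus = doc.status.toLowerCase();
    }
    // e.g. drop a deprecated field if it is present
    if (doc.legacyField !== undefined) {
        toUnset.legacyField = 1;
    }
    callback();
};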
Is there a better solution for this?
Upvotes: 0
Views: 65
Reputation: 18619
The streaming approach from http://mongodb.github.io/node-mongodb-native/api-generated/cursor.html#stream worked for me.
This is what I am doing:
var stream = Model.collection.find().stream();

stream.on('data', function(data) {
    if (data) {
        var toSet = { };
        var toUnset = { };

        var over = function() {
            var afterOver = function(err) {
                if (err) console.log(err);
            };

            var sb = Object.keys(toSet).length;
            var ub = Object.keys(toUnset).length;
            if (sb || ub) {
                // Only issue an update when there is something to change.
                var all = {};
                if (sb) all.$set = toSet;
                if (ub) all.$unset = toUnset;
                Model.collection.update({ _id: data._id }, all, {}, afterOver);
            } else afterOver(null);
        };

        forEachOfDocument(data, toSet, toUnset, over);
    }
});

stream.on('close', function() {
    afterCompleteOneCollection();
});
Upvotes: 0