Reputation: 1819
I am trying to insert a million-plus (dummy) records into a MongoDB collection using a Node.js program, but unfortunately my process runs out of memory:
This is the code I wrote in JavaScript and running it through node
// NOTE(review): this is the code that exhausts the heap. insert() here is
// asynchronous and fire-and-forget (no callback, result ignored), but the
// loops below are fully synchronous, so all ~10 million insert operations
// get queued in driver memory before the event loop ever gets a chance to
// flush any of them — which matches the V8 "allocation failure" trace below.
var MongoClient = require('mongodb').MongoClient;
MongoClient.connect('mongodb://localhost:27017/course', function(err, db) {
if(err) throw err;
// Start from an empty collection
db.collection('students').drop();
var types = ['exam', 'quiz', 'homework', 'homework'];
// For 1 Million Records
for (var i = 0; i < 1000000; i++) {
// Each student taking 10 classes (one document per student/class pair)
for (var class_counter = 0; class_counter < 10; class_counter ++) {
// NOTE(review): scores, class_id and record are assigned without var,
// so they leak as implicit globals
scores = [];
// Each class has 4 grades, one per entry in `types`
for (var j = 0; j < 4; j++) {
scores.push({'type':types[j],'score':Math.random()*100});
}
// there are 500 different classes that they can take
class_id = Math.floor(Math.random()*501); // get a class id between 0 and 500
record = {'student_id':i, 'scores':scores, 'class_id':class_id};
// fire-and-forget insert: nothing throttles this loop
db.collection('students').insert(record);
}
}
});
This is the error trace I am getting:
AMAC02PC0PHG3QP:25_Indexes_Insert macadmin$ node app.js
<--- Last few GCs --->
28373 ms: Scavenge 1397.8 (1457.4) -> 1397.8 (1457.4) MB, 1.1 / 0 ms (+ 151.3 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
29444 ms: Mark-sweep 1397.8 (1457.4) -> 1397.7 (1457.4) MB, 1071.5 / 0 ms (+ 427.1 ms in 14 steps since start of marking, biggest step 202.5 ms) [last resort gc].
30486 ms: Mark-sweep 1397.7 (1457.4) -> 1397.6 (1457.4) MB, 1041.4 / 0 ms [last resort gc].
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0x23473037399 <JS Object>
1: /* anonymous */(aka /* anonymous */) [/Users/macadmin/Desktop/NodeJS_MongoDB/25_Indexes_Insert/app.js:~3] [pc=0x3f5d2b92c716] (this=0x23473004131 <undefined>,err=0x23473004131 <undefined>,db=0x1f851bb90029 <JS Object>)
2: /* anonymous */(aka /* anonymous */) [/Users/macadmin/Desktop/NodeJS_MongoDB/25_Indexes_Insert/node_modules/mongodb/lib/mongo_client.js:455] [pc=0x3f5d2b4da8bd] (thi...
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6
My Laptop Configuration:
// Macbook Pro
// OS X 10.9.5
// 2.5 Ghz Intel Core i7
// 16 GB Ram DDR3
// SSD
Upvotes: 1
Views: 2337
Reputation: 1
I have successfully tested the following code in the environment described in the question:
var mongodb = require('mongodb');
// Entry point: run the whole load as soon as the script starts.
main();
// Runs the load and reports the outcome: errors to stderr,
// any truthy result to stdout.
function main() {
    doWork(function (error, result) {
        if (error) {
            console.error(error);
            return;
        }
        if (result) {
            console.log(result);
        }
    });
}
// Inserts 1,000,000 students x 10 class records each, strictly one at a
// time, so at most one insert is ever buffered — constant memory use.
// Calls back with (error) on failure, () on success; closes db either way.
//
// Fixes over the previous version: `i` and `j` advanced in lockstep, so
// each student got only ONE record instead of 10, and `insertOne` fired
// once for student_id 1000000 before the `i > IMAX` guard ran (off-by-one).
function doWork(callback) {
    mongodb.MongoClient.connect('mongodb://localhost:27017/course', onConnect);
    function onConnect(error, db) {
        if (error) return setImmediate(callback, error);
        var IMIN = 0;
        var IMAX = 1000000 - 1;   // student ids 0..999999
        var i = IMIN;
        var JMIN = 0;
        var JMAX = 10 - 1;        // 10 class records per student
        var j = JMIN;
        insertOne(db, i, onInsertOne);
        function onInsertOne(error, result) {
            if (error) {
                db.close();
                return setImmediate(callback, error);
            }
            // Advance the class counter; move to the next student only
            // after all 10 of the current student's records are inserted.
            j++;
            if (j > JMAX) {
                j = JMIN;
                i++;
            }
            // Check the bound BEFORE issuing the next insert, so exactly
            // (IMAX + 1) * (JMAX + 1) documents are written.
            if (i > IMAX) {
                db.close();
                return setImmediate(callback);
            }
            insertOne(db, i, onInsertOne);
        }
    }
}
// Builds one student/class document — four graded entries (one per type)
// with random integer scores 0..100 and a random class_id 1..500 — and
// writes it via the collection's insertOne, forwarding the callback.
function insertOne(db, studentId, callback) {
    var types = ['exam', 'quiz', 'homework', 'homework'];
    var scores = types.map(function (type) {
        return {
            type: type,
            score: Math.round(Math.random() * 100)
        };
    });
    var record = {
        student_id: studentId,
        scores: scores,
        class_id: Math.floor(Math.random() * 500) + 1
    };
    db.collection('students').insertOne(record, {}, callback);
}
Upvotes: 0
Reputation: 103445
If your MongoDB server is 2.6 or newer, it would be better to take advantage of the write-command Bulk API, which allows for the execution of bulk insert operations — these are simply abstractions on top of the server that make it easy to build bulk operations. These bulk operations come mainly in two flavours: ordered and unordered bulk operations.
Note that for servers older than 2.6 the API will down-convert the operations. However, it is not possible to down-convert 100%, so there might be some edge cases where it cannot correctly report the right numbers.
In your case, you could implement the Bulk API like this:
var MongoClient = require('mongodb').MongoClient;
MongoClient.connect("mongodb://localhost:27017/course", function(err, db) {
    // Handle connection error
    if(err) throw err;
    // Get the collection and bulk api artefacts
    var col = db.collection('students'),
        types = ['exam', 'quiz', 'homework', 'homework'],
        bulk = col.initializeOrderedBulkOp(), // batch currently being filled
        counter = 0,          // total documents queued so far
        batchesInFlight = 0,  // execute() calls whose callback hasn't fired
        queuingDone = false;  // set once the loops below have queued everything
    // Completion handler shared by every batch: closes the connection
    // only after the final outstanding batch has finished.
    function onBatchDone(err, result) {
        if (err) console.error(err);
        batchesInFlight--;
        if (queuingDone && batchesInFlight === 0) db.close();
    }
    // Drop the collection so we start empty
    col.drop();
    // 1 Million students
    for (var i = 0; i < 1000000; i++) {
        // Each student takes 10 classes: ONE document per student/class
        // pair. (Previously `record` was overwritten and `scores` kept
        // accumulating, so only one document per student — carrying 40
        // scores — ever reached bulk.insert.)
        for (var class_counter = 0; class_counter < 10; class_counter++) {
            // Each class has 4 grades, one per entry in `types`
            var scores = [];
            for (var j = 0; j < 4; j++) {
                scores.push({ 'type': types[j], 'score': Math.random() * 100 });
            }
            bulk.insert({
                'student_id': i,
                'scores': scores,
                // there are 500 different classes: id between 0 and 500
                'class_id': Math.floor(Math.random() * 501)
            });
            counter++;
            // Flush every 1000 documents. Re-initialise the batch
            // synchronously: doing it inside execute()'s callback would let
            // later inserts race into a batch that is already being sent.
            if (counter % 1000 === 0) {
                batchesInFlight++;
                bulk.execute(onBatchDone);
                bulk = col.initializeOrderedBulkOp();
            }
        }
    }
    // Flush any remainder that didn't fill a whole batch
    if (counter % 1000 !== 0) {
        batchesInFlight++;
        bulk.execute(onBatchDone);
    }
    queuingDone = true;
    // Covers the case where every batch already completed before this point
    if (batchesInFlight === 0) db.close();
});
-- UPDATE --
Kudos to @MarkusWMahlberg, for generating dummy content you may want to try the package mgenerate.
Upvotes: 4