Reputation: 59
I'm trying to insert over 1 million rows into a Postgres table using Node.js. The problem is that when I start the script, the memory usage keeps increasing until it reaches 1.5 GB of RAM, and then I get the error: FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
The result is always the same: about 7,000 inserted rows instead of 1 million.
Here is the code:
var pg = require('pg');
var fs = require('fs');
var config = require('./config.js');

var PgClient = new pg.Client(config.pg);
PgClient.connect();

var lineReader = require('readline').createInterface({
    input: fs.createReadStream('resources/database.csv') // file contains over 1 million lines
});

var n = 0;
lineReader.on('line', function(line) {
    n++;
    var insert = { "firstname": "John", "lastname": "Conor" };
    // No matter what data we insert, the point is that the number of inserted rows is much less than it should be
    PgClient.query('INSERT INTO HUMANS (firstname, lastname) VALUES ($1, $2)', [insert.firstname, insert.lastname]);
});

lineReader.on('close', function() {
    console.log('end ' + n);
});
Upvotes: 3
Views: 4162
Reputation: 59
I used pg-promise, as vitaly-t suggested, and this code works really fast:
const fs = require('fs');
const pgp = require('pg-promise')();
const config = require('./config.js');

// Db connection
const db = pgp(config.pg);

// Transform a lot of single-row inserts into one multi-row insert
function Inserts(template, data) {
    if (!(this instanceof Inserts)) {
        return new Inserts(template, data);
    }
    this._rawType = true;
    this.toPostgres = () => {
        return data.map(d => '(' + pgp.as.format(template, d) + ')').join();
    };
}

// Insert template
function Insert() {
    return {
        firstname: null,
        lastname: null,
        birthdate: null,
        phone: null,
        email: null,
        city: null,
        district: null,
        location: null,
        street: null
    };
}

const lineReader = require('readline').createInterface({
    input: fs.createReadStream('resources/database.csv')
});

let n = 0;
let InsertArray = []; // must be `let`, not `const`: it gets reassigned after every batch

lineReader.on('line', function(line) {
    var insert = new Insert();
    n++;
    var InsertValues = line.split(',');
    if (InsertValues[0] !== '"Firstname"') { // skip the header line
        let i = 0;
        for (let prop in insert) {
            insert[prop] = (InsertValues[i] == '') ? insert[prop] : InsertValues[i];
            i++;
        }
        InsertArray.push(insert);
        if (n == 10000) {
            lineReader.pause();
            // convert the insert array into one multi-row insert
            const values = new Inserts('${firstname}, ${lastname}, ${birthdate}, ${phone}, ${email}, ${city}, ${district}, ${location}, ${street}', InsertArray);
            db.none('INSERT INTO users (firstname, lastname, birthdate, phone, email, city, district, location, street) VALUES $1', values)
                .then(data => {
                    n = 0;
                    InsertArray = [];
                    lineReader.resume();
                })
                .catch(error => {
                    console.log(error);
                });
        }
    }
});

lineReader.on('close', function() {
    console.log('end ' + n);
    // last insert for any remaining rows
    if (n > 0) {
        const values = new Inserts('${firstname}, ${lastname}, ${birthdate}, ${phone}, ${email}, ${city}, ${district}, ${location}, ${street}', InsertArray);
        db.none('INSERT INTO users (firstname, lastname, birthdate, phone, email, city, district, location, street) VALUES $1', values)
            .then(data => {
                console.log('Last');
            })
            .catch(error => {
                console.log(error);
            });
    }
});
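For reference, pg-promise also ships a helpers namespace that can generate the multi-row insert for you, which would replace the custom Inserts formatter above. A minimal sketch, assuming the same config and users table; the rowsFromCsv parameter is hypothetical and stands in for one batch like InsertArray:

const pgp = require('pg-promise')();
const config = require('./config.js');
const db = pgp(config.pg);

// Describe the target table and its columns once
const cs = new pgp.helpers.ColumnSet(
    ['firstname', 'lastname', 'birthdate', 'phone', 'email',
     'city', 'district', 'location', 'street'],
    { table: 'users' }
);

// rowsFromCsv is an array of row objects, e.g. one batch of parsed CSV lines
function insertBatch(rowsFromCsv) {
    // helpers.insert builds a single multi-row INSERT statement as a string
    const query = pgp.helpers.insert(rowsFromCsv, cs);
    return db.none(query);
}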
Upvotes: 3
Reputation: 59
So I resolved the issue. There is a PgClient.queryQueue, which is processed much more slowly than the file is read, so when a big file is read, the queue overflows. Here is the solution: we change the lineReader.on('line', cb) section so that whenever the queue has too many elements, we pause lineReader:
lineReader.on('line', function(line) {
    n++;
    var insert = { "firstname": "John", "lastname": "Conor" };
    PgClient.query('INSERT INTO HUMANS (firstname, lastname) VALUES ($1, $2)', [insert.firstname, insert.lastname], function(err, result) {
        if (err) console.log(err);
        if (PgClient.queryQueue.length > 15000) {
            lineReader.pause();
        } else {
            lineReader.resume();
        }
    });
});
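Note that queryQueue is an internal property of pg.Client rather than part of its documented API. A more conservative sketch of the same backpressure idea pauses the reader on every line and resumes it only when the query's callback fires:

lineReader.on('line', function(line) {
    n++;
    lineReader.pause(); // stop reading until the current insert has finished
    var insert = { "firstname": "John", "lastname": "Conor" };
    PgClient.query('INSERT INTO HUMANS (firstname, lastname) VALUES ($1, $2)',
        [insert.firstname, insert.lastname],
        function(err) {
            if (err) console.log(err);
            lineReader.resume(); // safe to read the next line now
        });
});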
Upvotes: -1