m3n1at

Reputation: 59

Insert big number of rows into Postgres DB using NodeJS

I'm trying to insert over 1 million rows into a Postgres table using Node.js. The problem is that when I start the script, memory usage keeps increasing until it reaches about 1.5 GB of RAM, and then I get the error: FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory

The result is always the same: about 7000 inserted rows instead of 1 million.

Here is the code:

var pg = require('pg');
var fs = require('fs');
var config = require('./config.js');

var PgClient = new pg.Client(config.pg);
PgClient.connect();

var lineReader = require('readline').createInterface({
    input: fs.createReadStream('resources/database.csv') // file contains over 1 million lines
});
var n = 0;

lineReader.on('line', function(line) {
    n++;
    var insert = {"firstname": "John", "lastname": "Conor"};

    // No matter what data we insert, the point is that the number of inserted rows is much less than it should be
    PgClient.query('INSERT INTO HUMANS (firstname,lastname) values ($1,$2)', [insert.firstname, insert.lastname]);
});

lineReader.on('close', function() {
    console.log('end ' + n);
});

Upvotes: 3

Views: 4162

Answers (2)

m3n1at

Reputation: 59

I used pg-promise, as vitaly-t suggested, and this code works really fast:

const fs = require('fs');
const pgp = require('pg-promise')();
const config = require('./config.js');

// Db connection
const db = pgp(config.pg);

// Transform a lot of inserts into one
function Inserts(template, data) {
    if (!(this instanceof Inserts)) {
        return new Inserts(template, data);
    }
    this._rawType = true;
    this.toPostgres = () => {
        return data.map(d => '(' + pgp.as.format(template, d) + ')').join();
    };
}

// Insert template
function Insert() {
    return {
        firstname: null,
        lastname:  null,
        birthdate: null,
        phone:     null,
        email:     null,
        city:      null,
        district:  null,
        location:  null,
        street:    null
    };
}
const lineReader = require('readline').createInterface({
    input: fs.createReadStream('resources/database.csv')
});

let n = 0;
let InsertArray = []; // declared with let, because it is reset after every batch insert

lineReader.on('line', function(line) {
    var insert = new Insert();
    n++;
    var InsertValues = line.split(',');
    if (InsertValues[0] !== '"Firstname"') { // skip the header line
        let i = 0;
        for (let prop in insert) {
            insert[prop] = (InsertValues[i] === '') ? insert[prop] : InsertValues[i];
            i++;
        }
        InsertArray.push(insert);
        if (n === 10000) {
            lineReader.pause();
            // convert the accumulated rows into one multi-row insert
            const values = new Inserts('${firstname}, ${lastname}, ${birthdate}, ${phone}, ${email}, ${city}, ${district}, ${location}, ${street}', InsertArray);
            db.none('INSERT INTO users (firstname, lastname, birthdate, phone, email, city, district, location, street) VALUES $1', values)
                .then(data => {
                    n = 0;
                    InsertArray = [];
                    lineReader.resume();
                })
                .catch(error => {
                    console.log(error);
                });
        }
    }
});


lineReader.on('close', function() {
    console.log('end ' + n);
    // last (partial) insert
    if (n > 0) {
        const values = new Inserts('${firstname}, ${lastname}, ${birthdate}, ${phone}, ${email}, ${city}, ${district}, ${location}, ${street}', InsertArray);
        db.none('INSERT INTO users (firstname, lastname, birthdate, phone, email, city, district, location, street) VALUES $1', values)
            .then(data => {
                console.log('Last');
            })
            .catch(error => {
                console.log(error);
            });
    }
});
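
Note: pg-promise also provides a helpers namespace (pgp.helpers.ColumnSet and pgp.helpers.insert) that can generate the multi-row INSERT for you, so the custom Inserts type above is optional. A minimal sketch, assuming the same users table and config as above; the insertBatch helper is introduced here only for illustration:

const pgp = require('pg-promise')();
const config = require('./config.js');
const db = pgp(config.pg);

// Reusable column set describing the target table
const cs = new pgp.helpers.ColumnSet(
    ['firstname', 'lastname', 'birthdate', 'phone', 'email', 'city', 'district', 'location', 'street'],
    {table: 'users'}
);

// Build one multi-row INSERT from an array of row objects and execute it
function insertBatch(rows) {
    const query = pgp.helpers.insert(rows, cs); // INSERT INTO "users"(...) VALUES (...),(...),...
    return db.none(query);
}

With that in place, the Inserts/values construction inside the 'line' and 'close' handlers could be replaced by a single insertBatch(InsertArray) call.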

Upvotes: 3

m3n1at

Reputation: 59

So I resolved the issue. There is PgClient.queryQueue, which is processed much more slowly than the file is read, so when a big file is read the queue overflows. Here is the solution: we change the lineReader.on('line', cb) section so that whenever the queue has a lot of elements we pause lineReader.

lineReader.on('line', function(line) {
    n++;
    var insert = {"firstname": "John", "lastname": "Conor"};
    PgClient.query('INSERT INTO HUMANS (firstname,lastname) values ($1,$2)', [insert.firstname, insert.lastname], function(err, result) {
        if (err) console.log(err);
        if (PgClient.queryQueue.length > 15000) {
            lineReader.pause();
        } else {
            lineReader.resume();
        }
    });
});
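
Note that queryQueue is an internal property of node-postgres and may change between versions. A rough alternative sketch of the same back-pressure idea, assuming a node-postgres version where client.query() returns a promise when no callback is passed: pause the reader before each insert and resume once it completes.

lineReader.on('line', function(line) {
    n++;
    lineReader.pause(); // stop reading while the insert is in flight
    PgClient.query('INSERT INTO HUMANS (firstname,lastname) values ($1,$2)', ['John', 'Conor'])
        .then(function() {
            lineReader.resume();
        })
        .catch(function(err) {
            console.log(err);
            lineReader.resume();
        });
});

readline may still emit a few already-buffered 'line' events after pause(), so a handful of inserts can overlap, but the query queue can no longer grow without bound.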

Upvotes: -1
