Abhinav Das
Abhinav Das

Reputation: 606

How to insert about 1 million rows to a MySQL db using node-mysql?

I'm trying to build an app where there are two given lists - one of first-names and the other of last-names. I am trying to create a database with every combination of first-name and last-name. I'm generating the combinations and then trying to insert them into my database. But it seems that the memory is insufficient to handle this even though I have sliced my fullNames (firstname+lastname) array into smaller arrays.

// using the node-mysql driver to interact with mysql db
var mysql      = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
    host     : 'localhost',
    user     : 'root',
    password : '*******',
    database : '*******',
    multipleStatements: true,
});

var firstNames, lastNames; // arrays that will store
                           // first-names and last-names from files

function readFirstLastNames() {
    connection.connect();
    var fs = require("fs");
    firstNames = fs.readFileSync('firstnames.out').toString().split("\n");
    firstNames.sort();
    lastNames = fs.readFileSync('lastnames.out').toString().split("\n");
    lastNames.sort();
    connection.end();
}

/*
const f = firstNames; const l = lastNames;

genNames(firstNames,lastNames);

var allNames;

function genNames(fN, lN) {
    flatmap = (xs, fN) => xs.map(fN).reduce((a, b) => a.concat(b), []);
    allNames = flatmap(fN, a => lN.map(b => `${a} ${b}`));
}
*/

function insertIntoDB(x, y) {
    connection.connect();
    var fullname_part = fullNames.slice(x, y);
    connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [fullNames], (err, result) => {
        if (err) {
            console.error('error connecting: ' + err.stack);
            return;
        }
    });
    console.log("Done inserting all combinations of names.");
    connection.end();
}

readFirstLastNames();

var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, [])

var x = 1;
for (var y = 10000; y < 1000000;) {
    insertIntoDB(x, y);
    x = y;
    y = y + 10000;
}

But when I try to run - node index.js, I get the following errors:

Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.

<--- Last few GCs --->

    7095 ms: Scavenge 1399.0 (1457.9) -> 1399.0 (1457.9) MB, 1.4 / 0 ms (+ 56.0 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
    7843 ms: Mark-sweep 1399.0 (1457.9) -> 1398.4 (1457.9) MB, 748.5 / 0 ms (+ 441.4 ms in 1126 steps since start of marking, biggest step 60.3 ms) [last resort gc].
    8585 ms: Mark-sweep 1398.4 (1457.9) -> 1398.4 (1457.9) MB, 741.2 / 0 ms [last resort gc].


<--- JS stacktrace --->

==== JS stack trace =========================================

Security context: 0x3fc5864b4629 <JS Object>
    2: arrayToList [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~58] [pc=0x3d90a7d8ead7] (this=0x233152605a09 <an Object with map 0x38dc0d04dcc1>,array=0x225c88bf01f1 <JS Array[881892]>,timeZone=0x2ed2ed0de679 <String[5]: local>)
    3: escape [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~33] [pc=0x3d90a7d877e8] (this=0x233152605a09 <an Object w...

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6

I would like to understand how I may get past this issue and solve my problem! Thank you in advance.

EDIT 1 : As per the comment below by @tadman, I have executed the following changes to my code so as to use LOAD DATA INFILE.

// using the node-mysql driver to interact with mysql db
var mysql      = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
    host     : 'localhost',
    user     : 'root',
    password : '******',
    database : '******',
    multipleStatements: true,
});

connection.connect();

var firstNames, lastNames; // arrays that will store
                           // first-names and last-names from files

function readFirstLastNames() { // reads firstnames.out and lastnames.out, puts them in arrays and sorts etc.
    var fs = require("fs");
    firstNames = fs.readFileSync('firstnames.out').toString().split("\n");
    lastNames = fs.readFileSync('lastnames.out').toString().split("\n");
}


readFirstLastNames();

var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, []) // fullNames has all combinations of names.
fullNames.sort();


// Writing all combinations out to file in a comma separated syntax
var fs = require('fs');
var file = fs.createWriteStream('db_inserts.txt');
file.on('error', function(err) { /* error handling */ });
fullNames.forEach(function(v) { file.write(v.join(',') + '\n'); });
file.end();


// using node-mysql in conjunction with LOAD DATA INFILE to read the inserts into the table.
connection.query('LOAD DATA INFILE \'/Users/adas/Downloads/signzy/db_inserts.txt\' INTO TABLE names (firstname, lastname) FIELDS TERMINATED BY \',\'', (err, result) => {
    if (err) {
        console.error('error connecting: ' + err.stack);
        return;
    }
});
connection.end();
console.log("Done");

But now I see a different issue. It gives me a syntax error: #1064 - You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'FIELDS TERMINATED BY ', '' at line 1 .

EDIT 2 : As pointed out by @tadman, the column name list should come at the end of the query. So changing the query to: LOAD DATA INFILE '/Users/adas/Downloads/signzy/db_inserts.txt' INTO TABLE names FIELDS TERMINATED BY ',' (firstname, lastname); fixed the issue.

SIDENOTE : @tadman also pointed out that with Node, we don't need to force-use backslash to escape single-quotes. Simply surround the entire query with double-quotes and then go on to use single-quotes wherever required in between.

Upvotes: 1

Views: 4451

Answers (1)

msanford
msanford

Reputation: 12229

EDIT: As the thread has progressed, it's obvious that the way to go for this problem is to generate a file and load it into the database; I'll leave this answer here in case someone actually needs to do what I describe.


You're probably running out of memory because node-mysql's .query() method is, like most things in Node, asynchronous. So what you're actually doing is calling insertIntoDB(x, y); within the for loop and then instantly calling another one, not waiting for the first query to finish before trying another one, until you run out of memory.

I see that you're already including async. You can use something like async.mapSeries to serialize your INSERTs (given that your re-implement your allNames accumulator).

'use strict';
connection.connect();
async.mapSeries(allNames, (data, callback) => {
    connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [data.firstName, data.firstName], (err, result) => {
        if (err) {
            console.error('error: ' + err.stack);
            callback(err);
        } else {
            callback(null, result);
        }
    });
}, (err, results) => {
    // Final callback
    if (err) {
        console.log(`Error: ${err}`);
    }
});
connection.end();

Additionally:

  1. Sort your file once and write it back to disk so you don't have to sort it every time you load it.

  2. Remove additional calls to connection.connect() and .end(): just connect once at the start of your batch, or the whole script.

Upvotes: 2

Related Questions