Reputation: 15504
I'm reading a 45Mb text file in NodeJS and doing some processing to each char on it.
The first version of my script took one char at a time from the original file, stored it in an accumulator variable (result += char), and at the end saved result to a text file. This did not work: the file was so big that I was putting too much data in RAM, so I got an error: JavaScript heap out of memory. I decided to use a write stream so I could write data directly to disk one char at a time, hoping this would solve the issue:
fs = require('fs');
var proportion = 2;
var c = '';
// Load the whole input as one UTF-8 string, then emit it to output.txt
// one character at a time through a write stream.
fs.readFile('./concat/all.txt', 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }

  // Target text block: w is roughly `proportion` times h, and w * h is
  // roughly the number of characters in the input.
  var h = parseInt(Math.sqrt(data.length / proportion));
  var w = parseInt(h * proportion);
  console.log(w, h);

  var wstream = fs.createWriteStream('output.txt');
  var lineCount = 0;
  var total = data.length;

  for (var pos = 0; pos < total; pos++) {
    var code = data.charCodeAt(pos);
    var previous = pos > 0 ? data.charCodeAt(pos - 1) : -1;

    // Skip a space (32) or line feed (10) that directly follows an identical character.
    if ((code === 32 || code === 10) && previous === code) {
      continue;
    }

    // Line feed, carriage return and tab are each emitted as a single space.
    switch (code) {
      case 9:
      case 10:
      case 13:
        c = " ";
        break;
      default:
        c = data.charAt(pos);
    }

    // Append a line break once more than w characters have been emitted on the current line.
    lineCount += 1;
    if (lineCount > w) {
      c = c + "\n";
      lineCount = 0;
    }

    // One write() call per emitted character; its return value is not inspected.
    wstream.write(c);
  }

  wstream.end();
});
But I'm still getting an out-of-memory error. The script runs with no problems if I comment out wstream.write(c). Why?
Upvotes: 0
Views: 1129
Reputation: 6219
Throttle the I/O:
Stream the input in chunks, pause the stream for each chunk, parse and manipulate each character of the current chunk, and write the result to the output.
Then resume the input stream to continue with the next chunk, and repeat until the input is exhausted.
I had no problem throttling your code using createReadStream and createWriteStream.
Here is the code (tested successfully with a 64 MB file):
const fs = require('fs')

// Module-level state: `w` and `charCount` are read and updated by EMX().
var w
var charCount = 0
var proportion = 2

// First pass: read the whole file once, only to derive the line width `w`
// from the total character count.
fs.readFile('input.txt', 'utf8', function (err, data) {
  if (err) {
    return console.log(err)
  }

  const height = parseInt(Math.sqrt(data.length / proportion))
  w = parseInt(height * proportion)

  // Drop the reference to the full file contents before the streaming pass starts.
  data = undefined

  console.log('[+] starting ( w:', w, ')')
  EMX()
});
//:Here is the magick
/**
 * Streams 'input.txt' into 'output.txt', normalising whitespace and
 * inserting a line break once more than `w` characters have been emitted
 * on the current line.
 *
 * Per input character:
 *  - a space (32) or line feed (10) that directly follows an identical
 *    character is dropped (this also holds across chunk boundaries);
 *  - line feed, carriage return and tab are emitted as a single space;
 *  - every other character is emitted unchanged.
 *
 * Relies on the module-level `fs`, `w` (line width) and `charCount`
 * (characters emitted on the current line), and updates `charCount`.
 *
 * Memory stays bounded because each input chunk is transformed into one
 * string and written with a single write() call, and the reader is paused
 * whenever the writer reports a full buffer, resuming on 'drain'.
 */
function EMX(){
  // Let the stream decode UTF-8 so a multi-byte character split across two
  // chunks is reassembled instead of being corrupted by a per-chunk toString().
  const I = fs.createReadStream('input.txt', { encoding: 'utf8' })
  const O = fs.createWriteStream('output.txt')

  // Char code of the previous input character; -1 before the first one.
  // Kept outside the handler so the duplicate check works across chunks.
  let prevCode = -1

  I.on('error', (err) => { console.log(err); O.destroy() })
  O.on('error', (err) => { console.log(err); I.destroy() })

  // Close the output once the input is exhausted; report completion only
  // after everything has been flushed.
  I.on('end', () => { O.end() })
  O.on('finish', () => { console.log("[+] operation completed") })

  I.on('data', (data) => {
    let out = ''
    for (let x = 0; x < data.length; x++) {
      const code = data.charCodeAt(x)
      const isRepeat = (code === 32 || code === 10) && code === prevCode
      prevCode = code
      if (isRepeat) continue

      let c = (code === 10 || code === 13 || code === 9) ? " " : data.charAt(x)
      if (charCount > w) { c += "\n"; charCount = 0 }
      charCount++
      out += c
    }

    // write() returns false when the writer's internal buffer is full:
    // stop reading until it has drained, otherwise pending output piles
    // up in memory without bound.
    if (out.length > 0 && !O.write(out)) {
      I.pause()
      O.once('drain', () => { I.resume() })
    }
  })
}
Upvotes: 1