stwissel
stwissel

Reputation: 20404

Splitting a text file based on number of lines in node.js

Hitting the limit of my understanding here. I have a large text file that I need to split into chunks of 200 lines each (using mime.types for practice). I'm using readline, but it seems to ignore my pause / resume statements. What am I missing?

// Splits a given file into smaller subfiles by line number
// (first attempt: pause()/resume() wrap the body of the 'line' handler).
var infileName = 'mime.types';
var fileCount = 1; // numeric suffix of the output file currently being written
var count = 0; // lines written to the current output file so far
var fs = require('fs');
var outfileName = infileName + '.' + fileCount;
var inStream = fs.createReadStream(infileName);
var outStream = fs.createWriteStream(outfileName);
var lineReader = require('readline').createInterface({
  input: inStream
});

// Runs once per input line: appends the line to the current output file and
// switches to a new output file once 200 lines have been written.
lineReader.on('line', function(line) {
  count++;
  // pause() here and resume() at the end of this handler are both called
  // synchronously within the same handler invocation.
  // NOTE(review): the Node.js readline docs state that pause() does not
  // immediately stop 'line' events from being emitted - confirm for the
  // Node version in use.
  lineReader.pause();
  outStream.write(line + '\n');
  if (count >= 200) {
    fileCount++;
    // Logged before outfileName is reassigned, so this reports the file
    // that was just filled.
    console.log('file ', outfileName, count);
    outStream.close();
    outfileName = infileName + '.' + fileCount;
    outStream = fs.createWriteStream(outfileName);
    count = 0;
  }
  lineReader.resume();
});

// Runs when the line reader closes: reports a partially filled last file
// (count > 0) and closes both streams.
lineReader.on('close', function() {
  if (count > 0) {
    console.log('Final close:', outfileName, count);
  }
  inStream.close();
  outStream.close();
  console.log('Done');
});

I also tried to put the change of stream into the callback of the write operation:

// Second attempt at the 'line' handler: the file-rotation logic and resume()
// are moved into the callback passed to outStream.write().
lineReader.on('line', function(line) {
  count++;
  lineReader.pause();
  outStream.write(line + '\n', function() {
    // This callback reads the shared `count` variable, which the outer
    // handler increments on every 'line' event.
    if (count >= 200) {
      fileCount++;
      console.log('file ', outfileName, count);
      outStream.close();
      outfileName = infileName + '.' + fileCount;
      outStream = fs.createWriteStream(outfileName);
      count = 0;
    }
    lineReader.resume();
  });
});

The result is the same in both cases:
node split file mime.types.1 1588 Done

Ideally I want a solution that doesn't rely on extra npm packages.

Upvotes: 3

Views: 4060

Answers (1)

Ludovic C
Ludovic C

Reputation: 3065

I got rid of the pause / resume calls. It seems they are not supported by the readline stream. All I did was provide a new writable stream every 200 lines.

// Splits a given file into smaller subfiles by line number.
// Subfiles are named <infileName>.<n>, with n starting at 1, and each one
// receives at most `linesPerFile` lines.
const infileName = 'mime.types';
const linesPerFile = 200; // maximum number of lines written to each subfile
const fs = require('fs');
let fileCount = 1; // numeric suffix of the subfile currently being written
let count = 0; // lines written to the current subfile so far
let outStream = null; // write stream of the current subfile; null while none is open
let outfileName = infileName + '.' + fileCount;
const inStream = fs.createReadStream(infileName);

const lineReader = require('readline').createInterface({
    input: inStream
});

/**
 * Opens the write stream for subfile number `fileCount` and resets the
 * per-file line counter.
 */
function newWriteStream(){
    outfileName = infileName + '.' + fileCount;
    outStream = fs.createWriteStream(outfileName);
    count = 0;
}

// Runs once per input line: appends the line to the current subfile and
// finishes that subfile once it holds `linesPerFile` lines.
lineReader.on('line', function(line) {
    // Open the subfile only when there is a line to put into it. Opening the
    // next one eagerly right after a full subfile would leave an empty
    // trailing file whenever the input length is an exact multiple of
    // `linesPerFile` (and an empty first file for an empty input).
    if (outStream === null) {
        newWriteStream();
    }
    count++;
    // The return value of write() is not checked, so no backpressure is
    // applied to the reader.
    outStream.write(line + '\n');
    if (count >= linesPerFile) {
        console.log('file ', outfileName, count);
        outStream.end();
        outStream = null;
        fileCount++;
        count = 0;
    }
});

// Runs when the line reader closes: finishes a partially filled last subfile
// if one is open, then closes the input stream.
lineReader.on('close', function() {
    if (outStream !== null) {
        console.log('Final close:', outfileName, count);
        outStream.end();
    }
    inStream.close();
    console.log('Done');
});

Upvotes: 5

Related Questions