Reputation: 91759
I have large text files, which range between 30MB and 10GB. How can I count the number of lines in a file using Node.js?

I have these limitations:
Upvotes: 50
Views: 79782
Reputation: 1680
You can try this solution to get the number of lines contained in a file.
const fs = require('fs');
const path = require('path');

const filePath = path.join(__dirname, 'data.js');

function countLines(filePath) {
  const fileContents = fs.readFileSync(filePath, 'utf8');
  const lines = fileContents.split('\n');
  return lines.length;
}

const lineCount = countLines(filePath);
console.log('filePath:', filePath);
console.log('Number of lines:', lineCount);
Output:

Number of lines: 11
Upvotes: 0
Reputation: 9222
Simple solution using readline:
import readline from 'node:readline';

export default async function countLines(input) {
  let lineCount = 0;
  for await (const _ of readline.createInterface({input, crlfDelay: Infinity})) {
    lineCount++;
  }
  return lineCount;
}
import fs from 'node:fs';
console.log(await countLines(fs.createReadStream('file.txt')));
//=> <number>
Upvotes: 1
Reputation: 41597
You could do this as the comments suggest, using wc:
var exec = require('child_process').exec;

exec('wc -l /path/to/file', function (error, results) {
  console.log(results);
});
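Note that results contains the file name as well as the count (wc -l prints "<count> <path>"). If you only need the number, a minimal sketch parsing it out (the error handling here is an assumption, not part of the original answer):

var exec = require('child_process').exec;

exec('wc -l /path/to/file', function (error, results) {
  if (error) throw error;
  // wc -l prints "<count> <path>"; parseInt stops at the first non-digit.
  var lineCount = parseInt(results.trim(), 10);
  console.log(lineCount);
});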
Upvotes: 31
Reputation: 1705
The best solution I've found uses promises, async, and await. This is also an example of how to await the fulfillment of a promise:
#!/usr/bin/env node

const fs = require('fs');
const readline = require('readline');

function main() {
  function doRead() {
    return new Promise(resolve => {
      var inf = readline.createInterface({
        input: fs.createReadStream('async.js'),
        crlfDelay: Infinity
      });
      var count = 0;
      inf.on('line', (line) => {
        console.log(count + ' ' + line);
        count += 1;
      });
      inf.on('close', () => resolve(count));
    });
  }

  async function showRead() {
    var x = await doRead();
    console.log('line count: ' + x);
  }

  showRead();
}

main();
Upvotes: 3
Reputation: 19051
If you use Node 8 and above, you can use this async/await pattern:
const util = require('util');
const exec = util.promisify(require('child_process').exec);

async function fileLineCount({ fileLocation }) {
  const { stdout } = await exec(`cat ${fileLocation} | wc -l`);
  return parseInt(stdout, 10);
}

// Usage
async function someFunction() {
  const lineCount = await fileLineCount({ fileLocation: 'some/file.json' });
}
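If the file path may contain spaces or shell metacharacters, a variant that skips the shell (and the cat) by calling wc directly via execFile; a sketch under the same Node 8+ assumption, not part of the original answer:

const util = require('util');
const execFile = util.promisify(require('child_process').execFile);

async function fileLineCount({ fileLocation }) {
  // Runs wc without a shell, so the path is passed through as a single argument.
  const { stdout } = await execFile('wc', ['-l', fileLocation]);
  return parseInt(stdout, 10);
}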
Upvotes: 4
Reputation: 91983
We can use indexOf to let the VM find the newlines:
const fs = require('fs');

function countFileLines(filePath) {
  return new Promise((resolve, reject) => {
    let lineCount = 0;
    fs.createReadStream(filePath)
      .on("data", (buffer) => {
        let idx = -1;
        lineCount--; // Because the loop will run once for idx=-1
        do {
          idx = buffer.indexOf(10, idx + 1);
          lineCount++;
        } while (idx !== -1);
      })
      .on("end", () => {
        resolve(lineCount);
      })
      .on("error", reject);
  });
}
This solution finds the position of the first newline using .indexOf, increments lineCount, then finds the next one. The second parameter of .indexOf tells it where to start looking for newlines, so we jump over large chunks of the buffer. The while loop runs once for every newline, plus one.

We let the Node runtime do the searching for us, which is implemented at a lower level and should be faster.

On my system this is about twice as fast as running a for loop over the buffer length on a large file (111 MB).
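For reference, a minimal usage sketch (the file name is illustrative):

countFileLines('big-file.txt')
  .then((lineCount) => console.log(lineCount))
  .catch(console.error);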
Upvotes: 34
Reputation: 103
var fs = require('fs');
var filename = process.argv[2];
var data = fs.readFileSync(filename);
var res = data.toString().split('\n').length;
console.log(res - 1);
Upvotes: 7
Reputation: 2814
You can also use indexOf():
// `chunk` is a Buffer from a stream's 'data' event; 10 is the byte value of '\n'.
var index = -1;
var count = 0;
while ((index = chunk.indexOf(10, index + 1)) > -1) count++;
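A complete sketch wiring this into a read stream (reading the path from the command line is an assumption):

var fs = require('fs');
var count = 0;

fs.createReadStream(process.argv[2])
  .on('data', function (chunk) {
    var index = -1;
    while ((index = chunk.indexOf(10, index + 1)) > -1) count++;
  })
  .on('end', function () {
    console.log(count);
  });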
Upvotes: 1
Reputation: 3172
Here is another way without so much nesting.
var fs = require('fs');

var filePath = process.argv[2];
var fileBuffer = fs.readFileSync(filePath);
var to_string = fileBuffer.toString();
var split_lines = to_string.split("\n");
// Subtract 1 because a trailing newline yields an empty final element.
console.log(split_lines.length - 1);
Upvotes: 4
Reputation: 237
Since io.js 1.5.0 there is a Buffer#indexOf() method; using it to compare with Andrey Sidorov's answer:
ubuntu@server:~$ wc logs
7342500 27548750 427155000 logs
ubuntu@server:~$ time wc -l logs
7342500 logs
real 0m0.180s
user 0m0.088s
sys 0m0.084s
ubuntu@server:~$ nvm use node
Now using node v0.12.1
ubuntu@server:~$ time node countlines.js logs
7342500
real 0m2.559s
user 0m2.200s
sys 0m0.340s
ubuntu@server:~$ nvm use iojs
Now using node iojs-v1.6.2
ubuntu@server:~$ time iojs countlines2.js logs
7342500
real 0m1.363s
user 0m0.920s
sys 0m0.424s
ubuntu@server:~$ cat countlines.js
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    for (i = 0; i < chunk.length; ++i)
      if (chunk[i] == 10) count++;
  })
  .on('end', function() {
    console.log(count);
  });
ubuntu@server:~$ cat countlines2.js
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    var index = -1;
    while ((index = chunk.indexOf(10, index + 1)) > -1) count++;
  })
  .on('end', function() {
    console.log(count);
  });
ubuntu@server:~$
Upvotes: 5
Reputation: 25466
Solution without using wc:
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    for (i = 0; i < chunk.length; ++i)
      if (chunk[i] == 10) count++;
  })
  .on('end', function() {
    console.log(count);
  });
It's slower, but not as much as you might expect: 0.6s for a 140M+ file, including Node.js loading and startup time.
>time node countlines.js video.mp4
619643
real 0m0.614s
user 0m0.489s
sys 0m0.132s
>time wc -l video.mp4
619643 video.mp4
real 0m0.133s
user 0m0.108s
sys 0m0.024s
>wc -c video.mp4
144681406 video.mp4
Upvotes: 48