oniramarf
oniramarf

Reputation: 903

Read text file row by row using stream

I need to read a text file row by row, perform some operations on each row, and write the results to an output file.

Since the file is pretty large (it could be more than 100MB), streams are suggested to improve performance.

I have written the following function to read the file, but the size of the chunk is constant and so it won't always contain one row or a precise multiple of rows.

/**
 * Streams `inputFilename` chunk by chunk and writes one output line per chunk
 * to `outputFilename`.
 *
 * Chunks are delivered as the stream produces them, so a chunk boundary can
 * fall in the middle of a row; this function does not reassemble rows.
 *
 * @param {string} inputFilename - Path of the text file to read (decoded as UTF-8).
 * @param {string} outputFilename - Path of the file the results are written to.
 * @returns {Promise<void>} Resolves once the output stream has finished;
 *   rejects with the underlying stream error if reading or writing fails.
 */
function loadLogFileInfo(inputFilename, outputFilename) {
    return new Promise((resolve, reject) => {
        const inputStream = fs.createReadStream(inputFilename, "utf8");
        const outputStream = fs.createWriteStream(outputFilename);

        // Shared failure path: stop reading, close the output and surface the cause.
        const fail = (err) => {
            inputStream.destroy();
            outputStream.end();
            reject(err);
        };

        inputStream.on("error", fail);
        // Without a listener, a write failure would surface as an uncaught exception.
        outputStream.on("error", fail);
        // Resolve on "finish" rather than on the input's "end", so callers can
        // rely on the output being flushed when the promise settles.
        outputStream.on("finish", resolve);

        inputStream.on("end", () => {
            outputStream.end();
        });

        inputStream.on("data", (chunk) => {
            let data = "";
            // Compute some data on the row and put in data variable
            outputStream.write(data + "\n");
        });
    });
}

I have also found this question that answers the problem for binary files, but since in this case it's a text file, maybe there could be a simpler solution.

Upvotes: 0

Views: 762

Answers (2)

Jim Wright
Jim Wright

Reputation: 6058

You can use split (which is available on NPM) to handle this very nicely. You can see the code used on GitHub. The following example reads your input and writes it line by line to the output stream.

const split = require('split');

/**
 * Copies `inputFilename` to `outputFilename` line by line, using `split()` to
 * turn the raw read stream into a stream of lines.
 *
 * @param {string} inputFilename - Path of the text file to read (decoded as UTF-8).
 * @param {string} outputFilename - Path of the file the lines are written to.
 * @returns {Promise<void>} Resolves once the output stream has finished;
 *   rejects with the underlying error if reading, splitting or writing fails.
 */
function loadLogFileInfo(inputFilename, outputFilename) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(inputFilename, "utf8");
    const lineStream = fileStream.pipe(split());
    const outputStream = fs.createWriteStream(outputFilename);

    // Shared failure path: stop reading, close the output and surface the cause.
    const fail = (err) => {
      fileStream.destroy();
      outputStream.end();
      reject(err);
    };

    // pipe() does not forward errors from the source to the destination, so
    // the file stream needs its own listener (e.g. for a missing input file).
    fileStream.on("error", fail);
    lineStream.on("error", fail);
    outputStream.on("error", fail);
    // Resolve on "finish" so the output is flushed when the promise settles.
    outputStream.on("finish", resolve);

    lineStream.on("end", () => {
      outputStream.end();
    });
    lineStream.on("data", (line) => {
      outputStream.write(line + "\n");
    });
  });
}

Upvotes: 1

oniramarf
oniramarf

Reputation: 903

Since there is still no answer or comment on this, I'm posting my own solution here, inspired by the question linked in my post above.

/**
 * Reads `inputFilename` row by row (rows are separated by "\n"), passes each
 * row through `processRow`, and writes every result followed by "\n" to
 * `outputFilename`.
 *
 * Chunks coming from the read stream rarely end on a row boundary, so the
 * incomplete tail of each chunk is kept in `pending` and prepended to the
 * next one. Whatever is left when the input ends is emitted as the last row,
 * so a file without a trailing newline does not lose its final row.
 *
 * @param {string} inputFilename - Path of the text file to read (decoded as UTF-8).
 * @param {string} outputFilename - Path of the file the results are written to.
 * @param {(row: string) => string} [processRow] - Computes the output for one
 *   row (without its "\n"). Defaults to returning the row unchanged.
 * @returns {Promise<void>} Resolves once the output stream has finished;
 *   rejects with the underlying error if reading, writing or `processRow` fails.
 */
function loadLogFileInfo(
  inputFilename,
  outputFilename,
  processRow = (row) => row,
) {
  return new Promise((resolve, reject) => {
    const inputStream = fs.createReadStream(inputFilename, "utf8");
    const outputStream = fs.createWriteStream(outputFilename);
    // Start of a row whose terminating "\n" has not been read yet.
    let pending = "";

    // Shared failure path: stop reading, close the output and surface the cause.
    const fail = (err) => {
      inputStream.destroy();
      outputStream.end();
      reject(err);
    };

    // Turns complete rows into the text to append to the output file.
    const formatRows = (rows) =>
      rows.map((row) => `${processRow(row)}\n`).join("");

    inputStream.on("error", fail);
    // Without a listener, a write failure would surface as an uncaught exception.
    outputStream.on("error", fail);
    // Resolve on "finish" so the output is flushed when the promise settles.
    outputStream.on("finish", resolve);

    inputStream.on("data", (chunk) => {
      const rows = (pending + chunk).split("\n");
      // The last piece has no "\n" after it yet; keep it for the next chunk.
      pending = rows.pop();
      if (rows.length === 0) {
        return;
      }

      try {
        // One write per chunk; when the writer's buffer is full, stop reading
        // until it drains so a large input is not buffered in memory.
        if (!outputStream.write(formatRows(rows))) {
          inputStream.pause();
          outputStream.once("drain", () => inputStream.resume());
        }
      } catch (err) {
        fail(err);
      }
    });

    inputStream.on("end", () => {
      try {
        // Flush the final row when the file does not end with "\n".
        if (pending !== "") {
          outputStream.write(formatRows([pending]));
        }
        outputStream.end();
      } catch (err) {
        fail(err);
      }
    });
  });
}

Upvotes: 0

Related Questions