Psymon25
Psymon25

Reputation: 356

Node express async file read

I have the code below, which reads a large XML file (approx. 600 MB in size) and returns the following output:

Total Number of Lines in File: 12077214
Read in 3774.015ms

CODE:

var express = require("express");
var router = express.Router();
var fs = require("fs");
var es = require("event-stream");
var now = require("performance-now");

/**
 * GET /testapi — counts the non-empty lines of ./large_xml_test.xml and
 * responds with the count and the elapsed time.
 *
 * The file is streamed and split into lines, and the response is sent from
 * the "end" handler, i.e. only once the last line has been counted.
 *
 * If any stage of the stream pipeline fails, the error is logged, the read
 * stream is destroyed and the error is handed to next(err) so the request
 * does not hang without a response.
 */
router.get("/testapi", function (req, res, next) {
  var totalLines = 0;
  var t0 = now();
  var failed = false;

  // pipe() does not forward "error" events to the destination stream, so the
  // same handler is attached to every stage. The flag makes sure next(err)
  // is called at most once.
  function fail(err) {
    if (failed) {
      return;
    }
    failed = true;
    console.log("Error while reading file.", err);
    source.destroy();
    next(err);
  }

  var source = fs.createReadStream("./large_xml_test.xml");

  source
    .on("error", fail)
    .pipe(es.split())
    .on("error", fail)
    .pipe(
      es.mapSync(function (line) {
        if (line !== "") {
          totalLines++;
        }
      })
    )
    .on("error", fail)
    .on("end", function () {
      if (failed) {
        // An error has already been reported for this request.
        return;
      }
      console.log("Read entire file.");
      var t1 = now();
      var elapsed = (t1 - t0).toFixed(3);
      console.log("Total Number of lines: ", totalLines);

      console.log(`Performance now line count timing: ${elapsed}ms`);
      res.send(
        `Total Number of Lines in File: ${totalLines.toString()}<br/>Read in ${elapsed}ms`
      );
    });
});

module.exports = router;

I have tried moving this into a separate function using the async and await keywords, but the results are not displayed when using res.send, i.e. the API is called but a 200 is returned immediately and the await does not seem to happen.

It seems the console.log calls fire (their output shows up in VS Code), but that's it. Is there a way to have this function moved to a separate file and have res.send await the return value?

Thanks, I'm just learning Node.js currently.

=================== EDIT ===================

In answer to Keith's statement, this was the last thing I tested, but it failed:

FILE 1:

var express = require("express");
var router = express.Router();
const test = require("../functions/readFileEventStream");

/**
 * GET /testapi — responds with whatever test.readLargeFile() resolves to.
 *
 * The handler itself is async and awaits the call directly. Wrapping the
 * await in a separate async IIFE only yields a pending promise, which would
 * be handed to res.send() before the work has finished.
 *
 * A rejection is forwarded to next(err) instead of being left unhandled.
 * NOTE(review): this only waits for the file if readLargeFile() returns a
 * promise that settles when reading is done — confirm in that module.
 */
router.get("/testapi", async function (req, res, next) {
  try {
    const results = await test.readLargeFile();
    res.send(results);
  } catch (err) {
    next(err);
  }
});

module.exports = router;

File 2

var fs = require("fs");
var es = require("event-stream");
var now = require("performance-now");

/**
 * Streams ./large_xml_test.xml, counts its non-empty lines and reports the
 * count together with the time the read took.
 *
 * The stream events are wrapped in a Promise because a value returned from an
 * "end" event handler never reaches the caller of the surrounding function.
 * Resolving the promise from that handler is what lets callers `await` it.
 *
 * @returns {Promise<string>} Resolves with the summary message once the
 *   stream has ended. Rejects with the stream error if any stage fails.
 */
function readLargeFile() {
  return new Promise(function (resolve, reject) {
    // Counter and start time are per call, so repeated calls neither
    // accumulate line counts nor measure time since the module was loaded.
    var totalLines = 0;
    var t0 = now();
    var source = fs.createReadStream("./large_xml_test.xml");

    // pipe() does not forward "error" events downstream, so every stage gets
    // this handler. Extra reject() calls after the first are no-ops.
    function fail(err) {
      console.log("Error while reading file.", err);
      source.destroy();
      reject(err);
    }

    source
      .on("error", fail)
      .pipe(es.split())
      .on("error", fail)
      .pipe(
        es.mapSync(function (line) {
          if (line !== "") {
            totalLines++;
          }
        })
      )
      .on("error", fail)
      .on("end", function () {
        var t1 = now();
        var msg = `Total Number of Lines in File: ${totalLines.toString()},\r\nRead in ${(
          t1 - t0
        ).toFixed(3)}ms`;
        resolve(msg);
      });
  });
}

module.exports = {
  readLargeFile,
};

Upvotes: 1

Views: 309

Answers (2)

Eduard Hasanaj
Eduard Hasanaj

Reputation: 895

The logic that reads the file can be encapsulated in a Promise. Why a Promise? A Promise is supported by the await mechanism, so it can be awaited in an async function. A Promise usually wraps code that is not synchronous, so mapSync is changed to map so that the file can be processed asynchronously.

// GET /testapi: waits for readFile() to finish counting the lines of
// ./large_xml_test.xml, logs the count and the timing, and sends both to the
// client. Any failure inside the try block is logged and answered with a 500.
router.get("/testapi", async (req, res) => {
    try {
        const startedAt = now();
        const lineCount = await readFile('./large_xml_test.xml');
        console.log("Read entire file.");
        const finishedAt = now();
        console.log("Total Number of lines: ", lineCount);

        // Format the elapsed time once and reuse it for the log and the response.
        const elapsed = (finishedAt - startedAt).toFixed(3);
        console.log(`Performance now line count timing: ${elapsed}ms`);

        const body = `Total Number of Lines in File: ${lineCount.toString()}<br/>Read in ${elapsed}ms`;
        res.send(body);
    }
    catch (err) {
        console.log('error reading file ' + err);
        res.status(500).send('failed to read file');
    }
});

/**
 * Counts the non-empty lines of a file by streaming it line by line.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<number>} Resolves with the number of non-empty lines once
 *   the stream has ended. Rejects with the stream error if any stage of the
 *   pipeline fails (for example when the file cannot be opened).
 */
function readFile(path) {
    return new Promise(function (resolve, reject) {
        let totalLines = 0;
        const source = fs.createReadStream(path);

        // pipe() does not forward "error" events to the destination, so a
        // listener on the last stage alone would miss read errors. Attach the
        // same handler to every stage and release the file handle on failure.
        function fail(err) {
            source.destroy();
            reject(err);
        }

        source
            .on("error", fail)
            .pipe(es.split())
            .on("error", fail)
            .pipe(
                es.map(function (line, cb) {
                    if (line !== "") {
                        totalLines++;
                    }

                    cb(null);
                })
            )
            .on("error", fail)
            .on("end", function () {
                resolve(totalLines);
            });
    });
}

Upvotes: 2

O. Jones
O. Jones

Reputation: 108851

The thing to keep in mind here is that your XML reader and your route handler are both event-driven functions. In your example you call res.send() from inside your on("end"...) event handler. That's why things wait the way you want.

The easiest way to split up the XML reader and the route handler is to make them both async. This code is an example and has not been debugged.

Route handler

/**
 * GET /testapi — awaits getXML() for ./large_xml_test.xml and responds with
 * the value it resolves to (used here as the line count) plus the elapsed time.
 *
 * The await sits inside try/catch so that a rejected promise is passed to
 * next(err) instead of becoming an unhandled rejection.
 */
router.get("/testapi", async function (req, res, next) {
  var t0 = now();

  try {
    var totalLines = await getXML("./large_xml_test.xml");

    console.log("Read entire file.");
    var t1 = now();
    var message = `Total Number of Lines in File: ${totalLines}<br/>Read in ${(
      t1 - t0
    ).toFixed(3)}ms`;
    console.log(message);
    res.send(message);
  } catch (err) {
    next(err);
  }
});

Your xml reader module

var fs = require("fs");
var es = require("event-stream");

/**
 * Counts the non-empty lines of a file by streaming it line by line.
 *
 * A `return` or `throw` inside a stream event handler only affects that
 * handler, not the function that registered it. The events are therefore
 * wrapped in a Promise that is resolved from "end" and rejected from "error",
 * which is what makes `await getXML(file)` wait for the whole file.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<number>} Resolves with the number of non-empty lines.
 *   Rejects with an Error whose `cause` is the underlying stream error.
 */
function getXML (file) {
  return new Promise(function (resolve, reject) {
    let totalLines = 0;
    const source = fs.createReadStream(file);

    // pipe() does not forward "error" events downstream, so each stage is
    // given the same handler. The read stream is destroyed on failure.
    function fail(err) {
      source.destroy();
      reject(new Error("Error while reading file.", { cause: err }));
    }

    source
      .on("error", fail)
      .pipe(es.split())
      .on("error", fail)
      .pipe(
        es.mapSync(function (line) {
          if (line !== "") {
            totalLines++;
          }
        })
      )
      .on("error", fail)
      .on("end", function () {
        resolve(totalLines);
      });
  });
}

module.exports = getXML;

The trick is this: the route handler awaits getXML, so as long as the promise returned by getXML is only settled from its on("end"...) event handler, the route handler doesn't call res.send() until your XML loading operation is done.

Upvotes: 1

Related Questions