anxxxious

Reputation: 27

How to write this with Promises?

var pdfParser = require('pdf-parser')
var fs = require('fs')
var PDF_PATH = __dirname + '/pdfs'
var results = []
var failed = []

fs.readdir(PDF_PATH, function(err, files){
    if(err){
        return console.log(err)
    }
    for(const file of files){
        let the_ent = {
            'name': '',
            'other data': []
        }

        pdfParser.pdf2json(PDF_PATH + '/' + file, function(error, pdf){
            if(error != null){
                console.log(error)
            }else if(pdf['pages'] == undefined){
                failed.push(file)
                console.log(file +' failed')
            }else{
                //populate 'results' array
                console.log(/*pdf_data*/)
                results.push(/*pdf_data*/)
            }
        })
    }
    console.log(results)
    console.log(failed)
    results = JSON.stringify(results)
    //fs.writeFileSync() write results to json
})

I don't know what's wrong with me this morning; I can't work out how to write this asynchronously. Obviously the logs/writeFile at the bottom fire as soon as the script executes.

I have tried wrapping this in async functions and awaiting the readdir / PDF parsing instead of using callbacks, clearly not correctly. I'm just trying to parse every PDF in a folder, push what I want into some arrays, and then log them once the loop finishes.

Upvotes: 1

Views: 1400

Answers (3)

jfriend00

Reputation: 707238

I would promisify the async operations and use async/await. For the fs operations, use the new fs.promises API. For others, use util.promisify() to make promisified versions.

The promise returned by the parsePDFs function below resolves to an object containing an array of parsed JSON and an array of failed filenames, so you get both pieces of information back:

const util = require('util');
const pdfParser = require('pdf-parser');
// make a promisified version of the parser function
pdfParser.pdf2jsonP = util.promisify(pdfParser.pdf2json);

const fsp = require('fs').promises;
const path = require('path');
const PDF_PATH = path.join(__dirname, 'pdfs');

async function parsePDFs(dir) {
    const files = await fsp.readdir(dir);
    const results = [];
    const failed = [];

    for (const file of files) {
        let the_ent = {
            'name': '',
            'other data': []
        }

        try {
            let pdf = await pdfParser.pdf2jsonP(path.join(dir, file));
            if (!pdf || pdf.pages === undefined) {
                throw new Error("pdf.pages is empty");
            }
            results.push(pdf);
        } catch(e){
            console.log(e);
            failed.push(file);
        }
    }
    // this will be the resolved value of the returned promise
    return {results, failed};
}

parsePDFs(PDF_PATH).then(data => {
    console.log("failed filenames: " data.failed);
    console.log("json results: ", data.results);
    // do something with data.results and data.failed
}).catch(err => {
    console.log(err);
});

Note: you declare, but never use, the variable the_ent.
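
If you also want to write the results to disk, as the commented-out fs.writeFileSync() in the question suggests, here's a minimal sketch using the same fs.promises API from above (the results.json filename is my own choice, not from the question):

parsePDFs(PDF_PATH).then(data => {
    // results.json is a hypothetical output filename; adjust to taste
    return fsp.writeFile(path.join(__dirname, 'results.json'),
        JSON.stringify(data.results, null, 2));
}).catch(err => {
    console.log(err);
});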

Upvotes: 2

Samuel Goldenbaum

Reputation: 18909

You can use util.promisify to promisify the callback-based functions:

const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);

Minimal demo:

const fs = require('fs');
const util = require('util');
const pdfParser = require('pdf-parser');

const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);

const PDF_PATH = __dirname + '/pdfs';

(async () => {
    async function processFiles() {
        let results = [];

        let files = await readdir(PDF_PATH);

        for (const file of files) {
            let pdf = await reader(PDF_PATH + '/' + file);
            results.push(pdf);
        }

        return results;
    }

    const result = await processFiles();

    console.info(result);
})();
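
Note that, unlike the question, this demo doesn't track failed files, and a single rejected parse will abort the whole run. A minimal variant of the loop that collects failures instead, reusing reader and PDF_PATH from the demo and assuming a failed array declared next to results:

for (const file of files) {
    try {
        // a failed parse is recorded rather than aborting the run
        const pdf = await reader(PDF_PATH + '/' + file);
        results.push(pdf);
    } catch (e) {
        failed.push(file); // `failed` is an assumed array, mirroring the question
    }
}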

Upvotes: 1

Jonas Wilms

Reputation: 138257

Wrap the smallest asynchronous tasks into Promises, then use async/await to combine them:

const fs = require('fs');
const pdfParser = require('pdf-parser');

// the Promise wrapper:
const parsePdf = file => new Promise((res, rej) =>
    pdfParser.pdf2json(file, (err, r) => err ? rej(err) : res(r)));

(async function () { // start an asynchronous context
    const PDF_PATH = __dirname + '/pdfs';
    const results = [], failed = []; // prefer const over let

    // reading files in a promising way is already provided natively:
    const files = await fs.promises.readdir(PDF_PATH);

    for (const file of files) { // this runs in series; parallel would probably be faster
        let the_ent = {
            name: '',
            'other data': [], // avoid spaces in property names!
        };

        const pdf = await parsePdf(PDF_PATH + '/' + file);

        if (pdf.pages === undefined) { // prefer strict equality (===) over loose equality (==)
            failed.push(file);
            console.log(file + ' failed');
        } else {
            // populate 'results' array
        }
    }

    console.log(results, failed);
})();

You can probably process the files in parallel too.
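
For example, here's a minimal sketch of the parallel variant using Promise.allSettled (available since Node 12.9), reusing parsePdf, results, failed, and PDF_PATH from inside the async context above; whether the parser tolerates many concurrent parses is an assumption worth verifying:

const files = await fs.promises.readdir(PDF_PATH);

// start every parse at once; allSettled waits for all of them,
// whether they fulfill or reject
const settled = await Promise.allSettled(
    files.map(file => parsePdf(PDF_PATH + '/' + file))
);

settled.forEach((outcome, i) => {
    if (outcome.status === 'fulfilled'
        && outcome.value && outcome.value.pages !== undefined) {
        results.push(outcome.value);
    } else {
        failed.push(files[i]);
    }
});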

Upvotes: 2
