Reputation: 18538
I successfully used pdf2json to fetch and read a PDF from a URL in Node.js.
However, my Azure Function is an async function, and it finishes executing before the pdfPipe.on("pdfParser_dataReady", pdf => {}) callback runs.
My implementation is as follows:
var request = require('request');
var PDFParser = require("pdf2json");
// Create the parser with the same constructor arguments as before.
const pdfParser = new PDFParser(this, 1);

// Download the PDF as raw bytes (encoding: null) and feed it into the parser.
const requestOptions = { url: pdfUrl, encoding: null };
const pdfPipe = request(requestOptions).pipe(pdfParser);

// Log parse failures.
const onParseError = (err) => {
  console.error(err);
};

// Invoked with the parsed document once parsing completes.
const onParseReady = (pdf) => {
  // handle pdfData
};

pdfPipe.on("pdfParser_dataError", onParseError);
pdfPipe.on("pdfParser_dataReady", onParseReady);
This works well in Node.js on my machine, but I need to convert the pdfPipe.on('---', function(){}) callbacks to async/await, because the Azure Function does not wait for pdfPipe to finish.
How can I do that?
Upvotes: 2
Views: 601
Reputation: 18538
I solved it using the following approach:
/**
 * Adapts the parser's completion events into a Promise so callers can `await` it.
 *
 * Listens for a single "pdfParser_dataReady" or "pdfParser_dataError" event on
 * the given emitter and settles with whichever fires first.
 *
 * @param {import('events').EventEmitter} readableStream - Emitter that fires
 *   "pdfParser_dataReady" with the parsed document or "pdfParser_dataError"
 *   with the failure.
 * @returns {Promise<*>} Resolves with the "pdfParser_dataReady" payload;
 *   rejects with the "pdfParser_dataError" payload.
 */
function streamToPdf(readableStream) {
  return new Promise((resolve, reject) => {
    // Each handler detaches its sibling so no listener outlives the promise;
    // otherwise every call would leave two handlers on the emitter.
    const onReady = (pdf) => {
      readableStream.removeListener("pdfParser_dataError", onError);
      resolve(pdf);
    };
    const onError = (err) => {
      readableStream.removeListener("pdfParser_dataReady", onReady);
      reject(err);
    };

    // `once` removes the handler that actually fired.
    readableStream.once("pdfParser_dataReady", onReady);
    readableStream.once("pdfParser_dataError", onError);
  });
}
and then call it like this:
// Fetch the PDF as raw bytes and pipe it into the parser.
const requestOptions = { url: pdfUrl, encoding: null };
const pdfPipe = request(requestOptions).pipe(pdfParser);

// Wait here until the parser reports success or failure.
const pdf = await streamToPdf(pdfPipe);
Upvotes: 1