Reputation: 45
I have an issue using promises with Node.js. I am using cheerio and request-promise to do web scraping, and I want to write my results to a CSV file once all asynchronous functions have been executed, using the Promise.all(promises).then(...)
syntax. It was working fine but suddenly the program finishes without any error or rejects but without executing the then(...)
part (no file and no log neither). Here is my code:
const rp = require('request-promise');
const cheerio = require('cheerio');
const csv = require('fast-csv');
const fs = require('fs');
var results = [];
var promises = [];
function getResults(district, branch) {
for (let i = 65; i <= 90; i++) {
let letter = String.fromCharCode(i);
let generalOptions = {
uri: 'https://ocean.ac-lille.fr/publinet/resultats?previousValCri1=' + branch + '&previousValCri0=0' + district + '&previousIdBaseSession=pub_24&actionId=6&valCriLettre=' + letter,
transform: function (body) {
return cheerio.load(body);
}
};
promises.push(new Promise(function(resolve, reject) {
rp(generalOptions)
.then(($) => {
$('.tableauResultat').find('tr').each(function(i, elem) {
let children = $(elem).children('td');
let name = $(children[0]).text();
results.push({name: name, result: 42, branch: branch});
resolve();
});
})
.catch((err) => {
console.log(err);
//reject();
});
}
));
}
}
getResults(process.argv[2], process.argv[3]);
Promise.all(promises).then(() => {
console.log("Finished!");
var ws = fs.createWriteStream('results_bis.csv', {flag: 'w'});
csv.write(results).pipe(ws);
}).catch((err) => {
console.log(err);
});
Upvotes: 2
Views: 726
Reputation: 222498
results
array is usually an antipattern when being used with Promise.all
. It's expected that promises that are passed to Promise.all
return necessary results, so it's possible to access them as Promise.all(promises).then(results => { ... })
.
There is no need for callback-based processing, $('.tableauResultat').find('tr').each(...)
, it results in poor control flow. Since Cheerio provides jQuery-like API, results can be converted to array and processed in a way that is idiomatic to vanilla JavaScript.
The code above uses promise construction antipattern. There's no need for new Promise
if there's existing one rp(generalOptions)
. It contributes to the problem; Node.js exists when there's nothing scheduled for execution. Some of promises in promise
are pending without a chance for them to be resolved because neither resolve
nor reject
is called. This happens if each
callback is never triggered. The problem could be solved by moving resolve()
outside each
callback.
A more straightforward way to do this that leaves much less place for obscure problems and is easier to debug:
const promises = [];
function getResults(district, branch) {
for (let i = 65; i <= 90; i++) {
...
promises.push(
rp(generalOptions)
.then(($) => {
const trs = $('.tableauResultat').find('tr').toArray();
const results = trs.map(elem => {
let children = $(elem).children('td');
let name = $(children[0]).text();
return {name, result: 42, branch};
});
return results;
})
.catch((err) => {
console.log(err);
return null; // can be filtered out in Promise.all results
// or rethrow an error
})
);
}
}
getResults(...);
Promise.all(promises).then(nestedResults => {
const results = nestedResults.reduce((flatArr, arr) => flatArr.concat(arr), []);
console.log("Finished!");
var ws = fs.createWriteStream('results_bis.csv', {flag: 'w'});
csv.write(results).pipe(ws);
}).catch((err) => {
console.log(err);
});
Notice that errors from file streams are currently not handled. It's unlikely that Promise.all
catch
will ever be triggered, even if there are problems.
Upvotes: 1