Reputation: 98
Here I am trying to loop over an array of URLs, fetch each page, extract the URLs found in the response, and then check each of those URLs. I want the outer loop to move on to the next page only after all the inner requests for the current page have completed, and I want the output to look like the example below.
Checking Urls in : https://stackoverflow.com status 200 done .... .... Checking Urls in : https://example.com ..... ..... .....
Total links #20
But in my code the outer loop finishes before the inner requests are done.
// Link checker script: fetches every page in pageUrls, extracts the hrefs from
// each page body with getHrefs, requests each extracted URL, and finally
// prints the total and broken link counts.
const getHrefs = require('get-hrefs');
const async = require("async");
var req = require('request-promise');
var errors = require('request-promise/errors');
var pageUrls = ['https://stackoverflow.com','https://www.exsample.com'];
// NOTE(review): assigned without var/let/const, so this is an implicit global.
testUrls='';
// Outer loop: the iteratee runs once per page URL; the final function (below)
// runs when every iteratee has invoked its callback, or one has passed an error.
async.map(pageUrls, function(pageUrl,callback){
// Fetch the page using the callback form of the request call.
req(pageUrl, function (err, response, body) {
// NOTE(review): response.statusCode is read before err is checked; if the
// request failed and response is undefined this line throws.
console.log(pageUrl, " STATUS: ", response.statusCode);
if ( err){
return callback(err);
}
else {
// Collect the links found in the page body.
testUrls= getHrefs(response.body);
// Inner loop: one request per extracted link.
async.map(testUrls, function(testUrl,callback1){
// NOTE(review): linkCount and brokenLinks are not declared anywhere in the
// visible code; they must be initialised elsewhere or these increments fail.
linkCount++;
// The request promise is started but neither returned nor waited on.
// This handler only runs for errors matching errors.StatusCodeError.
req(testUrl).catch(errors.StatusCodeError, function (reason) {
brokenLinks++;
console.log("URL: "+ testUrl+ "reason: "+ reason.statusCode);
})
// Errors matching errors.RequestError are caught and ignored (empty handler).
.catch(errors.RequestError, function (reason) {
}).finally(function () {
});
// NOTE(review): callback1 is invoked synchronously, right after the request
// is issued and before it settles. The inner map therefore reports completion
// while requests are still in flight, which appears to be why the outer loop
// finishes early.
return callback1();
},function(err){
// Inner loop reported completion: signal the outer loop for this page.
// The inner err argument is not forwarded.
callback();
}) ;
}
})
} ,function(err){
// Final callback of the outer loop; err is not inspected here.
console.log("OuterLoopFinished");
console.log('*************************************************************' + '\n');
console.log('Check complete! || Total Links: ' + linkCount + ' || Broken Links: ' + brokenLinks);
console.log('*************************************************************');
});
Upvotes: 1
Views: 1381
Reputation: 7282
I think you should rethink your approach. This results in around 400 URLs to check. You should fire all the requests for the sub-links in parallel, and then you can track the count of broken URLs per host URL. This will make your script complete faster.
const pageUrls = ['https://stackoverflow.com','https://www.google.com'];
const rp = require('request-promise');
const getHrefs = require('get-hrefs');

/**
 * Requests a single link and reports whether it is broken.
 *
 * The returned promise never rejects: a failed request is logged and
 * reported as `true`, so callers can safely combine many checks with
 * Promise.all without one bad link aborting the rest.
 *
 * @param {string} url - Link to check.
 * @param {string} host - Page the link was found on (used for logging only).
 * @returns {Promise<boolean>} Resolves to true when the link is broken,
 *   false when the request succeeded.
 */
const checkBrokenCount = (url, host) => {
  // Return the chain so the caller can wait for the check to finish.
  return rp(url)
    .then(() => {
      console.log('valid url', url, host);
      // other code
      return false;
    })
    .catch(() => {
      console.log('invalid url', url, host);
      return true;
    });
};

// Let's call all the base URLs in parallel, assuming none of them is incorrect.
// resolveWithFullResponse gives access to statusCode as well as the body.
const allRequestPromises = pageUrls.map((pageUrl) =>
  rp({ uri: pageUrl, resolveWithFullResponse: true })
);

Promise.all(allRequestPromises)
  .then((responses) => {
    // Promise.all guarantees the order of results, so index maps back to pageUrls.
    const checksPerPage = responses.map((response, index) => {
      console.log(pageUrls[index], response.statusCode);
      const testUrls = getHrefs(response.body);
      // Check every sub-link of this page in parallel.
      return Promise.all(
        testUrls.map((testUrl) => checkBrokenCount(testUrl, pageUrls[index]))
      );
    });
    // Wait for the sub-link checks of every page before reporting totals.
    return Promise.all(checksPerPage);
  })
  .then((resultsPerPage) => {
    let linkCount = 0;
    let brokenLinks = 0;
    resultsPerPage.forEach((results) => {
      linkCount += results.length;
      brokenLinks += results.filter(Boolean).length;
    });
    console.log(`Check complete! || Total Links: ${linkCount} || Broken Links: ${brokenLinks}`);
  })
  .catch((error) => {
    // Reached when one of the base pages could not be fetched (Promise.all is fail-fast).
    console.error('Failed to fetch a base page:', error.message);
  });
Upvotes: 1