Learner
Learner

Reputation: 98

Node.js: nested async.map with multiple HTTP requests

I am trying to loop over an array of URLs, fetch each page, and then request every URL found in that page's response. I want the outer loop to move on to the next page only after all of the inner requests have completed, and I want the output to look like this:

Checking Urls in : https://stackoverflow.com status 200 done .... .... Checking Urls in : https://example.com ..... ..... .....

Total links #20

But in my code the outer loop finishes before the inner requests are done.

const getHrefs = require('get-hrefs');
const async = require("async");
var req = require('request-promise');
var errors = require('request-promise/errors');

// Script: for every page in pageUrls, fetch the page, extract its hrefs with
// getHrefs, request each extracted URL, and finally print the link totals.
var pageUrls = ['https://stackoverflow.com','https://www.exsample.com'];
// NOTE(review): assigned without var/let/const, so this is an implicit global.
testUrls='';

// Outer loop: one iteratee call per page URL; `callback` signals that page is done.
async.map(pageUrls, function(pageUrl,callback){
    // Fetch the page itself (callback-style use of the request-promise export).
    req(pageUrl, function (err, response, body) {
        // NOTE(review): response.statusCode is read before `err` is checked;
        // presumably `response` is undefined on a failed request - confirm.
        console.log(pageUrl, " STATUS: ", response.statusCode);
        if ( err){
            return callback(err);
        } 
        else {
        // Collect every href found in the fetched page body.
        testUrls= getHrefs(response.body);

        // Inner loop: one iteratee call per extracted URL.
        async.map(testUrls, function(testUrl,callback1){
             // NOTE(review): linkCount and brokenLinks are not declared anywhere
             // in this snippet; they must exist before this line runs.
             linkCount++;
               // Start the request; only the rejection handlers below observe its
               // outcome. The first handler is filtered on errors.StatusCodeError.
               req(testUrl).catch(errors.StatusCodeError, function (reason) {
                        brokenLinks++;
                        console.log("URL: "+ testUrl+ "reason: "+ reason.statusCode);
                    })
                    .catch(errors.RequestError, function (reason) {
                        // Request-level errors are ignored here (empty handler).
                    }).finally(function () {
                        // Empty: nothing runs when the request settles.

                    });

                // callback1 is invoked right after the request is *started*, outside
                // the promise chain above, so the inner map is reported complete
                // without waiting for any req(testUrl) to settle. This is why the
                // outer loop finishes before the requests are done.
                return  callback1();
             },function(err){

                 // Inner map finished: mark this page as done. `err` is not forwarded.
                 callback();

              }) ;
        }
    })

} ,function(err){
    // Final callback of the outer map: prints the summary. `err` is not inspected.
    console.log("OuterLoopFinished");
    console.log('*************************************************************' + '\n');
    console.log('Check complete! || Total Links: ' + linkCount + ' || Broken Links: ' + brokenLinks);
    console.log('*************************************************************');

});

Upvotes: 1

Views: 1381

Answers (1)

AbhinavD
AbhinavD

Reputation: 7282

I think you should rethink your approach. This results in around 400 URLs to request. You should fire all of the requests for the sub-links in parallel, and then you can track the count of broken URLs per host URL. This will make your script complete faster.

// Base pages whose links will be checked.
const pageUrls = ['https://stackoverflow.com','https://www.google.com'];
const rp = require('request-promise');
// Holds one pending request promise per entry of pageUrls, in the same order.
const allRequestPromises = [];
const getHrefs = require('get-hrefs');

/**
 * Requests a single link and reports whether it is reachable.
 *
 * The promise chain is returned (the original discarded it), so callers can
 * wait for the check to finish and count broken links from the results.
 * Callers that ignore the return value behave exactly as before.
 *
 * @param {string} url - Link to request.
 * @param {string} host - Page the link was found on; used only for logging.
 * @returns {Promise<boolean>} Resolves to `true` when the request succeeds and
 *   `false` when it is rejected. The returned promise never rejects.
 */
const checkBrokenCount = (url, host) =>
  rp(url)
    .then(() => {
      console.log('valid url', url, host);
      // other code
      return true;
    })
    .catch(() => {
      // Any rejection of the request (or a throw in the handler above) counts as broken.
      console.log('invalid url', url, host);
      return false;
    });

// Start the requests for all base pages up front so they run in parallel.
// resolveWithFullResponse gives access to statusCode as well as the body.
pageUrls.forEach((pageUrl) => {
  allRequestPromises.push(rp({ uri: pageUrl, resolveWithFullResponse: true }));
});

Promise.all(allRequestPromises)
  .then((responses) => {
    responses.forEach((response, index) => {
      // Promise.all preserves input order, so index maps back to pageUrls.
      const pageUrl = pageUrls[index];
      console.log(pageUrl, response.statusCode);
      const testUrls = getHrefs(response.body);
      testUrls.forEach((testUrl) => {
        checkBrokenCount(testUrl, pageUrl);
      });
    });
  })
  .catch((error) => {
    // Promise.all rejects as soon as any base page fails; without this handler
    // that failure would surface as an unhandled promise rejection.
    console.error('failed to fetch a base URL:', error.message);
  });

Upvotes: 1

Related Questions