Bannerman
Bannerman

Reputation: 101

Asynchronous Request Within a ForEach in node.js

I am new to node.js (and to request.js). I'd like to get the body of a website back from a specific url with different paths (in the example below http://www.example.com/path1, http://www.example.com/path2, etc.) and log this data in an object with a key/value mapping (siteData[path] below).

var request = require('request'),
    paths = ['path1','path2','path3'],
    siteData = {},
    pathLength = paths.length,
    pathIndex = 0; // number of requests that have called back so far

// Start one request per path without waiting for the previous one. The
// callbacks may arrive in any order, so completion is tracked with a counter
// rather than with the loop position.
paths.forEach((path) => {
    var url="http://www.example.com/"+path;
    request(url, function(error, response, html){
        if(!error){
            siteData[path] = response.body;
        } else {
            // A failed path is simply left out of siteData.
            console.error('Request for ' + url + ' failed:', error);
        }
        // Count failed requests too; otherwise a single error would keep the
        // counter from ever reaching pathLength and someFunction would never run.
        pathIndex++;
        if(pathIndex===pathLength){
            someFunction(siteData);
        }
    });
});

/**
 * Placeholder for the processing that should happen once the data is collected.
 *
 * @param {Object} data - the collected results; the script above passes
 *     siteData, keyed by path with the response body as the value.
 */
function someFunction(data){
    //manipulate data
}

My questions are:

Thanks for your help!

Upvotes: 1

Views: 8649

Answers (4)

SomeKittens
SomeKittens

Reputation: 39522

Looks like Promises are the right tool to get the job done here. Instead of a callback, we'll create a new Promise object that will resolve when the job is done. We can say "once you're done, do some more stuff" with the .then operator:

// rp(url) is used here as a promise: no callback is passed, the result is
// consumed with .then instead.
var rp = require('request-promise');

rp('http://www.google.com')
  .then((htmlString) => {
    // htmlString is the value the promise resolved with.
    // Process html... 
  });

(if anything goes wrong, the promise rejects and goes straight to .catch)

// someFunctionThatErrors stands in for any call whose promise rejects.
someFunctionThatErrors('Yikes!')
  .then((data) => {
    // won't be called
  })
.catch((err) => {
  // Will be called, we handle the error here
});

We've got lots of async tasks to do, so just one promise won't work. One option is to string them all together in series, like so:

// Series: the second request is only started once the first one has resolved.
rp('http://www.google.com')
  .then((htmlString) => rp('http://someOtherUrl.com'))
  .then((otherHtmlString) => {
    // and so forth...
  })
  .catch((err) => {
    // a rejection from any request in the chain ends up here
  });

But that loses some of the awesome of async - we can do all of these tasks in parallel.

// Both requests are started immediately; each promise gets its own success
// and error handler attached before it is stored.
var myRequests = [];
myRequests.push(rp('http://www.google.com').then(processStuff).catch(handleErr));
myRequests.push(rp('http://someOtherUrl.com').then(processStuff).catch(handleErr));

...boy does that look ugly. There's a better way with all of this - Promise.all() (You're using arrow functions so I assume native Promise will work for you too). It takes an array of promises and returns a promise that resolves when all of the array's promises have finished executing. (If any of them error, it immediately rejects). The .then function will be given an array representing the value each promise resolved to.

var myRequests = [];
myRequests.push(rp('http://www.google.com'));
myRequests.push(rp('http://someOtherUrl.com'));
// Promise.all resolves once every request has resolved; the results arrive in
// the same order as the promises in the array.
Promise.all(myRequests)
  .then((arrayOfHtml) => {
    // arrayOfHtml[0] is the results from google,
    // arrayOfHtml[1] is the results from someOtherUrl
    // ...etc
    arrayOfHtml.forEach(processStuff);
  })
  .catch((err) => {
    // .catch needs a function argument; called with nothing, it does not
    // handle the rejection at all.
    console.error(err);
  });

Still, we have to manually call .push for every link we want to hit. That won't do! Let's pull a nifty trick using Array.prototype.map, which iterates over our array, transforms each value in turn, and returns a new array composed of the new values:

// One promise per path: map calls rp for every entry of paths and collects
// the returned promises in a new array.
var arrayOfPromises = paths.map((path) => rp(`http://www.example.com/${path}`));
// A single .catch covers a failure of any of the requests.
Promise.all(arrayOfPromises)
  .then((arrayOfHtml) => arrayOfHtml.forEach(processStuff))
  .catch(function (err) { console.log('agh!'); });

Much cleaner and easier error handling.

Upvotes: 10

dee.ronin
dee.ronin

Reputation: 1038

In my experience, you can't just use a forEach or any other kind of loop when dealing with the request module, since it executes asynchronously and ends up with an EventEmitter memory leak.

The way I solve this is by using a recursive function. You can refer to the code below:

var request = require('request'),
    paths = ['path1','path2','path3'], // path segments, each appended to the base URL for one request
    siteData = {}; // filled in by requestSiteData: path -> response body

/**
 * Requests the given paths one at a time, stores every successful response
 * body in siteData under its path, and finally passes siteData to someFunction.
 *
 * @param {string[]} paths - path segments to request; the array is not modified.
 * @param {number} [index] - position of the next path to request. Used by the
 *     recursive calls; callers can leave it out to start from the beginning.
 */
function requestSiteData(paths, index) {
    var position = index || 0;

    if (position < paths.length) {
        // Read by index instead of paths.shift() so the caller's array survives.
        var path = paths[position];
        var url = "http://www.example.com/" + path;

        request(url, function(error, response, html) {
            if(!error) {
                siteData[path] = response.body;
            } //add else block if want to terminate when error occur

            //continue to process data even if error occur
            // The next request is only started here, inside the callback of the
            // previous one, so a single request is in flight at a time.
            requestSiteData(paths, position + 1);
        });
    } else {
        someFunction(siteData); //all paths are requested
    }
}

/**
 * Placeholder for the processing that should run once every path has been requested.
 *
 * @param {Object} data - requestSiteData passes siteData here.
 */
function someFunction(data){
    //manipulate data
}

requestSiteData(paths); //start requesting data

Upvotes: 2

BBS
BBS

Reputation: 1409

I agree with the above solution that promises are probably the way to go in this instance; however, you can use callbacks to achieve the same as well.

The lodash library offers convenient ways of tracking how many asynchronous calls have been completed.

'use strict';

var _ = require('lodash');
var path = require('path');

var paths = ['a', 'b', 'c'];
var base = 'www.example.com';

// done has to be invoked paths.length times before completeAfterDone is called.
var done = _.after(paths.length, completeAfterDone);

// Start one asynchronous task per path; every task reports back through done().
_.forEach(paths, function(part) {
    // Build the URL by hand: path.join is a file-system helper and uses the
    // platform separator, which would give 'www.example.com\a' on Windows.
    var url = base + '/' + part;
    asynchFunction(url, function() {
        done();
    });
});

/**
 * Final callback of the script: logs the completion message.
 */
function completeAfterDone() {
    var message = 'Process Complete';
    console.log(message);
}

/**
 * Stand-in for a real asynchronous call: after a random delay of less than
 * five seconds it logs the input and then invokes the callback.
 *
 * @param {*} input - value to log once the delay has passed.
 * @param {Function} cb - invoked with no arguments after the input is logged.
 */
function asynchFunction(input, cb) {
    var delayMs = Math.random() * 5000;
    var finish = function() {
        console.log(input);
        cb();
    };
    setTimeout(finish, delayMs);
}

With this method the done function will keep track of how many of the requests have finished and will call the final callback once each url is loaded.

Upvotes: 0

Chandan
Chandan

Reputation: 1138

Due to the asynchronous nature of the request method in Node.js, you cannot directly know its responses and act on them in real time. You have to wait for the callback to arrive, and only then can you call the next request method.

Here, in this case, you are calling all the request methods in a forEach loop, meaning they are called one by one without waiting for the previous responses.

I would suggest using the wonderful async library for this purpose, as below:

 var async = require('async'); // the module name was misspelled as 'aysnc'
 var request = require('request'),
 paths = ['path1','path2','path3'],
 siteData = {},
 pathLength = paths.length,
 pathIndex = 0,
 count = 0; // number of requests that have completed successfully so far

// Request the paths strictly one after another: the next iteration only
// starts after the previous request has called back.
// NOTE(review): this uses a synchronous test function; newer major versions of
// async may expect the test to take a callback - confirm against the installed version.
async.whilst(
  function () { return count < pathLength; },
  function (callback) {
    // do your request call here 
    // count is the index of the next path that has not been requested yet
    var path = paths[count];
    var url="http://www.example.com/"+path;
    request(url, function(error, response, html){
      if(error){
        // Hand the error to async so the loop ends instead of waiting forever.
        return callback(error);
      }
      siteData[path] = response.body;
      // call another request method
      count++;
      callback();
    });
  },
  function (err) {
    // all the request calls are finished or an error occurred
    // manipulate data here 
    someFunction(siteData);
  }
);

Hope this helps.

Upvotes: 1

Related Questions