Reputation: 101
I am new to node.js (and to request.js). I'd like to fetch the body of a website from a base URL with different paths (in the example below http://www.example.com/path1, http://www.example.com/path2, etc.) and store the data in an object keyed by path (siteData[path] below).
var request = require('request'),
    paths = ['path1', 'path2', 'path3'],
    siteData = {},
    pathLength = paths.length,
    pathIndex = 0;

paths.forEach((path) => {
    var url = "http://www.example.com/" + path;
    request(url, function(error, response, html) {
        if (!error) {
            siteData[path] = response.body;
            pathIndex++;
            if (pathIndex === pathLength) {
                someFunction(siteData);
            }
        }
    });
});

function someFunction(data) {
    // manipulate data
}
My question is: how do I avoid the following warning?
(node) warning: possible EventEmitter memory leak detected. 11 unpipe listeners added. Use emitter.setMaxListeners() to increase limit.
I tried chaining request(url, function(...){}).setMaxListeners(100); but that didn't work. Thanks for your help!
Upvotes: 1
Views: 8649
Reputation: 39522
Looks like Promises are the right tool to get the job done here. Instead of a callback, we'll create a new Promise object that resolves when the job is done. We can say "once you're done, do some more stuff" with the .then method:
var rp = require('request-promise');
rp('http://www.google.com')
    .then((htmlString) => {
        // Process html...
    });
(If anything goes wrong, the promise rejects and goes straight to .catch):
someFunctionThatErrors('Yikes!')
    .then((data) => {
        // won't be called
    })
    .catch((err) => {
        // Will be called; we handle the error here
    });
We've got lots of async tasks to do, so just one promise won't work. One option is to string them all together in series, like so:
rp('http://www.google.com')
    .then((htmlString) => rp('http://someOtherUrl.com'))
    .then((otherHtmlString) => {
        // and so forth...
    });
But that loses some of the awesome of async - we can do all of these tasks in parallel.
var myRequests = [];
myRequests.push(rp('http://www.google.com').then(processStuff).catch(handleErr));
myRequests.push(rp('http://someOtherUrl.com').then(processStuff).catch(handleErr));
...boy does that look ugly. There's a better way with all of this: Promise.all() (you're using arrow functions, so I assume native Promise will work for you too). It takes an array of promises and returns a promise that resolves when all of the array's promises have finished executing. (If any of them error, it immediately rejects.) The .then function will be given an array representing the value each promise resolved to.
var myRequests = [];
myRequests.push(rp('http://www.google.com'));
myRequests.push(rp('http://someOtherUrl.com'));

Promise.all(myRequests)
    .then((arrayOfHtml) => {
        // arrayOfHtml[0] is the results from google,
        // arrayOfHtml[1] is the results from someOtherUrl,
        // ...etc
        arrayOfHtml.forEach(processStuff);
    })
    .catch(/* handle error */);
Still, we have to manually call .push for every link we want to hit. That won't do! Let's pull a nifty trick using Array.prototype.map, which iterates over our array, transforming each value in turn, and returns a new array made up of the new values:
var arrayOfPromises = paths.map((path) => rp(`http://www.example.com/${path}`));

Promise.all(arrayOfPromises)
    .then((arrayOfHtml) => arrayOfHtml.forEach(processStuff))
    .catch(function (err) { console.log('agh!'); });
Much cleaner and easier error handling.
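If you still want the siteData key/value mapping from the question, Promise.all preserves the order of the input array, so you can zip the results back onto paths. A rough sketch, assuming request-promise (which resolves to the response body by default) and the someFunction from the question:
var rp = require('request-promise');

var paths = ['path1', 'path2', 'path3'];
var siteData = {};

Promise.all(paths.map((path) => rp(`http://www.example.com/${path}`)))
    .then((arrayOfHtml) => {
        // results come back in the same order as paths, so index i maps to paths[i]
        arrayOfHtml.forEach((html, i) => {
            siteData[paths[i]] = html;
        });
        someFunction(siteData); // your existing function
    })
    .catch((err) => console.error(err));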
Upvotes: 10
Reputation: 1038
In my experience, you can't just fire the requests off in a forEach (or any other kind of loop) with the request module, since they all execute asynchronously and you end up with the EventEmitter memory leak warning.
The way I solve this is with a recursive function, so only one request is in flight at a time. You can refer to the code below:
var request = require('request'),
    paths = ['path1', 'path2', 'path3'],
    siteData = {};

function requestSiteData(paths) {
    if (paths.length) {
        var path = paths.shift();
        var url = "http://www.example.com/" + path;
        request(url, function(error, response, html) {
            if (!error) {
                siteData[path] = response.body;
            } // add an else block if you want to terminate when an error occurs
            // continue to process data even if an error occurs
            requestSiteData(paths); // call the same function
        });
    } else {
        someFunction(siteData); // all paths are requested
    }
}

function someFunction(data) {
    // manipulate data
}

requestSiteData(paths); // start requesting data
Upvotes: 2
Reputation: 1409
I agree with the above solution that promises are probably the way to go in this instance; however, you can use callbacks to achieve the same as well.
The lodash library offers convenient ways of tracking how many asynchronous calls have been completed.
'use strict';

var _ = require('lodash');
var path = require('path');

var paths = ['a', 'b', 'c'];
var base = 'www.example.com';

// completeAfterDone runs only after done() has been called paths.length times
var done = _.after(paths.length, completeAfterDone);

_.forEach(paths, function(part) {
    var url = path.join(base, part);
    asynchFunction(url, function() {
        done();
    });
});

function completeAfterDone() {
    console.log('Process Complete');
}

// stand-in for an asynchronous call such as an HTTP request
function asynchFunction(input, cb) {
    setTimeout(function() {
        console.log(input);
        cb();
    }, Math.random() * 5000);
}
With this method, the done function keeps track of how many of the asynchronous calls have finished and calls the final callback once every URL has been loaded.
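To tie this back to the question, asynchFunction would be replaced by the actual request call. A rough sketch, assuming the request module and the siteData object and someFunction from the question:
'use strict';

var _ = require('lodash');
var request = require('request');

var paths = ['path1', 'path2', 'path3'];
var siteData = {};

// someFunction fires only after done() has been called once per path
var done = _.after(paths.length, function() {
    someFunction(siteData);
});

_.forEach(paths, function(path) {
    request('http://www.example.com/' + path, function(error, response, html) {
        if (!error) {
            siteData[path] = response.body;
        }
        done(); // count this request as finished even if it errored
    });
});

function someFunction(data) {
    // manipulate data
}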
Upvotes: 0
Reputation: 1138
Due to the asynchronous nature of the request method in Node.js, you cannot read its response directly and act on it in real time. You have to wait for the callback to arrive, and only then can you issue the next request.
Here in this case, you are calling all the request methods in a forEach loop, meaning they are all fired off without waiting for the previous responses.
I would suggest using the wonderful async library for this purpose, as below:
var async = require('async');
var request = require('request'),
    paths = ['path1', 'path2', 'path3'],
    siteData = {},
    pathLength = paths.length,
    count = 0;

async.whilst(
    function () { return count < pathLength; },
    function (callback) {
        // do your request call here
        var path = paths[count];
        var url = "http://www.example.com/" + path;
        request(url, function(error, response, html) {
            if (!error) {
                siteData[path] = response.body;
            }
            // move on to the next request; passing the error stops the loop early
            count++;
            callback(error);
        });
    },
    function (err) {
        // all the request calls are finished or an error occurred
        // manipulate data here
        someFunction(siteData);
    }
);
Hope this helps.
Upvotes: 1