Johnny Ha
Johnny Ha

Reputation: 633

Nodejs request loop limit

This is my first Node.js application. I am trying to run a cron job that fetches data from an external API and does some calculations on the response.

Everything works fine until the curlList gets too big. With 10 items in curlList it's okay, but I have a very big curlList with more than 90 items.

What is the best way to implement this?

Thanks for helping.

Best regards, Johnny

var request = require('request');

// Jobs to process: each entry has a numeric `id` and an `href` string.
var curlList = [
    { id: 1, href: '/name1' },
    { id: 2, href: '/name2' },
    { id: 3, href: '/name3' }
];

/**
 * POSTs a JSON payload containing `id` and inspects the reply.
 *
 * @param {number} id  - Identifier sent in the request body.
 * @param {string} url - Only printed when a match is found; the request itself
 *                       is sent with an empty `url` option.
 */
var curl = function(id, url) {
    var requestOptions = {
        method: 'post',
        body: { id: id },
        json: true,
        url: ""
    };

    var onResponse = function(err, res, body) {
        if (!err) {
            // Calculate response data here.

            // NOTE(review): `match` is not defined in this snippet - presumably it is
            // the outcome of the calculation above; confirm before running.
            if (match) {
                console.log(url);
            }
            return;
        }

        console.log(err, 'error posting json');
    };

    request(requestOptions, onResponse);
};

// Once the server is listening on port 3000, start one request per curlList entry.
// NOTE: every request is started immediately; nothing here limits how many
// are in flight at the same time.
app.listen(3000, function() {
    // Read the entries from `curlList` itself: it is the array the jobs are
    // declared in, and no variable named `list` exists in this script.
    curlList.forEach(function(entry) {
        curl(entry.id, entry.href);
    });
});

Upvotes: 0

Views: 1944

Answers (2)

Teemu Ikonen
Teemu Ikonen

Reputation: 11929

Modify your code to do something like this:

var request = require('request');

// Pending jobs, one object per request: a numeric `id` plus an `href` string.
var curlList = [
    { id: 1, href: '/name1' },
    { id: 2, href: '/name2' },
    { id: 3, href: '/name3' }
];

/**
 * POSTs a JSON payload containing `id`, reports completion through `done`,
 * then inspects the reply.
 *
 * @param {number} id - Identifier sent in the request body.
 * @param {string} url - Only printed when a match is found; the request itself
 *     is sent with an empty `url` option.
 * @param {function(*)=} done - Optional completion callback. It is invoked with
 *     the `err` value of the request callback as soon as the response arrives,
 *     before the response is processed.
 */
var curl = function(id, url, done) {
    var payload = {
        id: id
    };

    var options = {
        method: 'post',
        body: payload,
        json: true,
        url: ""
    };

    request(options, function(err, res, body) {
        // Signal completion first so the caller can start its next job right away.
        // Guarded so that a two-argument call, curl(id, url), does not throw here.
        if (typeof done === 'function') {
            done(err);
        }

        if (err) {
            console.log(err, 'error posting json');
            return;
        }
        // Calculate response data here.

        // NOTE(review): `match` is not defined in this snippet - presumably it is
        // the outcome of the calculation above; confirm before running.
        if (match) {
            console.log(url);
        }
    });
};

// Once the server is listening on port 3000, work through curlList while
// keeping at most `max` requests in flight at any time.
app.listen(3000, function() {
    var max = 5;     // max 5 parallel
    var running = 0; // requests started whose `done` callback has not fired yet
    var next = 0;    // index of the next curlList entry to start

    // Start jobs until either the parallel limit is reached or the list is
    // exhausted. Without the second check, `job` would be undefined once the
    // list runs out and `job.id` would throw.
    var startJobs = function() {
        while (running < max && next < curlList.length) {
            var job = curlList[next];
            next++;
            running++;
            curl(job.id, job.href, onJobDone);
        }
    };

    // Passed to curl() as its completion callback: frees one slot and refills it.
    var onJobDone = function() {
        running--;
        startJobs();
    };

    startJobs();
});

This allows a maximum of 5 parallel requests.

Upvotes: 1

Robert Rossmann
Robert Rossmann

Reputation: 12131

Your for loop starts all the defined requests in a matter of a few microseconds - websites usually detect such obtrusive behaviour as an attempt to overload the server (a DoS attack). It is also not a good idea to do this due to your own hardware / network limitations - if you needed to issue 1000 requests and each response were 1 MB, you would suddenly need to download 1 GB of response data. What's worse, your network might get so overloaded that some requests will simply time out.

You need to add some kind of throttling to limit the number of requests being made at any given time to some reasonable amount. I personally recommend the async.js library, particularly its eachLimit() utility.

Upvotes: 1

Related Questions