Ildar Akhmetzyanov
Ildar Akhmetzyanov

Reputation: 252

NodeJS. Async. Parallel. Same functions

I need to parse 10 web pages and extract their main content. I'm using node-readability and don't want to write the same function 10 times when only the URL changes. At the end I have to sum up the content lengths. How can I do this with a loop, or is there a better approach? Right now it looks like this:

// Running totals shared by every request. Declared once, before the loop,
// so an iteration never replaces the accumulator that earlier callbacks
// may already have written to.
var data = {length: 0, count: 0};

for(var i=0; i<catchedUrl.length; i++){
    // The IIFE gives each request its own copy of `i`.
    (function(i) {
        read(catchedUrl[i], function(err, article, meta){
            // NOTE: throwing inside an asynchronous callback cannot be caught
            // by the code that started the loop; forward `err` to your own
            // callback here once one exists.
            if(err) throw err;

            // Word count: the content split on single spaces.
            var words = article.content.split(' ');
            article.close();
            data.count += 1;
            data.length += words.length;

            // Requests may finish in any order, so the totals are complete
            // only once every URL has reported back.
            if(data.count === catchedUrl.length){
                // Send data to callback when functions done
            }
        });
    })(i);
}

Upvotes: 2

Views: 430

Answers (2)

mr-wildcard
mr-wildcard

Reputation: 550

Egor's answer works great.

You could also use co to write the asynchronous code in a sequential, synchronous-looking style (the requests still run asynchronously, one after another):

$ npm i --save co thunkify

const co = require('co');
const read = require('node-readability');
const thunkify = require('thunkify');

const cachedUrls = [
    'http://stackoverflow.com/questions/34414539/elasticsearch-filtering-mulitple-documents-with-same-term',
    'http://stackoverflow.com/questions/34414537/selecting-multiple-values-with-multiple-where-clauses',
    'http://stackoverflow.com/questions/34414536/how-to-create-functional-test-directory-in-grails',
    'http://stackoverflow.com/questions/34414534/azure-active-directory-application-key-renewal',
    'http://stackoverflow.com/questions/34414532/store-facebook-credential-in-android-for-google-smart-lock-password',
    'http://stackoverflow.com/questions/34414531/ssis-read-flat-file-skip-first-row',
    'http://stackoverflow.com/questions/34414529/set-non-database-attribute-for-rails-model-without-attr-accessor',
    'http://stackoverflow.com/questions/34414525/excel-code-blocking-other-excel-sheets-to-open',
    'http://stackoverflow.com/questions/34414522/app-crash-when-network-connection-gone',
    'http://stackoverflow.com/questions/34414520/nest-input-inside-label-with-simple-form-and-rails-4'
];

// Wrap the callback-style `read` once instead of on every loop iteration.
const readPage = thunkify(read);

// Fetches each URL in turn (sequentially) and accumulates the number of
// space-separated words plus the number of pages processed.
co(function *() {

    const data = {
        length: 0,
        count: 0
    };

    for (const url of cachedUrls) {

        // The original code reads the article from index 0 of the yielded
        // value: co appears to collect the callback's non-error arguments
        // (article, meta) into an array.
        const response = yield readPage(url);
        const article = response[0];

        data.length += article.content.split(' ').length;
        data.count++;

        // node-readability asks callers to close the article to avoid leaks.
        article.close();
    }

    return data;

}).then(function(value) {
    console.log('final value:', value);
}).catch(function(err) {
    // Without this handler a failed request would be an unhandled rejection.
    console.error('failed to read pages:', err);
});

Upvotes: 2

Egor Malkevich
Egor Malkevich

Reputation: 1536

You can use the async module to simplify the looping. Also, please take a look at the documentation for the .bind() function.

A code sample for this case might look something like this:

var async = require('async');

function step(number, callback) {
     [enter code here]
     callback();
}

module.exports = (job, done) => {
    var _pages = [URLS];
        async.eachSeries(_pages, (link, callback)=> {
            step(link, callback);
        }, ()=> done());
    });

};

Best regards, Egor

Upvotes: 3

Related Questions