Reputation: 252
I need to parse 10 web pages and extract their main content. I'm using node-readability and don't want to write the same function 10 times when only the URL changes. At the end I have to sum up the content lengths. How can I do this with a loop, or is there a better approach? Right now it looks like this:
// Running totals shared by every page callback. Declared once, outside the
// loop, so it is obvious that all callbacks accumulate into the same object.
var data = {length: 0, count: 0};
for(var i=0; i<catchedUrl.length; i++){
  // No IIFE is needed: catchedUrl[i] is evaluated synchronously when read()
  // is called, so the callback never looks at the loop index afterwards.
  read(catchedUrl[i], function(err, article, meta){
    if(err) throw err;
    // Split the extracted content on spaces; the number of resulting tokens
    // is what gets added to the total.
    var words = article.content.split(' ');
    article.close();
    data.count += 1;
    // The original added an undefined identifier `length` here.
    data.length += words.length;
    if(data.count === catchedUrl.length){
      // Every page has reported back, so the totals are final.
      // Send data to callback when functions done
    }
  });
}
Upvotes: 2
Views: 430
Reputation: 550
Egor's answer works great.
You could also make use of co
to write the asynchronous calls in a sequential, synchronous-looking style:
$ npm i --save co thunkify
var co = require('co');
var read = require('node-readability');
var thunkify = require('thunkify');
// Pages whose content will be fetched and measured, in the order they are read.
const cachedUrls = [
  'http://stackoverflow.com/questions/34414539/elasticsearch-filtering-mulitple-documents-with-same-term',
  'http://stackoverflow.com/questions/34414537/selecting-multiple-values-with-multiple-where-clauses',
  'http://stackoverflow.com/questions/34414536/how-to-create-functional-test-directory-in-grails',
  'http://stackoverflow.com/questions/34414534/azure-active-directory-application-key-renewal',
  'http://stackoverflow.com/questions/34414532/store-facebook-credential-in-android-for-google-smart-lock-password',
  'http://stackoverflow.com/questions/34414531/ssis-read-flat-file-skip-first-row',
  'http://stackoverflow.com/questions/34414529/set-non-database-attribute-for-rails-model-without-attr-accessor',
  'http://stackoverflow.com/questions/34414525/excel-code-blocking-other-excel-sheets-to-open',
  'http://stackoverflow.com/questions/34414522/app-crash-when-network-connection-gone',
  'http://stackoverflow.com/questions/34414520/nest-input-inside-label-with-simple-form-and-rails-4',
];
// Reads every URL in cachedUrls one after another and logs the totals:
//   length - sum of the space-separated token counts of each article's content
//   count  - number of pages read
co(function *() {
  // Wrap read() once; the wrapper does not change between iterations.
  const readPage = thunkify(read);
  const data = {
    length: 0,
    count: 0
  };
  for (let i = 0, n = cachedUrls.length; i < n; i++) {
    // The yielded value is indexable; element 0 is the article object.
    const response = yield readPage(cachedUrls[i]);
    const article = response['0'];
    data.length += article.content.split(' ').length;
    data.count++;
    // Release the article once its content has been measured, as the
    // callback-based version of this code does.
    article.close();
  }
  return data;
}).then(function(value) {
  console.log('final value:', value);
}).catch(function(err) {
  // Without this handler a failed read would be an unhandled rejection.
  console.error('failed to read pages:', err);
});
Upvotes: 2
Reputation: 1536
You can use the async
module to simplify looping over asynchronous calls. Also, please take a look at the .bind()
function (see the bind documentation).
A code sample for this case might look something like this:
var async = require('async');
/**
 * Processes a single item of the series and signals completion.
 *
 * @param {*} number - The item handed over by the iterator (the caller below
 *   passes the page link).
 * @param {Function} callback - Invoked with no arguments once the step is done.
 */
function step(number, callback) {
  // [enter code here] - put the per-item work here, then signal completion.
  callback();
}
module.exports = (job, done) => {
var _pages = [URLS];
async.eachSeries(_pages, (link, callback)=> {
step(link, callback);
}, ()=> done());
});
};
Best regards, Egor
Upvotes: 3