Reputation: 45
I want to spider some links and, after all the tasks are done, do something else.
Tracking when all the tasks have finished is the hard part for me. I hope someone can help.
Here is my code:
// Spider script: collect the lines of url.txt, then download each URL and log the page title.
var urlList=[];
// Read the file line by line, storing every line as a URL to visit
lineReader.eachLine('url.txt', function(line) {
console.log('url is :'+line);
urlList.push(line);
}).then(function(){// After reading, begin to process each line
console.log('read done!begin collect');
// NOTE(review): the iterator declares no completion callback and async.each is given no
// final callback, so nothing in this call reports when all downloads have finished.
async.each(urlList,function(line){
console.log('begin line :'+line);
// Download each URL
// NOTE(review): download() in this file invokes its callback with a single argument,
// so `cb` is never supplied here; `data` is null when the request failed.
download(line,function(data,cb){
var $=cheerio.load(data);//load cheerio
var title=$('head>title').text();//get title
console.log('title is '+title);
});
});
// Here I want to check whether all URLs have been downloaded, so that I can do something else
// (the condition below is a placeholder and is not valid JavaScript as written)
if(/* allproceed */)
{
console.log('Task all done!Begin Next');
}
});
// Fetches `url` with http.get and hands the accumulated response body to `callback`.
// callback(data): `data` is the concatenation of all received chunks, or null when the
// request emitted an error (the error message is logged, not passed on).
function download(url, callback) {
http.get(url, function(res) {
var data = "";
// Append each received chunk to the body collected so far
res.on('data', function (chunk) {
data += chunk;
});
// Response finished: pass the whole body to the caller
res.on("end", function() {
callback(data);
});
}).on("error", function(e) {
console.log("Got error: " + e.message);
// Signal failure with null rather than with the error object
callback(null);
});
}
Hope someone can help me.
Thanks very much.
Upvotes: 0
Views: 371
Reputation: 1166
I've made some fixes to your code, see the result below:
// Spider script: collect the lines of url.txt, download each URL, log every page title,
// and continue with the rest of the program once async.each reports the outcome.
var urlList = [];
// Read the file line by line, storing every line as a URL to visit
lineReader.eachLine('url.txt', function(line) {
  console.log('url is :' + line);
  urlList.push(line);
}).then(function() { // After reading, begin to process each line
  console.log('read done!begin collect');
  async.each(urlList, function(line, callback) {
    console.log('begin line :' + line);
    // Download each URL; `callback` tells async.each that this item is finished
    download(line, function(err, data) {
      if (err) {
        // Report the failure to async.each instead of parsing a missing body
        return callback(err);
      }
      var $ = cheerio.load(data); // load cheerio
      var title = $('head>title').text(); // get title
      console.log('title is ' + title);
      callback(null, title);
    });
  }, function continueHere(err) {
    // Runs once: either after every item called back without an error, or as soon as
    // one item reported an error. Only announce success in the first case.
    if (err) {
      console.log('Task failed: ' + err.message);
      return;
    }
    // All URLs have been downloaded; continue with the rest of the application here
    console.log('Task all done!Begin Next');
  });
});
/**
 * Downloads the body of `url` over HTTP and reports the result through an
 * error-first callback.
 *
 * @param {string} url - Address handed to http.get().
 * @param {function(?Error, string=)} callback - Invoked exactly once: with
 *   (null, body) after the response has ended, or with (err) on failure.
 */
function download(url, callback) {
  var finished = false;

  // Request and response can each report an outcome; only the first one is
  // forwarded so the caller's callback is never invoked twice.
  function finish(err, data) {
    if (finished) {
      return;
    }
    finished = true;
    if (err) {
      console.log("Got error: " + err.message);
      return callback(err);
    }
    callback(null, data);
  }

  try {
    http.get(url, function(res) {
      var data = "";
      // Decode as UTF-8 at the stream level so a multi-byte character that is
      // split across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      res.on('data', function (chunk) {
        data += chunk;
      });
      res.on("end", function() {
        finish(null, data);
      });
      res.on("error", finish);
    }).on("error", finish);
  } catch (e) {
    // http.get can throw synchronously (e.g. for a malformed URL); route that
    // through the callback, deferred so the callback is always asynchronous.
    process.nextTick(finish, e);
  }
}
Some things to pay special attention to:
You were already very close to your answer. `async.each()` is a tool you can use to get the job done, but you were not using it correctly yet. The iterator function you pass it, the one that gets called for each item in `urlList`, takes a callback that you call when the job for that iteration is done. I added that callback.
`async.each()` also takes a third argument: the function that gets called when all tasks have completed. In this function you can put the code that continues the rest of your application.
With regard to using callbacks: a pattern repeated across node.js is that the first argument passed to a callback is always an error, if one exists. If not, that argument is `undefined` or `null`. The actual result is passed as the second argument. It's a good idea to follow this pattern. `async`, for instance, expects you to obey it. If any of the tasks in the `async.each()` fail (by passing a non-null value as the first argument to the callback), `async` considers the entire series failed, and passes that error on to the series callback (in the code above, the function `continueHere`).
One last thing. Though the code above should work, it is mixing promises (signified by the `.then()` call) with callbacks. These are two different ways to manage asynchronous code. Though you're free to mix them if you want, for readability of the code it might help to pick one pattern and stick with it ;).
Upvotes: 1