Reputation: 9374
I'm using Cheerio's `each` function to parse some URLs and save all the data into MongoDB. My problem is that Cheerio's `each` function is synchronous, and I don't know when the parsing has finished so that I can start doing something else. How can I make these functions asynchronous?
// Fetch the listing page and start parsing the post linked from every <article>.
request(URL, function (error, response, html) {
  // Report failures instead of silently doing nothing.
  if (error || response.statusCode !== 200) {
    console.log(error || 'Unexpected status code: ' + response.statusCode);
    return;
  }

  var $ = cheerio.load(html);
  $('article').each(function (i, element) {
    // Look the link up inside the current article; resolving it once outside
    // the loop would hand the same value to every parse_url call.
    var posturl = $('a', element).attr('href');
    if (!posturl) {
      return; // article without a link: nothing to fetch
    }
    parse_url(posturl, i);
  });
});
Here is my `parse_url` function:
/**
 * Downloads a post page, extracts its fields and inserts them into `collection`.
 *
 * @param {string} url - Address of the post page to download.
 * @param {number} i - Position of the post in the listing (currently unused).
 */
function parse_url(url, i) {
  request(url, function (error, response, html) {
    // Report failed downloads instead of silently skipping them.
    if (error || response.statusCode !== 200) {
      console.log(error || 'Unexpected status code ' + response.statusCode + ' for ' + url);
      return;
    }

    var $ = cheerio.load(html);
    var articleId = $('article').attr('id');
    if (!articleId) {
      // Without this guard, .substring() on a missing id would throw inside
      // the request callback.
      console.log('No article id found at ' + url);
      return;
    }

    var json = {
      title: $('article h1').text(),
      // Drop the first five characters of the id attribute.
      postid: articleId.substring(5),
      image: $('article img').attr('src'),
      // NOTE(review): "decription" looks like a typo for "description"; it is
      // kept unchanged because it is the field name written to the database.
      decription: $('article p strong').text()
    };

    collection.insert(json, function (err) {
      if (err) {
        console.log(err);
      }
    });
  });
}
Upvotes: 0
Views: 106
Reputation: 5265
Install the async-foreach package with `npm install async-foreach --save`. In your first request, change your `$('article').each` call to:
var forEach = require('async-foreach').forEach;
// Fetch the listing page, then parse the post linked from each <article> one
// at a time and report when every article has been handled.
request(URL, function (error, response, html) {
  // Report failures instead of silently doing nothing.
  if (error || response.statusCode !== 200) {
    console.log(error || 'Unexpected status code: ' + response.statusCode);
    return;
  }

  var $ = cheerio.load(html);
  forEach($('article').get(), function (article, index) {
    // this.async() switches the loop to asynchronous mode and returns the
    // function that resumes iteration; the loop stays paused until it is called.
    var done = this.async();

    // Resolve the link inside the current article and pass the URL (not the
    // DOM element) to parse_url.
    var posturl = $('a', article).attr('href');
    if (!posturl) {
      done(); // article without a link: move on to the next one
      return;
    }

    // parse_url is expected to invoke its third argument once it has finished
    // with this URL. Wrapping `done` keeps parse_url's arguments away from it,
    // because calling done(false) would abort the loop.
    parse_url(posturl, index, function () {
      done();
    });
  }, function () {
    // Runs once after the last article has been handled; start follow-up work here.
    console.log('Finished parsing all articles');
  });
});
Now you still have to make your `parse_url` function asynchronous as well, because it is currently blocking. To do that in Node, you use `process.nextTick(fn)`, which is the equivalent of the browser's `setTimeout(fn, 0)` but much more efficient.
/**
 * Downloads a post page on the next tick, extracts its fields, inserts them
 * into `collection`, and reports completion through an optional callback.
 *
 * @param {string} url - Address of the post page to download.
 * @param {number} i - Position of the post in the listing (currently unused).
 * @param {function(?Error=)} [done] - Called once when processing of this URL
 *   has ended; receives the error if the download, the parsing or the insert
 *   failed. May be omitted.
 */
function parse_url(url, i, done) {
  // Keep the two-argument call style working when no callback is supplied.
  var finish = typeof done === 'function' ? done : function () {};

  process.nextTick(function () {
    request(url, function (error, response, html) {
      // Report failed downloads instead of silently skipping them.
      if (error || response.statusCode !== 200) {
        var requestError = error ||
          new Error('Unexpected status code ' + response.statusCode + ' for ' + url);
        console.log(requestError);
        finish(requestError);
        return;
      }

      var $ = cheerio.load(html);
      var articleId = $('article').attr('id');
      if (!articleId) {
        // Without this guard, .substring() on a missing id would throw inside
        // the request callback.
        var parseError = new Error('No article id found at ' + url);
        console.log(parseError);
        finish(parseError);
        return;
      }

      var json = {
        title: $('article h1').text(),
        // Drop the first five characters of the id attribute.
        postid: articleId.substring(5),
        image: $('article img').attr('src'),
        // NOTE(review): "decription" looks like a typo for "description"; it is
        // kept unchanged because it is the field name written to the database.
        decription: $('article p strong').text()
      };

      collection.insert(json, function (err) {
        if (err) {
          console.log(err);
        }
        finish(err);
      });
    });
  });
}
Hope this solves your problem!
Upvotes: 2