Reputation: 6789
I'm pulling text from N URLs. First I get the N URLs in `linksOnPage`, and then I run a `doOnPage` function to get the text from each URL. When I run the code, only 1 of the N URLs gets processed through the function. I assume it's because the processing function runs asynchronously. How do I stack these up in a queue and run them all, or what's a better way to do this?
Here's the main JS code:
// Module-level handles. linksOnPage and lyricsFromLink are declared here but
// never assigned in this section; the functions of the same names below are
// attached to `exports` instead.
var nodeio, linksOnPage, lyricsFromLink, db;
// Scraping library; its scrape() entry point is used by both exported functions.
nodeio = require('node.io');
// Project-local database module; supplies loadDB() and the Song constructor
// used by loadSong.
db = require('./db');
// Runs once at module load, before any scraping is started.
// NOTE(review): presumably sets up the database connection - confirm in ./db.
db.loadDB();
/**
 * Build a Song record from the given fields and save it through the db module.
 *
 * @param {*} artist - Stored under the record's `artist` key.
 * @param {*} title - Stored under the record's `title` key.
 * @param {*} lyrics - Stored under the record's `lyrics` key.
 * @throws Rethrows whatever error the save callback receives.
 */
var loadSong = function(artist, title, lyrics){
  console.log("loadSong being called");

  // Wrap the fields in a db.Song so they can be persisted.
  var songRecord = new db.Song({
    artist: artist,
    title: title,
    lyrics: lyrics
  });

  songRecord.save(function(saveError) {
    if (!saveError) {
      console.log("saved with no errors!");
      return;
    }
    throw saveError;
  });
};
// generic utility for getting links on a page and running a function on each one
exports.linksOnPage = function(pageObj, linkSelector, doOnPage, contentSelector) {
nodeio.scrape(function(){
this.getHtml(pageObj.pageUrl, function(err, $) {
var links = [];
var i = 0;
$(linkSelector).each(function(link) {
var fullLink = pageObj.rootUrl + link.attribs.href
links.push(fullLink);
//run a function on each link
console.log('getting lyrics for song: ', i);
doOnPage(pageObj.artist, fullLink, contentSelector);
i = i+1;
});
//this.emit(links);
});
});
}
// get the lyrics for a specific song
exports.lyricsFromLink = function(artist, pageUrl, lyricsSelector) {
nodeio.scrape(function(){
this.getHtml(pageUrl, function(err, $) {
var lyrics = "";
console.log('before each statement');
$(lyricsSelector).each(function(lyricParagraph) {
lyrics = lyrics + " " + lyricParagraph.text;
});
console.log('after each statement');
loadSong(artist, pageUrl, lyrics);
this.emit(lyrics)
});
});
}
Upvotes: 0
Views: 81