Reputation: 1114
I have been having some issues with opening multiple web pages in PhantomJS. I first open a website that contains a few links, which I then want to open as well, saving a piece of text from each URL into my jobs_list array of job objects. After all the URLs have been processed, I want to exit PhantomJS. But as it is right now, it never exits, and I have trouble receiving data from the second function.
// Entry script: open the listing page, collect every job link found on it
// into jobs_list, then start visiting the collected URLs with next_page(0).
var webPage = require('webpage');
var page = webPage.create();
// Shared state: replaced below by the array returned from the listing scrape.
// Each entry has the shape {title, url, location}.
var jobs_list = [];
// NOTE(review): `status` is never inspected, so the scrape below runs even
// when the listing page failed to load.
page.open('url', function (status) {
// Inject jQuery into the loaded page so the evaluate() callback can use `$`.
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
// Relay console messages emitted inside the page to this script's console.
page.onConsoleMessage = function(msg) {
console.log(msg);
};
// The function passed to evaluate() uses the page's `$`; only its return
// value (the jobs array) comes back to this script.
var list = page.evaluate(function() {
var jobs = [];
var job;
// For every <span> inside a `.test` element, record the text and href of
// the anchor(s) it contains; `location` starts empty and is filled later.
$('.test').each(function(){
$(this).find('span').each(function(){
var job_link = $(this).find('a');
var url = job_link.attr("href");
job = {title : job_link.text(), url : url, location : ""};
jobs.push(job);
})
});
return jobs;
});
var i = 0;
jobs_list = list;
// Kick off the per-job page visits, starting at the first entry.
next_page(i);
});
});
// Open the page for jobs_list[i], read the text of its `.job-location` list
// items, store it on the job entry, and move on to the next index.
//
// NOTE(review): this version does not work as intended. page.open() is
// asynchronous, yet next_page(++i) at the bottom runs immediately after the
// open is issued, so every remaining URL is requested back-to-back before any
// page has finished loading.
// NOTE(review): when jobs_list is empty nothing in this function calls
// phantom.exit(), because the exit sits inside the page callback only.
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
// Each <li> overwrites job_location, so the last one found wins.
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
// NOTE(review): `i` is the same variable that next_page(++i) below has
// already incremented, so by the time this callback runs it no longer refers
// to the job whose URL was opened here (and may be past the end of the list).
jobs_list[i].location = location;
if(i == (jobs_list.length-1)) {
phantom.exit(0);
}
});
});
console.log(i, current_job.title);
// Executes right after page.open() was issued, not after the page was processed.
next_page(++i);
}
}
Upvotes: 0
Views: 857
Reputation: 61892
The problem is that the page.open call is asynchronous. If you look closely at your next_page function, you can see that it can be shortened to this:
// Condensed view of next_page: the body of the page.open callback is elided
// to make the order of the remaining statements visible.
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
// Only starts the load; the callback is invoked later.
page.open(url, function (status) {
...
});
console.log(i, current_job.title);
// Runs immediately after page.open() was issued, before its callback.
next_page(++i);
}
}
This means that next_page(++i); is executed before page.open(url, ...) has even managed to load the first HTML content. That call leads to the next page.open(url, ...) being executed immediately, thus overwriting the previous request. You're never going to get any data this way.
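You have to do two things:
- Move the next_page(++i); call to where the processing of one page is finished.
- Exit only once there are no more pages left to process, instead of from inside the page callback.

I propose: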
/**
 * Visit the job pages strictly one after another, storing each page's
 * location text on the matching jobs_list entry, and exit PhantomJS once
 * every entry has been handled.
 *
 * The next page is requested only from inside the callbacks of the current
 * one, because page.open() is asynchronous and a second open() issued early
 * would replace the request still in flight.
 *
 * Relies on the script-level `page`, `jobs_list` and `phantom` objects.
 *
 * @param {number} i Index into jobs_list of the job page to open.
 */
function next_page(i) {
    // Nothing left (or nothing to begin with): the single exit point.
    if (i >= jobs_list.length) {
        phantom.exit(0);
        return;
    }

    var current_job = jobs_list[i];

    page.open(current_job.url, function (status) {
        // A page that failed to load has nothing worth scraping; leave its
        // location untouched and keep the chain going so the script still
        // reaches phantom.exit().
        if (status !== 'success') {
            console.log(i, current_job.title, 'failed to load: ' + status);
            next_page(i + 1);
            return;
        }

        page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
            // Runs inside the page; only the returned string comes back here.
            var location = page.evaluate(function () {
                var job_location;
                // Every <li> overwrites the value, so the last one found wins.
                $('.job-location').each(function () {
                    $(this).find('li').each(function () {
                        job_location = $(this).text();
                    });
                });
                console.log(job_location);
                return job_location;
            });

            current_job.location = location;
            console.log(i, current_job.title);
            // Advance only now that this page has been fully processed.
            next_page(i + 1);
        });
    });
}
That's quite an old version of jQuery. Perhaps you want to load a newer version. If the page already has jQuery included, you will likely break the page by loading another jQuery into it. Don't load an additional jQuery version at all in this case.
Upvotes: 2