Reputation: 4486
I have the following code:
function getLinks() {
var links = document.querySelectorAll('a.row-link');
return Array.prototype.map.call(links, function(e) {
return e.getAttribute('href');
});
}
casper.start('http://somedomain.com/board/search/search/?p=' + pagee);
// Get all the pages here!
casper.then(function(){
while(pagenos = nextpagereg.exec(this.getHTML())) {;
this.echo(pagenos);
pages.push(pagenos);
}
this.echo(pages.length + ' PAGES FOUND');
return pages;
});
casper.then(function() {
pagee = 0;
links = this.evaluate(getLinks);
this.echo(links.length + ' links found:');
this.each(pages, function(self,page){
pagee++;
this.echo('WORKING ON PAGE' + pagee);
self.thenOpen('http://somedomain.com/board/search/search/?p=' + pagee, function(self){
this.each(links, function(self, link){
self.thenOpen(link, function(self, a){
title = this.getHTML('h2#job-title').trim();
if(casper.exists('p#job-subtitle a')){
company = this.getHTML('p#job-subtitle a');
} else {
title = "NA";
}
loc = this.getHTML('p#job-subtitle>strong');
email = regex.exec(this.getHTML());
this.echo("Title : " + title);
this.echo("Company : " + company);
this.echo("Location : " + loc);
this.echo("Email : " + email);
this.echo("************************************************************************************************************");
});
//this.echo(link);
});
});
});
});
casper.run(function() {
this.exit();
});
The problems seems to be that it doesn't run through the pages in order, the outside loop just crashes through all the page links!
EDIT
To clarify this is the output, as you can see the loop shoots through all the page increments before its processed each page:
http://somedomain.com/job-board/search/@/?p=2
http://somedomain.com/job-board/search/search/?p=3
http://somedomain.com/job-board/search/search/?p=4
http://somedomain.com/job-board/search/search/?p=5
http://somedomain.com/job-board/search/search/?p=6
5 PAGES FOUND
50 links found in page 1:
LOOP START
WORKING ON PAGE1
WORKING ON PAGE2
WORKING ON PAGE3
WORKING ON PAGE4
WORKING ON PAGE5
Upvotes: 0
Views: 348
Reputation: 61892
It looks like you need to scrape the links on each of the five pages, but you only do this once before you start iterating.
You should add
links = this.evaluate(getLinks);
this.echo(links.length + ' links found:');
into self.thenOpen('http://somedomain.com/board/search/search/?p=' + pagee, ...
before the each
.
A cleaner way would be to move the getLinks
into self.thenOpen
and exchange the top-most
this.each(pages, function(self,page){
...
});
with
for(var page = 0; page < pages.length; page++){
...
}
Upvotes: 1