Reputation: 11446
I am using CasperJS through PhantomJS to scrape data from a site that has no API. I am following the documentation, but as soon as I add my last function,
getTractorData()
, I suddenly stop getting any data.
The first part of the script creates an array of links. The last part of the script reads this array, visits each link, and looks for these elements on the page:
<td class="details">
<span class="opening-bid theme-color">
When the script finds these elements, it puts their contents into an array.
Here is my script:
// Create the CasperJS instance that drives the whole scrape.
var casper = require('casper').create({
    // `verbose` enables realtime printing of log messages and `logLevel` sets
    // the minimum level printed. CasperJS has no `debug` option, and 'verbose'
    // is not one of its log levels (debug, info, warning, error).
    verbose: true,
    logLevel: 'debug',
    pageSettings: {
        loadImages: true,
        loadPlugins: true,
        userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
    }
});

// URLs gathered from the listing pages; each one is opened later.
var links = [];
// Values gathered from the opened pages, in visit order.
var tractorData = [];
/**
 * Builds absolute URLs from every anchor found inside a `tr.onelinelistrow`
 * row of the current document.
 *
 * Reads the global `document`, so it is meant to be handed to
 * `casper.evaluate()` and executed inside the loaded page.
 *
 * @returns {string[]} One "http://www.ggg.com"-prefixed URL per matching anchor.
 */
function getLinks() {
    var anchors = document.querySelectorAll('tr.onelinelistrow a');
    var urls = [];
    for (var i = 0; i < anchors.length; i++) {
        // href values are prefixed with the site origin to make them absolute.
        urls.push("http://www.ggg.com" + anchors[i].getAttribute('href'));
    }
    return urls;
}
/**
 * Maps every element matching `tr.details` in the current document to the
 * result of a getAttribute() call on it.
 *
 * Reads the global `document`, so it is meant to be handed to
 * `casper.evaluate()` and executed inside the loaded page.
 *
 * @returns {Array} One entry per matched element.
 */
function getTractorData() {
// NOTE(review): the markup described for the target page is <td class="details">,
// while this selector matches <tr class="details"> - confirm which is intended.
var links = document.querySelectorAll('tr.details');
return Array.prototype.map.call(links, function(e) {
// NOTE(review): getAttribute() looks up an attribute by name, but this string
// reads like a CSS selector (with a space where a dot would be expected), so
// each entry is presumably null rather than the span's text - confirm.
return e.getAttribute('span.opening-bid theme-color');
});
}
// Listing pages to harvest links from (value of the `pg` query parameter).
var LISTING_PAGES = [1, 2, 3];

// Open the search URL first.
casper.start('http://www.ggg.com/list/list.aspx?bcatid=464&DidSearch=1&EID=1&LP=TH&FullText=aa23&ETID=1&Manu=APACHE&mdlx=Contains&DisplayExtraTHOSpecs=1&Cond=All&SO=2&btnSearch=Search');

// Queue one step per listing page; each step appends that page's links.
LISTING_PAGES.forEach(function(page) {
    casper.thenOpen('http://www.ggg.com/list/list.aspx?pg=' + page + '&ETID=1&SO=2&Manu=APACHE&mdlx=contains&bcatid=464&Pref=1&Thumbs=1&scf=false', function() {
        // evaluate() can come back null (e.g. if the in-page function throws);
        // fall back to [] so a null never ends up in `links`.
        links = links.concat(this.evaluate(getLinks) || []);
    });
});

// This runs only after the listing steps above, so `links` is fully populated
// by the time the per-link steps are queued.
casper.then(function() {
    this.each(links, function(self, link) {
        self.thenOpen(link, function() {
            // Same null guard as above, so a failed page does not add a null entry.
            tractorData = tractorData.concat(this.evaluate(getTractorData) || []);
        });
    });
});
// Run the queued steps; when they have all finished, print a summary line
// followed by one " - "-prefixed line per collected value, then exit.
casper.run(function() {
    var bulletList = ' - ' + tractorData.join('\n - ');
    this.echo(tractorData.length + ' data found:');
    this.echo(bulletList);
    this.exit();
});
Upvotes: 1
Views: 154
Reputation: 61892
The problem is e.getAttribute('span.opening-bid theme-color')
, because span.opening-bid theme-color
is not an attribute of the td.details. It's not even a valid CSS (sub-)selector, because there is a space where a dot should be.
I suspect that you want the content of each span element that is a child of the td.details elements. You can get the content of each child span like this:
/**
 * Collects the text of the opening-bid span inside each `td.details` cell.
 * Intended to be passed to `casper.evaluate()` so it runs inside the page.
 *
 * @returns {string[]} Text content of each span found; cells without such a
 *     span are skipped.
 */
function getTractorData() {
    // The class sits on the <td>, so select td.details rather than tr.details.
    var cells = document.querySelectorAll('td.details');
    var bids = [];
    Array.prototype.forEach.call(cells, function(cell) {
        var span = cell.querySelector('span.opening-bid.theme-color');
        // querySelector() returns null when nothing matches; skip those cells
        // instead of throwing on `.textContent`.
        if (span) {
            bids.push(span.textContent);
        }
    });
    return bids;
}
or even
/**
 * Collects the text of every opening-bid span that is a direct child of a
 * `td.details` cell, selecting the spans in a single query.
 * Intended to be passed to `casper.evaluate()` so it runs inside the page.
 *
 * @returns {string[]} Text content of each matching span.
 */
function getTractorData() {
    // The class sits on the <td>, so select td.details rather than tr.details.
    var spans = document.querySelectorAll('td.details > span.opening-bid.theme-color');
    return Array.prototype.map.call(spans, function(span) {
        return span.textContent;
    });
}
Upvotes: 1