Matthew Colley
Matthew Colley

Reputation: 11446

CasperJS scraper can't retrieve content by selector

I am using CasperJS through PhantomJS to slurp data from a site which has no API. I am following the documentation, but as soon as I add my last function,

getTractorData()

I no longer get any data.

The first part of the script creates an array of links. The last part of the script reads this array of links, traverses all of the links and looks for these elements on the page

<td class="details">
    <span class="opening-bid theme-color">

When the script finds these elements, it puts them into an array.

Here is my script:

 // Casper instance used by every step below.
 // `verbose: true` makes CasperJS print its log messages to the console and
 // `logLevel: 'debug'` selects the most detailed of the supported levels
 // (debug, info, warning, error). The previous `debug: true` /
 // `logLevel: 'verbose'` pair matches neither documented option, so no debug
 // output was printed.
 var casper = require('casper').create({
   verbose: true,
   logLevel: 'debug',
   pageSettings: {
     loadImages: true,
     loadPlugins: true,
     userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
   }
 });

 // URLs gathered from the list pages by getLinks().
 var links = [];
 // Values gathered from each visited link by getTractorData().
 var tractorData = [];

 /**
  * Collects the target of every anchor found inside a `tr.onelinelistrow`
  * row of the current document. Each `href` attribute value is appended to
  * the site origin as-is.
  *
  * @returns {string[]} One URL per matching anchor, in document order.
  */
 function getLinks() {
   var anchors = document.querySelectorAll('tr.onelinelistrow a');
   var urls = [];
   for (var i = 0; i < anchors.length; i++) {
     urls.push("http://www.ggg.com" + anchors[i].getAttribute('href'));
   }
   return urls;
 }

 /**
  * Maps every element matching `tr.details` in the current document to the
  * result of a getAttribute() call and returns those results as an array.
  *
  * NOTE(review): getAttribute() looks up an attribute by name; the string
  * passed here reads like a CSS selector rather than an attribute name, so
  * each entry is presumably null - confirm.
  * NOTE(review): the markup quoted for the target page uses
  * <td class="details">, while this selects `tr.details` - verify which
  * element actually carries the class.
  */
 function getTractorData() {
  var links = document.querySelectorAll('tr.details');
  return Array.prototype.map.call(links, function(e) {
    return e.getAttribute('span.opening-bid theme-color');
 });
 }

 // Start on the search URL; no work is done in the start callback itself.
 casper.start('http://www.ggg.com/list/list.aspx?bcatid=464&DidSearch=1&EID=1&LP=TH&FullText=aa23&ETID=1&Manu=APACHE&mdlx=Contains&DisplayExtraTHOSpecs=1&Cond=All&SO=2&btnSearch=Search', function() {
 });

 // List pages (pg=1..3) that are opened in order to harvest links.
 var listPageUrls = [
   'http://www.ggg.com/list/list.aspx?pg=1&ETID=1&SO=2&Manu=APACHE&mdlx=contains&bcatid=464&Pref=1&Thumbs=1&scf=false',
   'http://www.ggg.com/list/list.aspx?pg=2&ETID=1&SO=2&Manu=APACHE&mdlx=contains&bcatid=464&Pref=1&Thumbs=1&scf=false',
   'http://www.ggg.com/list/list.aspx?pg=3&ETID=1&SO=2&Manu=APACHE&mdlx=contains&bcatid=464&Pref=1&Thumbs=1&scf=false'
 ];

 // Queue one step per list page; each step appends that page's links.
 listPageUrls.forEach(function(pageUrl) {
   casper.thenOpen(pageUrl, function() {
     var pageLinks = this.evaluate(getLinks);
     links = links.concat(pageLinks);
   });
 });

 // Once the list pages have been processed, queue one step per collected
 // link and append whatever getTractorData() returns for that page.
 casper.then(function() {
   this.each(links, function(instance, detailUrl) {
     instance.thenOpen(detailUrl, function() {
       var pageData = this.evaluate(getTractorData);
       tractorData = tractorData.concat(pageData);
     });
   });
 });

 // Run the queued steps, print a summary of the collected data, then exit.
 casper.run(function() {
   var summary = tractorData.length + ' data found:';
   this.echo(summary);
   this.echo(' - ' + tractorData.join('\n - ')).exit();
 });

Upvotes: 1

Views: 154

Answers (1)

Artjom B.
Artjom B.

Reputation: 61892

The problem is e.getAttribute('span.opening-bid theme-color'), because span.opening-bid theme-color is not an attribute of the td.details. It's not even a valid CSS (sub-)selector, because there is a space where a dot should be.

I suspect that you want the content of each span element that is a child of the td.details elements. You can get the content of each child span like this:

var links = document.querySelectorAll('tr.details');
return Array.prototype.map.call(links, function(e) {
    return e.querySelector('span.opening-bid.theme-color').textContent;
});

or even

var links = document.querySelectorAll('tr.details > span.opening-bid.theme-color');
return Array.prototype.map.call(links, function(e) {
    return e.textContent;
});

Upvotes: 1

Related Questions