L.T
L.T

Reputation: 15

CasperJS doesn't work with Amazon iframe

I'm running a small project that needs the preview content of some Amazon books (the previews can be either PNG images or HTML content).

For example, this book: https://www.amazon.com/gp/product/B00JNYEXCK/.

When you click the "Look inside" badge (the img tag with id="sitbLogoImg"), a new frame appears showing the preview content of the book. It comes in two versions: the print preview (PNG images, which I can already retrieve) and the Kindle preview (an iframe document).

I'm stuck with the iframe for kindle preview, which basically looks like this:

<!-- Simplified markup of the Kindle preview container; the document loaded in
     the iframe (id="sitbReaderFrame") is shown nested inside it for illustration. -->
<div id="scrollElm-0" class="pageHtml">
  <div id="sitbReaderKindleSample">
    <iframe id="sitbReaderFrame">
      <html>
      <head></head>
      <body>
        <p>.......</p>
        <div>......</div>
        ....
      </body>
      </html>
    </iframe>
  </div>
</div>

Here's my CasperJS script:

// CasperJS script: opens the Amazon product page, clicks the "Look inside"
// badge, and tries to dump the Kindle preview iframe to lis_content.html.
var fs = require('fs');
var casper = require('casper').create({
  pageSettings: {
    loadPlugins: false,
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
  }
});
casper.options.viewportSize = {
  width: 1366,
  height: 768
};
casper.options.waitTimeout = 10000;

// Restore cookies saved by a previous run, if there are any. The file does
// not exist on the very first run, so only read it when it is present.
var cookieFilename = "cookies.txt";
if (fs.exists(cookieFilename)) {
  var data = fs.read(cookieFilename);
  if (data) {
    phantom.cookies = JSON.parse(data);
  }
}

casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
  this.echo(this.status(true));
  this.captureSelector('before.png', 'html');
});

// Wait until the "Look inside" badge is present before clicking it.
casper.waitForSelector('img#sitbLogoImg', function() {
  // nothing to do here; this step only synchronises on the badge
});
casper.then(function() {
  this.click('img#sitbLogoImg');
});

// The preview lightbox header signals that the reader has opened.
casper.waitForSelector('div#sitbLBHeader', function() {
  // nothing to do here; this step only synchronises on the lightbox
});

var lis_content = '';
// Give the reader a few seconds to load its content, then take a screenshot.
casper.wait(3000, function() {
  this.captureSelector('after.png', 'html');
});

// Frame index 1 is a guess: the target iframe (id="sitbReaderFrame") has no
// name attribute, so it cannot be addressed by name here.
casper.withFrame(1, function() {
  lis_content = this.getHTML();
  this.captureSelector('lis_content.png', 'html');
});

// Write the sitbReaderFrame content to a file.
casper.then(function() {
  var lis_content_filename = 'lis_content.html';
  // The third argument of fs.write is an open mode ('w' = overwrite),
  // not a Unix permission mask.
  fs.write(lis_content_filename, lis_content, 'w');
});

// Persist the cookies so the next run can reuse the session.
casper.wait(1000, function() {
  var cookies = JSON.stringify(phantom.cookies);
  fs.write(cookieFilename, cookies, 'w');
});
casper.run();

The problem is that the iframe only has id="sitbReaderFrame" and no name. I've tried casper.withFrame with frame indexes from 0 to 4, but the frame doesn't seem to exist from CasperJS's point of view.

I would like to hear any advice from you, as I'm really stuck here. Thank you very much and sorry for my bad English.

Upvotes: 0

Views: 383

Answers (1)

user4535610
user4535610

Reputation:

CasperJS script:

/**
 * Makes a freshly initialised page look like a desktop Chrome browser.
 *
 * Sets the PhantomJS viewport to 1600x900 and, inside the page context,
 * overrides the screen/window size properties and replaces window.navigator
 * with a hand-written object describing Chrome 56 on Linux.
 * Registered elsewhere in this script as the 'page.initialized' handler.
 *
 * @param {Object} page - the PhantomJS WebPage instance being initialised.
 */
function on_init (page){
  // Dimensions must be numbers: real browsers expose numeric sizes, and string
  // values would leak into the page (e.g. innerWidth + 1 === '16001').
  var width = 1600, height = 900;

  page.viewportSize = {width: width, height: height};
  page.evaluate(function (width, height){
    // Everything below runs inside the page, where `window` is the global object.
    window.screen = {width: width, height: height, availWidth: width, availHeight: height};
    window.innerWidth = width;
    window.innerHeight = height;
    window.outerWidth = width;
    window.outerHeight = height;
    window.navigator = {
      plugins: {length: 2, 'Shockwave Flash': {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}},
      mimeTypes: {length: 2, "application/x-shockwave-flash": {description: "Shockwave Flash", suffixes: "swf", type: "application/x-shockwave-flash", enabledPlugin: {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}}},
      appCodeName: "Mozilla",
      appName: "Netscape",
      appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
      // Booleans, an array and a string here match the types a real
      // Navigator object exposes for these properties.
      cookieEnabled: true,
      languages: ["en-US", "en"],
      language: "en",
      onLine: true,
      doNotTrack: null,
      platform: "Linux x86_64",
      product: "Gecko",
      vendor: "Google Inc.",
      vendorSub: "",
      productSub: "20030107",
      userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
      // No-op stubs so feature-detection code finds the expected methods.
      geolocation: {getCurrentPosition: function getCurrentPosition(){}, watchPosition: function watchPosition(){}, clearWatch: function clearWatch(){}},
      javaEnabled: function javaEnabled(){ return false; }
    };
  }, width, height);
}

// CasperJS script: opens the product page, clicks the "Look inside" badge,
// finds the Kindle preview iframe by id and writes its HTML to a file.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    // The user agent is a PhantomJS page setting, so it belongs under
    // pageSettings; as a top-level create() option it is not applied.
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
});
var fs = require('fs');

casper
    .on("error", function (msg) { this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function (msg, trace) { this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function (msg) { this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function () {
        this.click('#ebooksSitbLogoImg');
        this
            .capture('lis.png')
            .wait(3000, function () {
                // Locate the preview iframe among all iframes in the document.
                // NOTE(review): the +1 offset is kept from the original script;
                // confirm it against the frame numbering withFrame expects.
                var index = this.evaluate(function () {
                    var frames = document.querySelectorAll('iframe');
                    var i;
                    for (i = 0; i < frames.length; i++) {
                        if (frames[i].id == "sitbReaderFrame") {
                            return i + 1;
                        }
                    }
                    return null;
                });

                this
                    .echo("The index is: " + index, "INFO")
                    .capture('lis_content.png');

                // Without a match there is no frame to switch into.
                if (index === null || index === undefined) {
                    this.echo("iframe#sitbReaderFrame was not found", "ERROR");
                    return;
                }

                this.withFrame(index, function () {
                    // The third argument of fs.write is an open mode
                    // ('w' = overwrite), not a Unix permission mask.
                    fs.write('lis_content.html', this.getHTML(), 'w');
                });
            });
    })
    .run();
You need to use the --cookies-file option, to avoid blocking.

./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout

If it prints:

error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg

If that happens, the blocking cannot be avoided from the script.

In that case, try again after reconnecting to the internet and getting a new IP address.

Upvotes: 1

Related Questions