turrican_34
turrican_34

Reputation: 769

How to exit phantom if it hangs on page.open (with example)

 // Renders a screenshot of the URL given as the first command-line argument to
      // img/<host>.jpg. Prints the image path on success, or the URL on failure.
      var system = require("system");
      var page;

      // user supplied url
      var myurl = system.args[1];
      // var myurl = 'https://waffles.ch/';

      page = require('webpage').create();

      // suppress errors from output
      page.onError = function(msg, trace) {};

      // 5 seconds
      page.settings.resourceTimeout = 5000;

      // page.settings.javascriptEnabled = false;

      page.open(myurl, function(status) {

          //hack for page.open not hooking into phantom.onError
          setTimeout(function() {
              if (status !== "success") {
                  console.log(myurl);
                  phantom.exit();
                  throw new Error("Unable to access network");
              } else {
                  // Strip only a *leading* scheme and a *leading* "www." so that a URL
                  // embedded later in the path or query string (e.g. "?next=http://...")
                  // cannot change which host name ends up in the file name.
                  var pageTitle = myurl.replace(/^https?:\/\//i, "").replace(/^www\./i, "").split("/")[0];
                  var filePath = "img/" + pageTitle + '.jpg';
                  page.render(filePath, {format: 'jpeg', quality: '75'});
                  console.log(filePath);
                  phantom.exit();
              }

          }, 0);
      });

Using the above code to take screenshots works fine for most webpages. Running the script through the console or a web app with the URL "https://waffles.ch/", however, causes it to hang indefinitely on page.open (I believe).

The reason for this I assume is because this URL contains some JS animation which doesn't stop running (an aeroplane flying across the screen), and it causes Phantom to lock up. Is this a known bug??

I'm quite sure JS causes the hang because if I switch it off with page.settings.javascriptEnabled = false; then the screenshot for the page is rendered without any problems.

I can't realistically switch off JavaScript to take screenshots, for obvious reasons (page.evaluate, redirects, etc.), so here are my two questions.

1.) Is there a way to render a screenshot for a webpage containing animation like waffles.ch without having to switch off javascript??

2.) If a webpage does hang on page.open, how can I exit phantom and possibly also return an error?

Any help/advice would be greatly appreciated.

Phantom version: 2.1.1 OS: Windows 7 64 bit.

Other things I've tried (but which still hang for the above URL):

with try/catch

// Screenshot script variant that wraps the page.open callback in try/catch.
// Prints the image path on success, "Unable to access network" when the page
// fails to load, or a dump of the exception if the callback itself throws.
var system = require("system");
// user supplied url
var myurl = system.args[1];

var page = require('webpage').create();
page.open(myurl, function (status) {
    try {
        if (status !== "success") {
            console.log("Unable to access network");
            phantom.exit();
        } else {
            //do some stuff with the DOM
            // Strip only a leading scheme and a leading "www." so that a URL embedded
            // later in the path or query string cannot change the derived host name.
            var pageTitle = myurl.replace(/^https?:\/\//i, "").replace(/^www\./i, "").split("/")[0];
            var filePath = "img/" + pageTitle + '.jpg';
            page.render(filePath, {format: 'jpeg', quality: '75'});
            console.log(filePath);
            phantom.exit();
        }
    } catch (ex) {
        // Report the exception message plus every enumerable property on it.
        var fullMessage = "\nJAVASCRIPT EXCEPTION";
        fullMessage += "\nMESSAGE: " + ex.toString();
        for (var p in ex) {
            fullMessage += "\n" + p.toUpperCase() + ": " + ex[p];
        }
        console.log(fullMessage);
        // Neither branch above reached phantom.exit(), so without this call the
        // process would keep running after the exception was logged.
        phantom.exit(1);
    }
});


// ******************************

Using waitfor() function. https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js

// Opens the URL given as the first command-line argument and waits until the
// page's <body> is reported visible, then exits.
// NOTE(review): `waitFor` is not defined in this snippet; presumably it is the
// helper from the PhantomJS waitfor.js example and must be included separately.
var system = require("system");
// user supplied url
var myurl = system.args[1];

var page = require('webpage').create();

// report errors raised by the page's JavaScript, then stop
page.onError = function(msg, trace) {
  console.log("Error occurred" + msg);
  phantom.exit();
};

// 5 seconds
page.settings.resourceTimeout = 5000;

page.open(myurl, function (status) {
    // Bail out early when the page could not be loaded
    if (status !== "success") {
        console.log("Unable to access network");
        phantom.exit();
        return;
    }

    // Condition polled by waitFor: evaluated inside the page.
    // NOTE(review): relies on `$` (presumably jQuery) being provided by the page.
    var bodyIsVisible = function() {
        return page.evaluate(function() {

            return $("body").is(":visible");
        });
    };

    // Invoked by waitFor once the condition holds
    var reportAndExit = function() {
       console.log("body is visible");
       phantom.exit();
    };

    waitFor(bodyIsVisible, reportAndExit);
});

Upvotes: 1

Views: 1540

Answers (1)

Cool Blue
Cool Blue

Reputation: 6476

It turns out there is no way to terminate phantom in this situation, at least not by itself, but there is a way to avoid the problem.

The root cause is that the implementation of requestAnimationFrame in phantomJs doesn't play nice with tweenJs. The number returned to the callback by phantom is a UNIX epoch number (but with fractional seconds) and tweenJs expects a DOMHighResTimeStamp (like what you would get from performance.now(), starting from zero when the process starts). The epoch number is always much higher than the tween end time, so every update is seen as the end of a tween and this causes TWEEN.update to slam through into the next cycle, thus causing it to block.

The way to fix it is to inject a polyfill, including a performance.now polyfill, to overwrite phantom's requestAnimationFrame implementation, using page.injectJs.

Here is the code that needs to be injected (or something better)...

request-animation-frame.js

// performance.now polyfill.
//
// `now()` returns a timestamp in milliseconds, taken from the first clock in
// this list that the environment provides:
//   1. process.hrtime()        (Node.js, i.e. no `window` global)
//   2. window.performance.now  (browsers that implement it)
//   3. Date.now
//   4. new Date().getTime()
//
// Globals are probed with `typeof` rather than through `this`, so the probe
// also works where `this` is undefined inside a plain function call (strict
// mode, ES modules).
var now = (function () {
  var hasWindow = typeof window !== 'undefined';

  // In node.js, use process.hrtime.
  if (!hasWindow &&
    typeof process !== 'undefined' &&
    typeof process.hrtime === 'function') {
    return function () {
      var time = process.hrtime();

      // Convert [seconds, nanoseconds] to milliseconds.
      return time[0] * 1000 + time[1] / 1e6;
    };
  }

  // In a browser, use window.performance.now if it is available.
  if (hasWindow &&
    window.performance &&
    typeof window.performance.now === 'function') {
    // This must be bound, because directly assigning this function
    // leads to an invocation exception in Chrome.
    return window.performance.now.bind(window.performance);
  }

  // Use Date.now if it is available.
  if (Date.now !== undefined) {
    return Date.now;
  }

  // Otherwise, use 'new Date().getTime()'.
  return function () {
    return new Date().getTime();
  };
})();

// http://paulirish.com/2011/requestanimationframe-for-smart-animating/
// http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating

// requestAnimationFrame polyfill by Erik Möller. fixes from Paul Irish and Tino Zijdel

// MIT license

// Adapted to shim floating point milliseconds since the page was opened
// https://developers.google.com/web/updates/2012/05/requestAnimationFrame-API-now-with-sub-millisecond-precision?hl=en


// Replaces window.requestAnimationFrame with a wrapper that always passes the
// callback a timestamp computed from `now()` (the frame's scheduled due time)
// instead of whatever value the underlying scheduler supplies.
(function () {
  // Implementation present before the override (may be undefined).
  var nativeRaf = window.requestAnimationFrame;
  // Due time of the most recently scheduled frame.
  var previousDue = 0;

  window.requestAnimationFrame = function (callback) {
    var requestedAt = now();
    // Aim for ~60 frames per second relative to the previous frame's due time.
    var delay = Math.max(0, 1000 / 60 - (requestedAt - previousDue));
    var dueAt = requestedAt + delay;

    // Ignores any argument from the scheduler and forwards our own timestamp.
    var deliver = function () {
      callback(dueAt);
    };

    var handle;
    if (nativeRaf) {
      handle = nativeRaf.call(window, deliver);
    } else {
      handle = window.setTimeout(deliver, delay);
    }

    previousDue = dueAt;

    return handle;
  };

  // Only the timer fallback needs a cancel shim; an existing one is kept.
  if (!window.cancelAnimationFrame) {
    window.cancelAnimationFrame = clearTimeout;
  }

}());

and here is the code to put in phantom's outer context to inject it...

// Inject the requestAnimationFrame shim from the onInitialized hook, which
// PhantomJS documents as firing after the page is created but before a URL is
// loaded, so the shim is in place before the page's own scripts run.
page.onInitialized = function() {
  // injectJs is documented to return false when the script could not be
  // injected (e.g. file not found); surface that rather than silently running
  // without the shim.
  if (!page.injectJs('request-animation-frame.js')) {
    console.log('Warning: could not inject request-animation-frame.js');
  }
};

In the context of your question...

/**
 * Adjusted by cool.blue on 08-Sep-16.
 *
 * Renders a screenshot of a URL (first command-line argument, defaulting to
 * https://waffles.ch/) to img/<host>.jpg after injecting the
 * requestAnimationFrame shim into the page. Exits with code 1 when the page
 * fails to load and 0 otherwise.
 */
var system = require('system');
var page;

// user supplied url
var myurl = system.args[1] || 'https://waffles.ch/';

page = require('webpage').create();

// suppress errors from output
page.onError = function(msg, trace) {};

// Print `message`, then exit with code 1 if it contains "Error:", else 0.
function exitPhantom (message) {
  console.log(message);
  phantom.exit(message.match("Error:") ? 1 : 0);
}

// Forward the page's console output to stdout, prefixed with "> ".
page.onConsoleMessage = function(message) {
  system.stdout.write('> ' + message + '\n');
};

page.onInitialized = function() {
  // injectJs is documented to return false when the script could not be
  // injected; report it rather than silently running without the shim.
  if (!page.injectJs('request-animation-frame.js')) {
    console.log('Warning: could not inject request-animation-frame.js');
  }
};

// 10 seconds
page.settings.resourceTimeout = 10000;

// page.settings.javascriptEnabled = false;
page.open(myurl, function(status) {

  //hack for page.open not hooking into phantom.onError
  // The render is deferred by one second after the load callback.
  setTimeout(function() {
    if (status !== "success") {
      exitPhantom('Error: ' + status);
      throw new Error("Unable to access network");
    } else {
      // Strip only a leading scheme and a leading "www." so that a URL embedded
      // later in the path or query string cannot change the derived host name.
      var pageTitle = myurl.replace(/^https?:\/\//i, "").replace(/^www\./i, "").split("/")[0];
      var filePath = "img/" + pageTitle + '.jpg';
      page.render(filePath, {format: 'jpeg', quality: '75'});
      console.log(filePath);
      exitPhantom(status);
    }
  }, 1000);

});

In this case, the resourceTimeout feature will work as advertised and protect against over-long load times and any page that uses similar animation techniques will work fine.

Upvotes: 1

Related Questions