Reputation: 769
// PhantomJS script: render a JPEG screenshot of the URL given on the command line.
// Usage: phantomjs <this-script> <url>
// Prints the image path and exits 0 on success; prints the URL and exits 1
// when the page cannot be loaded.
var system = require("system");
var page;
// user supplied url
var myurl = system.args[1];
// var myurl = 'https://waffles.ch/';
if (!myurl) {
    // Without a URL there is nothing to open; fail fast instead of opening "undefined".
    console.log("Usage: phantomjs " + system.args[0] + " <url>");
    phantom.exit(1);
} else {
    page = require('webpage').create();
    // suppress errors from output
    page.onError = function(msg, trace) {};
    // 5 seconds
    page.settings.resourceTimeout = 5000;
    // page.settings.javascriptEnabled = false;
    page.open(myurl, function(status) {
        //hack for page.open not hooking into phantom.onError
        setTimeout(function() {
            if (status !== "success") {
                console.log(myurl);
                // Report the failure through the exit status. Nothing is thrown
                // afterwards: phantom.exit() has already been requested.
                phantom.exit(1);
            } else {
                // Derive the file name from the host: strip only a leading scheme and
                // a leading "www.", cut at the first path/query/fragment separator,
                // and replace characters that are not safe in file names (e.g. ":").
                var pageTitle = myurl
                    .replace(/^https?:\/\//i, "")
                    .replace(/^www\./i, "")
                    .split(/[\/?#]/)[0]
                    .replace(/[^\w.\-]/g, "_");
                var filePath = "img/" + pageTitle + '.jpg';
                page.render(filePath, {format: 'jpeg', quality: '75'});
                console.log(filePath);
                phantom.exit();
            }
        }, 0);
    });
}
Using the above code to take screenshots works fine for most webpages. Running the script through the console or a web app with the URL "https://waffles.ch/", however, causes it to hang indefinitely on page.open (I believe).
The reason, I assume, is that this URL contains a JS animation which never stops running (an aeroplane flying across the screen), and it causes Phantom to lock up. Is this a known bug?
I'm quite sure JS causes the hang because if I switch it off with page.settings.javascriptEnabled = false;
then the screenshot for the page is rendered without any problems.
I can't realistically switch off JavaScript to take screenshots for obvious reasons (page.evaluate, redirects, etc.), so here are my two questions.
1.) Is there a way to render a screenshot for a webpage containing animation, like waffles.ch, without having to switch off JavaScript?
2.) If a webpage does hang on page.open, how can I exit Phantom and possibly also return an error?
Any help/advice would be greatly appreciated.
Phantom version: 2.1.1 OS: Windows 7 64 bit.
Other things I've tried (both still hang for the above URL).
With try/catch:
// PhantomJS script: same screenshot task, with the page.open callback wrapped
// in try/catch so that exceptions are reported.
// Exits 0 on success and 1 on a load failure or an exception.
var system = require("system");
// user supplied url
var myurl = system.args[1];
var page = require('webpage').create();
page.open(myurl, function (status) {
    try {
        if (status !== "success") {
            console.log("Unable to access network");
            phantom.exit(1);
        } else {
            //do some stuff with the DOM
            // Strip only a leading scheme and a leading "www.", then keep the host part
            // and replace characters that are not safe in file names.
            var pageTitle = myurl
                .replace(/^https?:\/\//i, "")
                .replace(/^www\./i, "")
                .split(/[\/?#]/)[0]
                .replace(/[^\w.\-]/g, "_");
            var filePath = "img/" + pageTitle + '.jpg';
            page.render(filePath, {format: 'jpeg', quality: '75'});
            console.log(filePath);
            phantom.exit();
        }
    } catch (ex) {
        var fullMessage = "\nJAVASCRIPT EXCEPTION";
        fullMessage += "\nMESSAGE: " + ex.toString();
        for (var p in ex) {
            fullMessage += "\n" + p.toUpperCase() + ": " + ex[p];
        }
        console.log(fullMessage);
        // PhantomJS keeps running until phantom.exit() is called, so the error
        // path must exit explicitly or the process never terminates.
        phantom.exit(1);
    }
});
// ******************************
Using the waitFor() function from the PhantomJS examples:
https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
// PhantomJS script: open the URL and wait until <body> is visible.
// NOTE: waitFor() is not defined in this snippet; it is expected to be the
// helper from the PhantomJS waitfor.js example, pasted into the same script.
var system = require("system");
// user supplied url
var myurl = system.args[1];
var page = require('webpage').create();
// report page-side JavaScript errors and abort with a failure status
page.onError = function(msg, trace) {
    console.log("Error occurred: " + msg);
    phantom.exit(1);
};
// 5 seconds
page.settings.resourceTimeout = 5000;
page.open(myurl, function (status) {
    // Check for page load success
    if (status !== "success") {
        console.log("Unable to access network");
        phantom.exit(1);
    } else {
        waitFor(function() {
            // Check in the page if the body is now visible. Plain DOM is used so the
            // check does not depend on the page having loaded jQuery; the test mirrors
            // jQuery's ":visible" (the element occupies layout space).
            return page.evaluate(function() {
                var body = document.body;
                return !!body && (body.offsetWidth > 0 ||
                    body.offsetHeight > 0 ||
                    body.getClientRects().length > 0);
            });
        }, function() {
            console.log("body is visible");
            phantom.exit();
        });
    }
});
Upvotes: 1
Views: 1540
Reputation: 6476
It turns out there is no way to terminate Phantom in this situation, at least not from within Phantom itself, but there is a way to avoid the problem.
The root cause is that the implementation of requestAnimationFrame in PhantomJS doesn't play nicely with tween.js. The number passed to the callback by Phantom is a UNIX epoch number (but with fractional seconds), whereas tween.js expects a DOMHighResTimeStamp (like what you would get from performance.now(), starting from zero when the process starts). The epoch number is always much higher than the tween end time, so every update is seen as the end of a tween, and this causes TWEEN.update to slam through into the next cycle, thus causing it to block.
The way to fix it is to inject a polyfill, including a performance.now polyfill, to overwrite Phantom's requestAnimationFrame implementation, using page.injectJs.
Here is the code that needs to be injected (or something better)...
request-animation-frame.js
// Include a performance.now polyfill.
// `now()` returns a timestamp in milliseconds from the best clock available:
//   1. Node.js (no `window`): process.hrtime()
//   2. Browser with window.performance.now: high-resolution time
//   3. Date.now, or `new Date().getTime()` as a last resort (epoch milliseconds)
var now = (function () {
    // `typeof` checks are used instead of `this.window` / `this.process`, because
    // `this` is undefined inside this function in strict mode and in modules.
    var hasWindow = typeof window !== 'undefined';

    // In node.js, use process.hrtime.
    if (!hasWindow &&
        typeof process !== 'undefined' &&
        typeof process.hrtime === 'function') {
        return function () {
            var time = process.hrtime();
            // Convert [seconds, nanoseconds] to milliseconds.
            return time[0] * 1000 + time[1] / 1000000;
        };
    }

    // In a browser, use window.performance.now if it is available.
    if (hasWindow &&
        window.performance !== undefined &&
        window.performance.now !== undefined) {
        // This must be bound, because directly assigning this function
        // leads to an invocation exception in Chrome.
        return window.performance.now.bind(window.performance);
    }

    // Use Date.now if it is available.
    if (Date.now !== undefined) {
        return Date.now;
    }

    // Otherwise, use 'new Date().getTime()'.
    return function () {
        return new Date().getTime();
    };
})();
// http://paulirish.com/2011/requestanimationframe-for-smart-animating/
// http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating
// requestAnimationFrame polyfill by Erik Möller. fixes from Paul Irish and Tino Zijdel
// MIT license
// Adapted to shim floating point milliseconds since the page was opened
// https://developers.google.com/web/updates/2012/05/requestAnimationFrame-API-now-with-sub-millisecond-precision?hl=en
// Replaces window.requestAnimationFrame with a wrapper that hands the callback a
// timestamp taken from the global `now()` helper (rather than whatever the native
// implementation would pass). Falls back to window.setTimeout when there is no
// native requestAnimationFrame.
(function () {
    var FRAME_INTERVAL = 1000 / 60;
    var nativeRequest = window.requestAnimationFrame;
    var previousStamp = 0;

    window.requestAnimationFrame = function (callback) {
        var requestedAt = now();
        // Delay needed to keep roughly 60 calls per second, never negative.
        var delay = Math.max(0, FRAME_INTERVAL - (requestedAt - previousStamp));
        var stamp = requestedAt + delay;

        // Whatever argument the scheduler supplies is ignored; the callback always
        // receives the timestamp computed above.
        var invoke = function () {
            callback(stamp);
        };

        var handle;
        if (nativeRequest) {
            handle = nativeRequest.call(window, invoke);
        } else {
            handle = window.setTimeout(invoke, delay);
        }

        previousStamp = stamp;
        return handle;
    };

    if (!window.cancelAnimationFrame) {
        window.cancelAnimationFrame = clearTimeout;
    }
}());
And here is the code to put in Phantom's outer context to inject it...
// Inject the requestAnimationFrame shim from the onInitialized hook of `page`.
function injectAnimationFrameShim() {
    page.injectJs('request-animation-frame.js');
}
page.onInitialized = injectAnimationFrameShim;
In the context of your question...
/**
 * Adjusted by cool.blue on 08-Sep-16.
 *
 * PhantomJS script: render a JPEG screenshot of a URL, injecting
 * request-animation-frame.js into the page from the onInitialized hook.
 * Prints the image path followed by the load status and exits 0 on success;
 * prints "Error: <status>" and exits 1 when the page cannot be loaded.
 */
var system = require('system');
var page;
// user supplied url
var myurl = system.args[1] || 'https://waffles.ch/';
page = require('webpage').create();
// suppress errors from output
page.onError = function(msg, trace) {};

// Log the message and exit; any message containing "Error:" yields exit status 1.
function exitPhantom(message) {
    console.log(message);
    phantom.exit(message.indexOf("Error:") !== -1 ? 1 : 0);
}

// Forward the page's own console output, prefixed so it can be told apart.
page.onConsoleMessage = function(message) {
    system.stdout.write('> ' + message + '\n');
};

page.onInitialized = function() {
    // injectJs reports failure through its return value; warn on stderr so a
    // missing polyfill file is not silent.
    if (!page.injectJs('request-animation-frame.js')) {
        system.stderr.write('Warning: could not inject request-animation-frame.js\n');
    }
};

// 10 seconds
page.settings.resourceTimeout = 10000;
// page.settings.javascriptEnabled = false;
page.open(myurl, function(status) {
    //hack for page.open not hooking into phantom.onError
    setTimeout(function() {
        if (status !== "success") {
            // exitPhantom already requests the exit with status 1; nothing is
            // thrown afterwards.
            exitPhantom('Error: ' + status);
        } else {
            // Strip only a leading scheme and a leading "www.", keep the host part,
            // and replace characters that are not safe in file names.
            var pageTitle = myurl
                .replace(/^https?:\/\//i, "")
                .replace(/^www\./i, "")
                .split(/[\/?#]/)[0]
                .replace(/[^\w.\-]/g, "_");
            var filePath = "img/" + pageTitle + '.jpg';
            page.render(filePath, {format: 'jpeg', quality: '75'});
            console.log(filePath);
            exitPhantom(status);
        }
    }, 1000);
});
In this case, the resourceTimeout feature will work as advertised and protect against over-long load times, and any page that uses similar animation techniques will work fine.
Upvotes: 1