Reputation: 1353
I am attempting to use PhantomJS to crawl our ASP.NET web app and take screenshots of a list of pages defined in a simple text file of URLs. It works fine for pages that are not behind the login wall, but I can't get my PhantomJS instance authenticated. Log messages show that my two interval functions run in the right order. Any ideas how to make sure I'm logged in before the remaining pages are loaded?
var fs = require('fs'),
system = require('system');
// Script state for the URL list:
//   content      - raw text of the URL file ('' until read succeeds)
//   f            - file stream returned by fs.open (null until opened)
//   lines        - one entry per line of the file (the URLs)
//   pages        - last path segment of each URL
//   destinations - screenshot output path for each URL
//   eol          - separator used to split the file: CRLF when
//                  system.os.name is 'windows', LF otherwise
var content = '',
f = null,
lines = null,
pages =null,
destinations = null,
eol = system.os.name == 'windows' ? "\r\n" : "\n";
// Read a line-break-separated list of URLs from .\urls.txt.
// The page at index 0 is the login page.
try {
f = fs.open(".\\urls.txt", "r");
content = f.read();
} catch (e) {
// A failure to open/read is only logged; content stays '' so the
// parsing block below is skipped.
console.log(e);
}
// Close the stream if it was opened, whether or not the read succeeded.
if (f) {
f.close();
}
if (content) {
lines = content.split(eol);
pages = new Array();
destinations = new Array();
for (var i = 0, len = lines.length; i < len; i++) {
// Page name = everything after the last '/' in the URL.
var pageName = lines[i].substring(lines[i].lastIndexOf('/') + 1);
pages[i] = pageName;
// Screenshot path: .\NewScreenShot\<pageName>.png (pageName is used unmodified).
destinations[i] = ".\\NewScreenShot\\" + pageName + '.png';
}
}
// NOTE(review): when content is empty, pages is still null here and
// pages.length throws.
console.log('Pages found: ' + pages.length);
// The single WebPage instance used for the login page and all screenshots.
var page = require('webpage').create();
// Index of the next entry of loginSteps to run.
var loginIndex = 0;
// Set by onLoadStarted / cleared by onLoadFinished while the page at
// index 0 (the login page) is loading.
var loginInProgress = false;
// Login actions, executed one per timer tick by the login interval that
// onLoadFinished starts after the first page has loaded.
var loginSteps = [
function() {
// Enter credentials: runs inside the loaded page and fills the
// elements with ids "txtusername" and "txtpassword".
page.evaluate(function() {
document.getElementById("txtusername").value = "[email protected]";
document.getElementById("txtpassword").value ="12345678";
return;
});
},
function() {
// Login: looks up the element with id "form1" and loops over
// arr.length / arr[i], submitting the first entry whose "method"
// attribute equals "POST".
// NOTE(review): getElementById returns a single element, not a list;
// if "form1" is the <form>, arr[i] would be its controls rather than
// forms, so the submit() branch may never run - confirm.
page.evaluate(function() {
var arr = document.getElementById("form1");
var i;
for (i=0; i < arr.length; i++) {
if (arr[i].getAttribute('method') == "POST") {
arr[i].submit();
return;
}
}
});
}
];
// Timer callback that drives the crawl. It reads pageindex and
// loadInProgress, which are declared below; it is only invoked from
// setInterval, after those declarations have run.
var LoadPage = function() {
// No load in flight and URLs remain: open the URL at pageindex.
// pageindex itself is advanced by page.onLoadFinished.
if (!loadInProgress && pageindex < pages.length) {
console.log("image " + (pageindex + 1) + " of " + lines.length);
page.open(lines[pageindex]);
}
// pageindex has reached the number of lines: everything is rendered, quit.
if (pageindex == lines.length) {
console.log("<< Image render complete! >>");
phantom.exit();
}
}
//PNG RENDER
// Index of the URL currently being loaded/rendered.
var pageindex = 0;
// True between onLoadStarted and onLoadFinished.
var loadInProgress = false;
// Poll LoadPage every 500 ms; onLoadFinished clears this timer after the
// first page so the login steps can run.
var interval = setInterval(LoadPage, 500);
// Load-started handler: flags a load as in flight so LoadPage does not open
// another URL, and additionally flags the login as in progress while the
// page at index 0 is loading.
page.onLoadStarted = function() {
loadInProgress = true;
if(pageindex == 0) {
loginInProgress = true;
}
console.log('page ' + (pageindex + 1) + ' load started');
};
// Load-finished handler: renders the page that just loaded to
// destinations[pageindex], and after the first page (index 0) pauses the
// page timer, runs the login steps on a 50 ms timer, then restarts the
// page timer. Finally advances pageindex.
page.onLoadFinished = function() {
loadInProgress = false;
if(pageindex == 0)
{
// First page loaded: stop opening further URLs until the login steps ran.
loginInProgress = false;
console.log("stopping page interval");
clearInterval(interval);
}
page.evaluate(
function () {
// NOTE(review): '+' binds tighter than '||', so this evaluates as
// ("scale(" + arguments[0]) || ('1.0' + ")"). No argument is passed to
// evaluate, so the result is the string "scale(undefined" and the
// '1.0' default is never used.
var scaleVal = "scale("+arguments[0] || '1.0'+")";
document.body.style.webkitTransform = scaleVal;
}
);
console.log("rendering:" + destinations[pageindex]);
page.render(destinations[pageindex]); // RENDER PAGE //
if (pageindex == 0){
// Run one login step per tick while the login page is not loading.
var loginInterval = setInterval(function() {
if (!loginInProgress && typeof loginSteps[loginIndex] == "function") {
console.log("login step: " + loginIndex )
loginSteps[loginIndex]();
loginIndex++;
}
// No step left at loginIndex: stop this timer and resume page loading.
// This check runs in the same tick as the last step, so the page timer
// restarts immediately after that step is invoked.
if (typeof loginSteps[loginIndex] != "function") {
console.log("stopping login interval");
clearInterval(loginInterval);
console.log("starting page interval");
// The handle of this new timer is not stored.
setInterval(LoadPage, 500);
}
}, 50);
}
// Move on to the next URL; this runs before the first loginInterval tick.
pageindex++;
}
Upvotes: 1
Views: 1716
Reputation: 1353
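It turns out the problem was form submit versus button click: replacing the step that tried to submit the form with a `click()` on the login button got the session authenticated. Working code below: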
// Input and output locations: the newline-separated URL list to crawl and the
// folder that receives one PNG per URL.
var urlsLocation = "C:\\PhantomJs\\urls.txt";
var newScreenshotFolder = "C:\\PhantomJs\\NewScreenShot\\";
var fs = require('fs'),
    system = require('system');

// URL list state shared with the rest of the script:
//   content      - raw text of the URL file ('' if it could not be read)
//   lines        - the non-blank URLs, in file order (index 0 is the login page)
//   pages        - last path segment of each URL
//   destinations - screenshot path for each URL
//   eol          - platform line separator; kept for compatibility, parsing
//                  below accepts any line-ending style
// The lists start as empty arrays (not null) so that the length checks made
// later in the script stay valid even when the file is missing or empty.
var content = '',
    f = null,
    lines = [],
    pages = [],
    destinations = [],
    eol = system.os.name == 'windows' ? "\r\n" : "\n";

// Read in a line-break-separated list of URLs.
try {
    f = fs.open(urlsLocation, "r");
    content = f.read();
} catch (e) {
    console.log(e);
} finally {
    // Always release the stream, including when read() throws after open().
    if (f) {
        f.close();
    }
}

if (content) {
    // Split on CRLF, CR or LF so the file parses regardless of which OS wrote it.
    var rawLines = content.split(/\r\n|\r|\n/);
    for (var i = 0, len = rawLines.length; i < len; i++) {
        var url = rawLines[i].replace(/^\s+|\s+$/g, '');
        if (!url) {
            // Skip blank lines (e.g. a trailing newline) instead of queuing an empty URL.
            continue;
        }
        // Page name = everything after the last '/' in the URL.
        var pageName = url.substring(url.lastIndexOf('/') + 1);
        // Keep only characters that are safe in a file name; when nothing is left
        // (URL ends in '/'), fall back to a name based on the URL's position.
        var fileName = pageName.replace(/[^a-zA-Z0-9\.]/g, "") || ('page' + lines.length);
        lines.push(url);
        pages.push(pageName);
        destinations.push(newScreenshotFolder + fileName + '.png');
    }
}
console.log('Pages found: ' + pages.length);
// One WebPage instance is shared by the login page and every screenshot.
var page = require('webpage').create();

// Login sequencing state: the next step to execute, whether the login page is
// still loading, and a completion flag.
var loginIndex = 0,
    loginInProgress = false,
    loginCompleted = false;

// Ordered login actions; each one runs a snippet inside the loaded page.
var loginSteps = [
    // Step 0: type the credentials into the username and password inputs.
    function enterCredentials() {
        page.evaluate(function() {
            var userField = document.getElementById("txtusername");
            userField.value = "[email protected]";
            var passwordField = document.getElementById("txtpassword");
            passwordField.value = "12345678";
        });
    },
    // Step 1: click the login button rather than submitting the form directly.
    function clickLogin() {
        page.evaluate(function() {
            var loginButton = document.getElementById("btnLogin");
            loginButton.click();
        });
    }
];
// Timer callback driving the crawl: opens the next URL whenever no load is in
// flight, and quits PhantomJS once pageindex has reached the end of the list.
// pageindex is advanced by page.onLoadFinished, not here.
var LoadPage = function() {
    var hasMorePages = pages.length > pageindex;
    if (hasMorePages && !loadInProgress) {
        var position = pageindex + 1;
        console.log("image " + position + " of " + lines.length);
        page.open(lines[pageindex]);
    }
    if (lines.length == pageindex) {
        console.log("<< Image render complete! >>");
        phantom.exit();
    }
};

//PNG RENDER
// pageindex: URL currently being processed; loadInProgress: a load is in flight.
var pageindex = 0,
    loadInProgress = false;
// Poll every 500 ms until the first page has loaded.
var interval = setInterval(LoadPage, 500);
// Load-started handler: marks a load as in flight (so LoadPage holds off) and,
// while the first page (the login page) is loading, marks the login as busy too.
page.onLoadStarted = function() {
    var isLoginPage = (pageindex === 0);
    loadInProgress = true;
    if (isLoginPage) {
        loginInProgress = true;
    }
    console.log(['page', pageindex + 1, 'load started'].join(' '));
};
// Load-finished handler.
//   status - load result reported by PhantomJS ('success' or 'fail').
// Renders the page that just loaded to destinations[pageindex]. After the
// first page (the login page) it pauses the page timer, runs the login steps
// one per 50 ms tick, then restarts the page timer. Finally advances pageindex
// so LoadPage opens the next URL.
page.onLoadFinished = function(status) {
    loadInProgress = false;
    if (pageindex == 0) {
        // Login page loaded: stop opening URLs until the login steps have run.
        loginInProgress = false;
        console.log("stopping page interval");
        clearInterval(interval);
    }
    if (status && status !== 'success') {
        // Surface failed loads; the page is still rendered so the run keeps going.
        console.log("page " + (pageindex + 1) + " load status: " + status);
    }

    page.evaluate(function() {
        // Parenthesise the default: '+' binds tighter than '||', so without the
        // parentheses the result was always the invalid value "scale(undefined".
        var scaleVal = "scale(" + (arguments[0] || '1.0') + ")";
        // A failed load may leave the document without a body.
        if (document.body) {
            document.body.style.webkitTransform = scaleVal;
        }
    });

    if (typeof destinations[pageindex] == "string") {
        console.log("rendering:" + destinations[pageindex]);
        page.render(destinations[pageindex]); // RENDER PAGE //
    } else {
        // A load finished with no matching entry in the URL list; nothing to write.
        console.log("no destination for page " + (pageindex + 1) + ", skipping render");
    }

    if (pageindex == 0) {
        var loginInterval = setInterval(function() {
            // Run one login step per tick while the login page is not loading.
            if (!loginInProgress && typeof loginSteps[loginIndex] == "function") {
                console.log("login step: " + loginIndex);
                loginSteps[loginIndex]();
                loginIndex++;
            }
            // No step left: stop this timer and resume loading pages.
            if (typeof loginSteps[loginIndex] != "function") {
                console.log("stopping login interval");
                clearInterval(loginInterval);
                console.log("starting page interval");
                // Keep the handle so the restarted timer can still be cleared.
                interval = setInterval(LoadPage, 1000);
            }
        }, 50);
    }
    // Advance before the first login tick fires, so later loads use the next slot.
    pageindex++;
};
Upvotes: 3