Reputation: 1630
I'd like to load the full DOM of a website and process each element later. The example test case below logs the tag name of every element on the page.
This works perfectly in a normal browser:
// Log the tag name of every element in the current document.
// getElementsByTagName('*') returns an array-like HTMLCollection, so it is
// walked by numeric index: for...in would also visit non-element keys such as
// "length" and "item", and each of those would be logged as 'invalid'.
var dom = document.getElementsByTagName('*'),
    i;
for (i = 0; i < dom.length; i += 1)
{
    console.log(dom[i] && dom[i].tagName ? dom[i].tagName : 'invalid');
}
This testcase logs:
HTML
BODY
DIV
...etc...
This doesn't work inside PhantomJS:
// PhantomJS script: open a page, then try to log the tag name of every element.
var page = require('webpage').create();

page.open('https://google.com', function ()
{
    // The inner function runs in the page context and hands back the element
    // collection itself. Per the PhantomJS docs, evaluate() return values must
    // be JSON-serializable and DOM nodes are not, so the entries that arrive
    // here do not carry a usable tagName.
    var elements = page.evaluate(function ()
    {
        return document.getElementsByTagName('*');
    });
    var key;
    var element;

    for (key in elements)
    {
        element = elements[key];
        if (element && element.tagName)
        {
            console.log(element.tagName);
        }
        else
        {
            console.log('invalid');
        }
    }

    phantom.exit();
});
For some reason, only the very first object contains values. Therefore this testcase logs:
HTML
invalid
invalid
...
Help is wanted! Thank you
Upvotes: 1
Views: 809
Reputation: 1630
From the docs:
Note: The arguments and the return value to the evaluate function must be a simple primitive object. The rule of thumb: if it can be serialized via JSON, then it is fine.
Closures, functions, DOM nodes, etc. will not work!
Finally, I found the answer: build a plain, JSON-serializable object (or array of such objects) inside the evaluate callback and return that instead of the DOM nodes themselves.
// PhantomJS script: log the tag name and class name of every element on a page.
var page = require('webpage').create();

page.open('https://google.com', function (status)
{
    var dom,
        i;

    // Bail out early when the page did not load; there is nothing to inspect.
    if (status !== 'success')
    {
        console.log('Unable to load the page: ' + status);
        phantom.exit(1);
        return;
    }

    // evaluate() can only return JSON-serializable data, so the DOM nodes are
    // reduced to plain { tagName, className } records inside the page context.
    dom = page.evaluate(function ()
    {
        var temp = document.getElementsByTagName('*'),
            tempArray = [],
            j;
        // Index loop on purpose: for...in over an HTMLCollection also visits
        // non-element keys ("length", "item", ...) and would push bogus records.
        for (j = 0; j < temp.length; j += 1)
        {
            tempArray.push(
            {
                tagName: temp[j].tagName,
                className: temp[j].className
            });
        }
        return tempArray;
    }) || []; // fall back to an empty list if evaluate() yields nothing

    for (i = 0; i < dom.length; i += 1)
    {
        console.log(dom[i] && dom[i].tagName ? dom[i].tagName : 'invalid');
        console.log(dom[i] && dom[i].className ? dom[i].className : 'invalid');
    }
    phantom.exit();
});
Upvotes: 1