Reputation: 2757
NodeJS newbie here. I am trying to parse HTML using NodeJS and PhantomJS (phantomjs-node). When I run the jQuery selector $("[class*='question-summary']")
from the browser console, it returns an array. However, I couldn't manage to do the same thing in NodeJS. I guess Stack Overflow already loads jQuery, so I don't need to use includeJs
to load jQuery. Actually, when I run
Here is the NodeJS example I am running:
// phantomjs-node bindings, loaded from the `phantom` package.
var phantom = require('phantom');
/**
 * Opens http://stackoverflow.com in a PhantomJS page, logs the load status and
 * the page content, then tries to return a jQuery selection out of the page.
 *
 * NOTE(review): as reported alongside this script, the process gets stuck after
 * the content is printed, i.e. around the page.evaluate() call below.
 */
async function getHtml() {
// Start PhantomJS with the --load-images=false command-line flag.
const instance = await phantom.create([
"--load-images=false"
]);
const page = await instance.createPage();
// Log the URL of every resource request reported by the page.
await page.on("onResourceRequested", function(requestData) {
console.info('Requesting', requestData.url)
});
const status = await page.open('http://stackoverflow.com');
console.log("STATUS: " + status);
// Read the page's `content` property and print it.
const content = await page.property('content');
console.log(content);
// The callback declares a `content` parameter, but no argument is passed to
// evaluate() and the parameter is never read inside the callback.
// NOTE(review): the callback returns the jQuery object itself; presumably that
// value cannot be serialized back to Node, which would explain the hang.
// Confirm against the phantomjs-node documentation.
var result = await page.evaluate(function(content) {
// Relies on `$` already being defined by the loaded page; nothing here injects jQuery.
return $("[class*='question-summary']");
});
console.log("Result : " + result);
// NOTE(review): not reached if any await above rejects or never settles.
await instance.exit();
};
// The returned promise is neither awaited nor given a rejection handler.
getHtml();
I run it with the command >node --harmony-async-await phantomTest.js
. The process gets stuck after printing the content to the console.
Upvotes: 1
Views: 339
Reputation: 2757
Answering my own question here. Creating an array inside the evaluate function and pushing the elements into it worked. I guess the limitation is that phantom-node
only supports returning objects made up of primitives.
// Build a plain array of strings and return that, instead of returning the
// jQuery selection itself.
var result = await page.evaluate(function() {
  // Uses the `$` provided by the loaded page; kept to ES5 syntax (var/function).
  var matches = $("[class*='question-summary']");
  var htmlFragments = [];
  for (var i = 0; i < matches.length; i++) {
    // Push each matched element's inner HTML as a string.
    htmlFragments.push(matches[i].innerHTML);
  }
  return htmlFragments;
});
Upvotes: 2