Reputation: 893
I'm following the tutorial listed here :
http://code.tutsplus.com/tutorials/screen-scraping-with-nodejs--net-25560
When I run the code:
// Page to scrape. This script opens it in PhantomJS, waits for AJAX content,
// then tries to read the first element with class "transition".
var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134';
var phantom = require('phantom');
// Callback chain: create the PhantomJS process -> create a page -> open `host`.
phantom.create(function(ph) {
return ph.createPage(function(page) {
return page.open(host, function(status) {
// `status` is only logged; the script continues regardless of its value.
console.log("opened site? ", status);
// NOTE(review): PhantomJS documents injectJs as loading a script from a local
// file path, and includeJs as the variant that takes a URL. Passing a remote URL
// here presumably leaves jQuery unloaded (consistent with the reported
// "Can't find variable: $") — confirm against the PhantomJS API docs.
page.injectJs('http://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js', function() {
//jQuery Loaded.
//Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
setTimeout(function() {
// First function is handed to page.evaluate to run against the page; the second
// receives whatever the first one returns.
return page.evaluate(function() {
//Get what you want from the page using jQuery. A good way is to populate an object with all the jQuery commands that you need and then return the object.
console.log(document.getElementsByClassName('transition')[0]);
// NOTE(review): this returns a DOM element. PhantomJS's evaluate is documented to
// pass back only JSON-serializable values, so `result` is presumably not a usable
// element — consider returning plain data (e.g. href/text) instead. TODO confirm.
return document.getElementsByClassName('transition')[0];
}, function(result) {
console.log(result);
// Shut down the PhantomJS process once the result has been printed.
ph.exit();
});
}, 5000);
});
});
});
});
I get the following error :
phantom stdout: ReferenceError: Can't find variable: $
phantom stdout: phantomjs://webpage.evaluate():7
phantomjs://webpage.evaluate():10
phantomjs://webpage.evaluate():10
I have no idea what this means, and I can't find any help on how to resolve it. How can this be solved?
Basically, I want all the 'a' tags with class transition from the site I'm scraping. All of these tags are loaded asynchronously on the site.
Upvotes: 1
Views: 3624
Reputation: 2482
The $ error is due to jQuery and possible conflicts. You hardly need to inject jQuery just to scrape 'a' tags with class transition. You always have document.querySelector or document.querySelectorAll.
// Page to scrape; its 'a.transition' links are filled in by AJAX after load.
var host = 'http://www.shoutcast.com/?action=sub&cat=Hindi#134';
var phantom = require('phantom');

// Opens `host` in PhantomJS, waits for asynchronous content, then prints every
// <a class="transition"> on the page as a plain { href, text } object.
// No jQuery is injected: the DOM's own querySelectorAll is enough here.
phantom.create(function(ph) {
  ph.createPage(function(page) {
    page.open(host, function(status) {
      console.log("opened site? ", status);

      // Nothing to scrape if the page failed to load; shut down instead of
      // waiting and evaluating against an empty document.
      if (status !== 'success') {
        ph.exit();
        return;
      }

      //Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
      setTimeout(function() {
        page.evaluate(function() {
          // Runs inside the page. Only serializable data survives the trip back
          // to Node, so copy the interesting fields out of each element instead
          // of returning the DOM nodes themselves.
          var anchors = document.querySelectorAll('a.transition');
          return Array.prototype.map.call(anchors, function(anchor) {
            return {
              href: anchor.href,
              text: anchor.textContent
            };
          });
        },
        function(result) {
          // `result` is the array returned by the function above.
          console.log(result);
          ph.exit();
        });
      }, 5000);
    });
  });
});
However, I do not understand the way function (result) { console.log(result); ...} is written. I do not know whether page.evaluate takes a callback function as its second parameter; please check that against the documentation.
Upvotes: 3