Reputation: 395
My goal is to scrape some data from a web site using Node.js.
I have already managed to scrape data using only the request
package, but the site that I want to scrape has dynamic content, and request
alone can't grab this dynamic data.
So I did some research and found that, to achieve this, and based on this SO question, I would need to install some packages via npm
(I do not know if all three are needed):
Based on the question too, I used the same code, just to understand how it works:
myFile.js
// Callback-style attempt to open a page with the `phantom` package, include
// jQuery into it, and log the href of a link inside each `.listMain > li` item.
var phantom = require('phantom');
// NOTE(review): a callback is passed to create() here; the error quoted after
// this snippet ("Expecting args to be array") presumably refers to this
// argument — confirm against the installed phantom version.
phantom.create(function (ph) {
ph.createPage(function (page) {
var url = "http://www.bdtong.co.kr/index.php?c_category=C02";
page.open(url, function() {
// Include jQuery so that `$` can be used by the function passed to evaluate().
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
// NOTE(review): the first function is presumably executed inside the loaded
// page rather than in Node — verify where its console.log output ends up.
page.evaluate(function() {
$('.listMain > li').each(function () {
console.log($(this).find('a').attr('href'));
});
}, function(){
// Second callback: shut PhantomJS down once the evaluation has finished.
ph.exit()
});
});
});
});
});
But when I try to run it in the terminal with $ node myFile.js
, it doesn't work and keeps giving me the error:
(node:6576) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): Error: Unexpected type of parameters. Expecting args to be array.
(node:6576) DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
Any ideas how to solve this?
EDIT:
Final solution based on @Shyam's answer (which solved the error) and this example:
// Open a page with PhantomJS through the promise-based `phantom` package,
// print the load status and the page content, then shut PhantomJS down.
var phantom = require('phantom');
// Kept outside the chain so later steps and the cleanup step can reach them.
var _ph, _page;
phantom
  .create()
  .then(ph => {
    _ph = ph;
    return _ph.createPage();
  })
  .then(page => {
    _page = page;
    // The scheme must be followed by two slashes ("https://").
    return _page.open('https://www.google.com.br/');
  })
  .then(status => {
    console.log(status);
    return _page.property('content');
  })
  .then(content => {
    console.log(content);
  })
  .catch(e => console.log(e))
  .then(() => {
    // Runs after both success and failure, so PhantomJS is not left running
    // when an earlier step rejects. The guards cover a failure that happened
    // before the instance or the page was created.
    if (_page) {
      _page.close();
    }
    if (_ph) {
      _ph.exit();
    }
  });
Upvotes: 1
Views: 2032
Reputation: 1079
I am not sure where you got that format from, but the latest phantom package does not use callbacks; it uses promises instead. Also, the constructor (phantom.create) expects its configuration in the form of an array, not a callback function.
Your code needs to be something similar to this, I presume (I have not tested this, but it should run).
// Scrape the link hrefs of every `.listMain > li` item from a dynamically
// rendered page, using the promise-based API of the `phantom` package.
var phantom = require('phantom');
// Kept outside the chain so later steps and the cleanup step can reach them.
var _ph, _page;
phantom.create()
  .then(function (ph) {
    _ph = ph;
    return ph.createPage();
  })
  .then(function (page) {
    _page = page;
    var url = "http://www.bdtong.co.kr/index.php?c_category=C02";
    return page.open(url);
  })
  .then(function (status) {
    // open() resolves with the load status, not with the page object, so the
    // page has to be reached through _page from here on.
    if (status !== 'success') {
      throw new Error('Failed to load page, status: ' + status);
    }
    return _page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js");
  })
  .then(function () {
    // The function below is run inside the page, so it returns the collected
    // hrefs to Node instead of calling console.log there.
    return _page.evaluate(function () {
      return $('.listMain > li').map(function () {
        return $(this).find('a').attr('href');
      }).get();
    });
  })
  .then(function (hrefs) {
    hrefs.forEach(function (href) {
      console.log(href);
    });
  })
  .catch(function (err) {
    // Report the failure instead of silently swallowing it.
    console.log(err);
  })
  .then(function () {
    // Runs after both success and failure; the guards cover a failure that
    // happened before the instance or the page was created.
    if (_page) {
      _page.close();
    }
    if (_ph) {
      _ph.exit();
    }
  });
Upvotes: 3