vinod
vinod

Reputation: 51

Scrape dynamic loading pages with phantomjs

I am trying to scrape a Quora log page (like this Quora question log): I want to get to the bottom of the page and read the name of the person who added the question. I am using this code with PhantomJS:

// Load a single page with PhantomJS and save its HTML to disk.
var page = require('webpage').create();
var fs = require('fs');
var output = './temp_htmls/test1.html';
// 'url' is a placeholder for the address of the page to fetch.
page.open('url', function(status) {
  // The callback receives 'success' or 'fail'. On failure, report it and exit
  // with a non-zero code instead of saving an empty or partial document.
  if (status !== 'success') {
    console.error('webpage did not open successfully');
    phantom.exit(1);
    // phantom.exit() may not stop this callback immediately, so return explicitly.
    return;
  }
  // Overwrite the output file with the page's current HTML.
  fs.write(output, page.content, 'w');
  phantom.exit();
});

But this only fetches the first page of content and cannot fetch the complete web page down to the end. Can someone point out what I am missing in PhantomJS to make this work?

EDIT: I tried different approaches, as mentioned in the link in the comments (Link in comment), and came up with this code:

// Open a page with PhantomJS, scroll down every 3 seconds, and save the
// page's HTML after each scroll. Stops after 10 scroll steps.
var system = require('system');
var fs = require('fs');
var output = './temp_htmls/test1.html';
var webpage = require('webpage').create();
webpage.viewportSize = { width: 1280, height: 800 };
webpage.scrollPosition = { top: 0, left: 0 };

// First command-line argument; read here but not used further in this script.
var userid = system.args[1];
var profileUrl = "http://www.twitter.com/barackobama";

webpage.open(profileUrl, function(status) {
    if (status !== 'success') {
        console.error('webpage did not open successfully');
        phantom.exit(1);
        // phantom.exit() may not stop this callback immediately; without this
        // return the scroll timer below would still be installed.
        return;
    }
    var i = 0,
    top,
    // Runs inside the page: reports the current total height of the document.
    queryFn = function() {
        return document.body.scrollHeight;
    };
    var timer = setInterval(function() {
        top = webpage.evaluate(queryFn);
        i++;
        console.log("1:");
        // Scroll just past the current bottom of the document.
        webpage.scrollPosition = { top: top + 1, left: 0 };
        console.log("top = " + top);//increments properly for twitter
        // Overwrite the output file with the HTML as it stands on this tick.
        fs.write(output,webpage.content,'w');

        if (i >= 10) {
            // Stop scheduling further ticks before asking PhantomJS to quit.
            clearInterval(timer);
            phantom.exit();
        }
    }, 3000);
});

This works for Twitter's infinite scroll, and the console.log value of top for Twitter is fine. But for the Quora log link (in the original question), it does not increment, so scrolling does not seem to be working for Quora. What changes do I need to make? Any help would be appreciated.

Upvotes: 1

Views: 551

Answers (0)

Related Questions