xShirase
xShirase

Reputation: 12409

dopostback in PhantomJS/CasperJS

I am trying to scrape an ASPX website that uses JavaScript __doPostBack buttons. One of the buttons responds with a printable view of my page; the other one pushes a .csv file to the client.

I saw a question here which describes the csv download problem but doesn't answer it: PhantomJS download using a javascript link

So I am focusing on trying to get the printable view in phantomJs as it seems simpler (it shows in the browser window, there must be a way!)

button code :

<a id="ctl00_ctl00_ctl00_MainContentPlaceHolder_PrintResultsLinkButton" 
  title="Print Results" class="btn-blue"     
  href="javascript:
    __doPostBack('ctl00$ctl00$ctl00$MainContentPlaceHolder$PrintResultsLinkButton','')
">
    <span>Print Results</span>
</a> 

I can click the link with Phantom/Casper, but it doesn't seem to be working. I figured that the request must be sent on button click, with all the headers, but I can't figure out how to receive the response. Help?

my casper code, working up to the point where I should get the results page : https://gist.github.com/xShirase/7156131

I have also tried evaluating the js function, which works in the chrome console, but still doesn't give me the results in phantom...

Last attempt to date: I load my page a first time, to get the values of cookies and hidden inputs, then try to post the request myself. Still the same output in the capture, so I know my request is OK, but why don't I get the right results??

CODE :

// Start the Casper run without an initial URL; the first page is opened
// inside `capture` below.
// NOTE(review): `casper` is not created in this snippet — presumably via
// require('casper').create() earlier in the script; confirm.
casper.start();

// Step 1: open the search-criteria page, click the `a.btn` link, and once
// the click step has run, queue `grabResults` as the next step.
capture = function(){
    var searchCriteriaUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-criteria.aspx';

    // Runs as a Casper step, so `this` is the Casper instance.
    var queueGrabResults = function() {
        this.then(grabResults);
    };

    casper.open(searchCriteriaUrl);
    casper.thenClick('a.btn', queueGrabResults);
};

// Step 2: load the search-results page, screenshot it, and read the values
// needed to replay the postback (the __VIEWSTATE hidden input and the
// page-visible cookies). Then queue `grabPRResults`.
grabResults = function(){
  var resultsUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5';

  this.echo(this.getCurrentUrl());

  var resultsPage = this.open(resultsUrl);
  resultsPage.then(function(){
    this.capture('page.png');

    // `a` is deliberately a script-wide variable: grabPRResults reads
    // a[0] (view state) and a[1] (cookie string).
    // The callback runs inside the page, so `$` must be provided by the
    // page itself (jQuery).
    a = this.evaluate(function(){
        var viewState = $('input:hidden#__VIEWSTATE').val();
        var cookieString = document.cookie;
        return [viewState, cookieString];
    });
  });

  this.then(grabPRResults);

};


// Step 3: replay the "Print Results" postback by POSTing the WebForms fields
// to the results page, wait, then hand over to `lest` for the screenshot.
//
// Reads the script-wide `a` filled in by grabResults:
//   a[0] — value of the __VIEWSTATE hidden input
//   a[1] — document.cookie of the results page
//
// Changes from the browser-captured request:
//   * Content-Length is no longer forced. The captured value (103902)
//     described the browser's body, not the one encoded here; a wrong length
//     makes the server truncate the body or wait for bytes that never come.
//   * Host and Connection are left to the HTTP stack.
//   * Accept-Encoding is no longer forced; advertising gzip/sdch by hand can
//     leave the response body undecoded in PhantomJS.
//
// NOTE(review): document.cookie does not expose HttpOnly cookies (ASP.NET
// session cookies usually are), so the manual Cookie header may be
// incomplete — confirm whether PhantomJS's own cookie jar is sufficient.
// NOTE(review): WebForms pages often also require __EVENTVALIDATION; it was
// not part of the original field list — confirm against the page source.
grabPRResults = function(){
    var resultsUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5';

    // Naming-container prefix shared by every control on the results form.
    var controlPrefix = 'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$';

    // Form controls keyed by their name relative to `controlPrefix`.
    // NOTE(review): the space after `$` in the two tbGotoPage names is kept
    // exactly as in the captured request — confirm against the real inputs.
    var controlValues = {
        'PFSSResultsCPEWrapper_ClientState': 'false',
        'YearDropDown': '2013',
        'TypeOfInfoDropDown': 'pi',
        'HCPCTypeDropDown': 'range',
        'CarrierTypeDropDown': 'all',
        'HCPC1Textbox': '00100',
        'HCPC2Textbox': '11400',
        'HCPC3Textbox': '',
        'HCPC4Textbox': '',
        'HCPC5Textbox': '',
        'ModifierDropDown': '%',
        'CarrierDropDown': 'default',
        'CarrierLocalityDropDown': 'default',
        'ResultsControl1$PFSSGridView$ctl01$ tbGotoPage': '',
        'ResultsControl1$PFSSGridView$ctl01$PFSSGridViewtopddlTopPageSize': '10',
        'ResultsControl1$PFSSGridView$ctl14$ tbGotoPageBottom': '',
        'ResultsControl1$PFSSGridView$ctl14$PFSSGridViewbottomddlBottomPageSize': '10',
        'DownloadsWidget1$DownloadsCPEWrapper_ClientState': 'false'
    };

    // Postback bookkeeping fields first, then the prefixed controls.
    var formData = {
        '__EVENTTARGET': controlPrefix + 'PrintResultsLinkButton',
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
        '__VIEWSTATE': a[0]
    };
    Object.keys(controlValues).forEach(function(controlName){
        formData[controlPrefix + controlName] = controlValues[controlName];
    });

    this.open(resultsUrl, {
        method: 'post',
        headers: {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Origin': 'http://www.cms.gov',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': resultsUrl,
            'Accept-Language': 'fr,en-US;q=0.8,en;q=0.6',
            'Cookie': a[1]
        },
        data: formData
    }).then(function(){
        // Give the server time to render before the screenshot step runs.
        this.wait(25000);
        this.then(lest);
    });
};

// Final step: save a screenshot of whatever page is currently loaded.
lest= function(){
  var screenshotFile = 'ppp.png';
  this.capture(screenshotFile);
};

// Queue `capture` as the first step, then execute all queued steps.
casper.then(capture);
casper.run();

Upvotes: 3

Views: 1726

Answers (1)

JustEngland
JustEngland

Reputation: 1390

From what I can tell, your major issue is being notified when the postback is complete. I have mocked up a simple aspx page that simulates a long postback; it should work for your case. Once you have waited for the callback to finish, you can use standard CasperJS functionality to do the scraping. I am a little worried about posting scraping instructions for a government site; hopefully my test page will be adequate to help you figure it out.

CasperJS

// Create the Casper instance. `logLevel` only affects what is printed once
// `verbose` is enabled, so uncomment it to see the debug log.
// The run is started by the casper.start(url, ...) call further down; a
// separate bare casper.start() here would be redundant.
var casper = require('casper').create({
    // verbose: true,
    logLevel: "debug"
});

// Relay console.log() output from the page to the Casper console so the
// in-page "postback complete" message is visible.
casper.on('remote.message', function (message) {
    this.echo(message);
});


// Helper step that prints the URL of the page currently loaded.
grabResults = function () {
    var currentUrl = this.getCurrentUrl();
    this.echo(currentUrl);
};

// Load the test page, trigger the UpdatePanel postback by clicking #Button1,
// and wait until ASP.NET AJAX reports that the request has ended.
casper.start('http://localhost:13851/default.aspx', function () {

    // Register the endRequest listener BEFORE clicking. If it were attached
    // after the click, a fast postback could finish first and the flag would
    // never be set, making waitFor time out.
    this.evaluate(function () {
        window.onPostBackComplete = false;
        Sys.WebForms.PageRequestManager.getInstance().add_endRequest(function () {
            console.log("client: doPostback complete");
            window.onPostBackComplete = true;
        });
    });

    this.thenClick('#Button1', function () {
        // Poll the in-page flag until the postback has finished.
        this.waitFor(function check() {
            return this.evaluate(function () {
                return window.onPostBackComplete === true;
            });
        }, function then() {
            this.echo("doPostback complete");
            this.echo("value of test label: " + this.fetchText('#Label1'));
        }, function timeout() {
            this.echo("-- > timeout");
        },
        5000);
    });


});

// Execute the queued steps; the callback is invoked once they have all run.
casper.run(function () {
    this.echo("finished");
});

Default.aspx

<%@ Page Language="C#" AutoEventWireup="true" %>
<!DOCTYPE html>
<script runat="server">    
    // Click handler for Button1: updates the label text and then blocks for
    // one second so the client sees a slow postback.
    protected void Button1_Click(object sender, EventArgs e)
    {
        Label1.Text = "Slow loaded text";
        System.Threading.Thread.Sleep(1000);  // simulate a slow server
    }
</script>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>Sample page</title>
</head>
<body>
    <form id="form1" runat="server">
        <%-- ScriptManager supplies the client-side Sys.WebForms.PageRequestManager used by the CasperJS script. --%>
        <asp:ScriptManager ID="ScriptManager1" runat="server"></asp:ScriptManager>
        <div>
            <%-- Label1 and Button1 live inside the UpdatePanel, so the button click is handled as a partial (async) postback. --%>
            <asp:UpdatePanel ID="UpdatePanel1" runat="server" >
                <ContentTemplate>
                    <asp:Label ID="Label1" runat="server" Text="Default Label"></asp:Label>
                    <br />
                    <asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click"  />
                </ContentTemplate>
            </asp:UpdatePanel>
        </div>
    </form>
</body>
</html>

Check out: http://forums.asp.net/t/1245557.aspx?how+to+detect+the+end+of+__doPostBack+in+Javascript

Upvotes: 3

Related Questions