Reputation: 12409
I am trying to scrape an ASPX website that uses JavaScript __doPostBack buttons. One of the buttons responds with a printable view of my page; the other one pushes a .csv file to the client.
I saw a question here which describes the csv download problem but doesn't answer it: PhantomJS download using a javascript link
So I am focusing on trying to get the printable view in phantomJs as it seems simpler (it shows in the browser window, there must be a way!)
button code :
<a id="ctl00_ctl00_ctl00_MainContentPlaceHolder_PrintResultsLinkButton"
title="Print Results" class="btn-blue"
href="javascript:
__doPostBack('ctl00$ctl00$ctl00$MainContentPlaceHolder$PrintResultsLinkButton','')
">
<span>Print Results</span>
</a>
I can click the link with Phantom/casper but it doesn't seem to be working. I figured that the request must be sent on button click, with all the headers, but I can't figure out how to receive the answer. Help?
my casper code, working up to the point where I should get the results page : https://gist.github.com/xShirase/7156131
I have also tried evaluating the js function, which works in the chrome console, but still doesn't give me the results in phantom...
Last attempt to date: I load my page a first time, to get the values of cookies and hidden inputs, then try to post the request myself. Still the same output in the capture, so I know my request is OK, but why don't I get the right results??
CODE :
// Start the CasperJS suite with no initial URL; the steps queued further down open the pages.
// NOTE(review): `casper` is not created anywhere in this snippet — presumably via
// require('casper').create() in the part that was left out; confirm.
casper.start();
/**
 * First navigation step: opens the fee-schedule search form, clicks its
 * `a.btn` link, and queues `grabResults` once the click step has run.
 *
 * Declared with `var` so the script no longer relies on an implicit global
 * (assigning to an undeclared name throws a ReferenceError in strict mode).
 */
var capture = function () {
    var url = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-criteria.aspx';
    casper.open(url).thenClick('a.btn', function () {
        // `this` is the object the click callback is invoked on; queue the next step on it.
        this.then(grabResults);
    });
};
/**
 * Step: loads the search-results page, screenshots it, and stores the values
 * needed to replay the postback by hand.
 *
 * Side effects:
 *  - echoes the current URL;
 *  - writes `page.png`;
 *  - assigns the script-wide `a` to `[__VIEWSTATE value, document.cookie]`,
 *    which `grabPRResults` reads when it builds its POST;
 *  - queues `grabPRResults` as the following step.
 *
 * Declared with `var` so the function is no longer an implicit global.
 */
var grabResults = function () {
    this.echo(this.getCurrentUrl());
    this.open('http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5').then(function () {
        this.capture('page.png');
        // `a` is intentionally left as the shared script-level variable, because
        // grabPRResults reads it by that name.
        // The callback passed to evaluate() uses `$` — assumes the page itself has jQuery loaded.
        a = this.evaluate(function () {
            var v = $('input:hidden#__VIEWSTATE').val();
            var d = document.cookie;
            return [v, d];
        });
    });
    this.then(grabPRResults);
};
grabPRResults = function(){
this.open('http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5', {
method: 'post',
headers: {
'Host': 'www.cms.gov',
'Connection': 'keep-alive',
'Content-Length': '103902',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Origin': 'http://www.cms.gov',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'fr,en-US;q=0.8,en;q=0.6',
'Cookie': a[1]
},
data: {
'__EVENTTARGET':'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$PrintResultsLinkButton',
'__EVENTARGUMENT':'',
'__LASTFOCUS':'',
'__VIEWSTATE':a[0],
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$PFSSResultsCPEWrapper_ClientState':'false',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$YearDropDown':'2013',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$TypeOfInfoDropDown':'pi',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPCTypeDropDown':'range',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierTypeDropDown':'all',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC1Textbox':'00100',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC2Textbox':'11400',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC3Textbox':'',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC4Textbox':'',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC5Textbox':'',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ModifierDropDown':'%',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierDropDown':'default',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierLocalityDropDown':'default',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl01$ tbGotoPage':'',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl01$PFSSGridViewtopddlTopPageSize':'10',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl14$ tbGotoPageBottom':'',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl14$PFSSGridViewbottomddlBottomPageSize':'10',
'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$DownloadsWidget1$DownloadsCPEWrapper_ClientState':'false'
}
}).then(function(){
this.wait(25000);
this.then(lest);
});
};
/**
 * Final step: screenshots the current page to `ppp.png`.
 *
 * Declared with `var` so the function is no longer an implicit global.
 */
var lest = function () {
    this.capture('ppp.png');
};
// Queue `capture` as the first step; the later steps are queued from inside the callbacks.
casper.then(capture);
// Execute the queued steps.
casper.run();
Upvotes: 3
Views: 1726
Reputation: 1390
From what I can tell, your major issue is being notified when the postback is complete. I have mocked up a simple aspx page that simulates a long postback; it should work for your case. Once you can wait for the callback to finish, you can use standard CasperJS functionality to do the scraping. I am a little worried about posting scraping instructions for a government site; hopefully my test page will be adequate to help you figure it out.
CasperJS
// Create the Casper instance with its log level set to "debug".
// Uncomment `verbose` to have log entries printed to the console as they occur.
var casper = require('casper').create({
    // verbose: true,
    logLevel: "debug"
});
// No bare `casper.start()` here: the suite is started by the
// `casper.start(url, callback)` call further down, which would reset
// whatever an earlier start() had set up.
// Relay each 'remote.message' event payload to Casper's own output via echo().
casper.on('remote.message', function relayRemoteMessage(pageMessage) {
    this.echo(pageMessage);
});
/**
 * Helper step: echoes the URL of the page Casper is currently on.
 * It is not referenced by the rest of this script as shown.
 *
 * Declared with `var` so the function is no longer an implicit global.
 */
var grabResults = function () {
    this.echo(this.getCurrentUrl());
};
// Open the test page, click the button, and wait for the partial postback to end.
casper.start('http://localhost:13851/default.aspx', function () {
    // Register the endRequest listener BEFORE clicking, so that a postback
    // which finishes quickly cannot complete before anything is listening.
    this.evaluate(function () {
        // Explicit initial state for the flag polled below.
        window.onPostBackComplete = false;
        Sys.WebForms.PageRequestManager.getInstance().add_endRequest(function () {
            console.log("client: doPostback complete");
            window.onPostBackComplete = true;
        });
    });
    this.thenClick('#Button1', function () {
        // Poll the flag set by the listener; give up after 5000 ms.
        this.waitFor(function () {
            return this.evaluate(function () {
                return window.onPostBackComplete;
            });
        }, function then() {
            this.echo("doPostback complete");
            this.echo("value of test label: " + this.fetchText('#Label1'));
        }, function timeout() {
            this.echo("-- > timeout");
        },
        5000);
    });
});
// Execute the queued steps. When a completion callback is passed to run(),
// CasperJS does not exit on its own, so the callback must call exit() itself.
casper.run(function () {
    this.echo("finished");
    this.exit();
});
Default.aspx
<%@ Page Language="C#" AutoEventWireup="true" %>
<!DOCTYPE html>
<script runat="server">
// Test page: a button inside an UpdatePanel whose click handler changes a label
// after a one-second delay.
// Click handler wired to Button1 through its OnClick attribute: sets Label1's
// text, then blocks the request for 1000 ms before returning.
protected void Button1_Click(object sender, EventArgs e)
{
Label1.Text = "Slow loaded text";
System.Threading.Thread.Sleep(1000); // simulate a slow server
}
</script>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title>Sample page</title>
</head>
<body>
<form id="form1" runat="server">
<asp:ScriptManager ID="ScriptManager1" runat="server"></asp:ScriptManager>
<div>
<%-- Label1 and Button1 both live inside this UpdatePanel's ContentTemplate. --%>
<asp:UpdatePanel ID="UpdatePanel1" runat="server" >
<ContentTemplate>
<asp:Label ID="Label1" runat="server" Text="Default Label"></asp:Label>
<br />
<asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click" />
</ContentTemplate>
</asp:UpdatePanel>
</div>
</form>
</body>
</html>
Check out: http://forums.asp.net/t/1245557.aspx?how+to+detect+the+end+of+__doPostBack+in+Javascript
Upvotes: 3