Reputation: 11
I am trying to automate a website and I am having trouble with a JavaScript postback. My first request is sent with '__EVENTTARGET' = 'ctl00$content$ctl01$btn'. After that request a new href link appears on the same page, and I now want to request this new JavaScript link with '__EVENTTARGET' = 'ctl00$content$ctl01$'. I don't know how to follow a javascript:__doPostBack() link using a Scrapy spider. I have looked into this issue and cannot find anything.
<a id="ctl00_content_" href="javascript:__doPostBack('ctl00$content$ctl01$,'')">SYED ALI </a>
My spider code:
# Address of the page the spider starts from (the value shown here is a placeholder).
URL = 'xyz'
class ExitRealtySpider(scrapy.Spider):
    """Submit a search form, then follow a ``javascript:__doPostBack`` link.

    The crawl is a chain of three callbacks:

    1. ``parse`` posts the search form found on the start page.
    2. ``parse_page`` receives the search results and posts the form again
       with ``__EVENTTARGET`` set, which is what the page's
       ``__doPostBack(target, argument)`` JavaScript does in a browser.
    3. ``parse_page2`` receives the page produced by that postback.

    Every POST is built with ``FormRequest.from_response`` from the response
    currently being handled, so the hidden state inputs (``__VIEWSTATE``,
    ``__VIEWSTATEGENERATOR``, ``__EVENTVALIDATION``) are copied from *that*
    page's form. Re-downloading the start URL to read them would send state
    that belongs to a different page than the one being posted back.
    """

    name = "campSpider"
    allowed_domains = ["xyz"]
    start_urls = [URL]

    def _postback(self, response, formdata, callback, dont_click=False):
        """Build a POST for the form in ``response`` with ``formdata`` overrides.

        Args:
            response: Page whose form (including hidden inputs) is submitted.
            formdata: Field name -> string value; overrides the values that
                ``from_response`` pre-populates from the form.
            callback: Method that will handle the resulting response.
            dont_click: When true, no submit button is added to the request.

        Returns:
            The ``FormRequest`` to schedule.
        """
        # Kept on the instance because the original code exposed the last
        # submitted form data as ``self.data``.
        self.data = formdata
        return FormRequest.from_response(
            response,
            method='POST',
            callback=callback,
            formdata=formdata,
            dont_click=dont_click,
            # The same URL is posted to more than once; without this the
            # duplicate filter could drop the later requests.
            dont_filter=True,
        )

    def parse(self, response):
        """Submit the search form on the first page."""
        formdata = {
            '__VIEWSTATEENCRYPTED': '',
            'typAirmenInquiry': '7',
            'ctl00$content$ctl01$txtbxLastName': 'a',
            'ctl00$content$ctl01$txtbxCertNo': '123',
            'ctl00$content$ctl01$btnSearch': 'Search',
            '__EVENTTARGET': 'ctl00$content$ctl01$',
        }
        return self._postback(response, formdata, self.parse_page)

    def parse_page(self, response):
        """Handle the search results and trigger the link's postback."""
        print("\n\n\n\n\n", response.body, "\n\n\n\n\n")
        formdata = {
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATEENCRYPTED': '',
            'typAirmenInquiry': '7',
            # First argument of the link's __doPostBack(...) call.
            '__EVENTTARGET': 'ctl00$content$ctl01$',
        }
        # A __doPostBack link submits the form without pressing any button,
        # so the default "click the first submit control" must be disabled.
        return self._postback(
            response, formdata, self.parse_page2, dont_click=True
        )

    def parse_page2(self, response):
        """Handle the page returned by the postback (currently just dumps it)."""
        print("\n\n\n\n\n", response.body, "\n\n\n\n\n")
Upvotes: 1
Views: 443