Shehryar Ali
Shehryar Ali

Reputation: 11

Scrape a javascript:__doPostBack() link

I am trying to automate a website and I am having trouble with a JavaScript postback. My first request is sent with '__EVENTTARGET' = 'ctl00$content$ctl01$btn'. After it, a new link (href) appears on the same page, and I now want to request this new JavaScript link with '__EVENTTARGET' = 'ctl00$content$ctl01$'. I don't know how to follow a javascript:__doPostBack() link using a Scrapy spider. I have looked into this issue and cannot find anything.

<a id="ctl00_content_" href="javascript:__doPostBack('ctl00$content$ctl01$','')">SYED&nbsp; ALI&nbsp; </a>

My spider code:

# Address of the page the spider crawls (it is the spider's only start URL).
# NOTE(review): 'xyz' looks like a placeholder for the real site URL — confirm.
URL = 'xyz'

class ExitRealtySpider(scrapy.Spider):
    """Run a search on a WebForms-style page, then follow a ``__doPostBack`` link.

    The crawl has three steps:

    1. ``parse`` submits the search form found on the start page.
    2. ``parse_page`` receives the search results and replays the
       ``javascript:__doPostBack(...)`` link by posting the same form again
       with ``__EVENTTARGET`` / ``__EVENTARGUMENT`` filled in.
    3. ``parse_page2`` receives the page produced by that postback.

    The hidden state inputs (``__VIEWSTATE``, ``__VIEWSTATEGENERATOR``,
    ``__EVENTVALIDATION``) are not set by hand: ``FormRequest.from_response``
    copies every hidden input of the form in *response*, so each postback
    carries the tokens of the page it is actually posting from.
    """

    name = "campSpider"
    allowed_domains = ["xyz"]
    start_urls = [URL]

    def parse(self, response):
        """Submit the search form on the first page.

        Args:
            response: the downloaded start page containing the search form.

        Returns:
            A ``FormRequest`` whose response is handled by ``parse_page``.
        """
        # Plain string values: the original trailing commas turned each value
        # into a 1-tuple by accident.  A local dict replaces the shared
        # ``self.data`` attribute so callbacks do not overwrite each other.
        formdata = {
            '__VIEWSTATEENCRYPTED': '',
            'typAirmenInquiry': '7',
            'ctl00$content$ctl01$txtbxLastName': 'a',
            'ctl00$content$ctl01$txtbxCertNo': '123',
            'ctl00$content$ctl01$btnSearch': 'Search',
            '__EVENTTARGET': 'ctl00$content$ctl01$',
        }
        return FormRequest.from_response(
            response,
            method='POST',
            callback=self.parse_page,
            formdata=formdata,
            dont_filter=True,  # the POST goes to an already-visited URL
        )

    def parse_page(self, response):
        """Replay the ``__doPostBack`` link shown on the search-result page.

        Args:
            response: the search-result page returned for the form submitted
                by ``parse``.

        Returns:
            A ``FormRequest`` whose response is handled by ``parse_page2``.
        """
        print("\n\n\n\n\n", response.body, "\n\n\n\n\n")

        # __doPostBack(target, argument) only fills these two hidden fields
        # and submits the form, so that is all that has to be reproduced.
        # The target is the first argument of the link's
        # javascript:__doPostBack('...', '') href.
        formdata = {
            '__EVENTTARGET': 'ctl00$content$ctl01$',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATEENCRYPTED': '',
            'typAirmenInquiry': '7',
        }
        return FormRequest.from_response(
            response,
            method='POST',
            callback=self.parse_page2,
            formdata=formdata,
            # A script-driven postback clicks no button; without this Scrapy
            # would also submit the form's first submit control.
            dont_click=True,
            dont_filter=True,  # the POST goes to an already-visited URL
        )

    def parse_page2(self, response):
        """Dump the body of the page returned by the postback.

        Args:
            response: the page returned for the request built in
                ``parse_page``.
        """
        print("\n\n\n\n\n", response.body, "\n\n\n\n\n")

Upvotes: 1

Views: 443

Answers (0)

Related Questions