MITHU
MITHU

Reputation: 164

Can't get the required results from a webpage by issuing a POST request with appropriate parameters

I'm trying to scrape some fields from a webpage using the requests module. To generate the desired result, it is necessary to choose an option from the dropdown (here, Enterprise No), then put the search item K2018288262 in the search box, and finally hit the search button. This image represents what I tried to detail. However, when I tried to mimic the same steps using the following script, I did not get the required content in the response.

This is how I've tried:

import requests
from bs4 import BeautifulSoup

# URL of the search page; the script both GETs it and POSTs the form back to it.
link = 'https://eservices.cipc.co.za/Search.aspx'

# Extra headers merged into the session right before the POST below.
# NOTE(review): these look like the headers a browser sends for an ASP.NET AJAX
# partial postback -- confirm against the real request in the browser dev tools.
headers = {
    'X-MicrosoftAjax': 'Delta=true',
    'X-Requested-With': 'XMLHttpRequest',
}

with requests.Session() as s:
    # Every request made through this session carries a desktop-Chrome User-Agent.
    s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'
    # Fetch the page once so the hidden form fields can be read from its HTML.
    r = s.get(link)
    soup = BeautifulSoup(r.text,"lxml")
    # Form data for the POST: the three hidden values scraped from the GET
    # response, plus hard-coded values for the search controls.
    payload = {
        '__VIEWSTATE': soup.select_one("#__VIEWSTATE")["value"],
        '__VIEWSTATEGENERATOR': soup.select_one("#__VIEWSTATEGENERATOR")["value"],
        '__EVENTVALIDATION': soup.select_one("#__EVENTVALIDATION")["value"],
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
        'ctl00$cntMain$ScriptManager1': 'ctl00$cntMain$Updatepanel1|ctl00$cntMain$lnkSearchIcon',
        # Dropdown choice ("EntNo") and the search term typed into the search box.
        'ctl00$cntMain$drpSearchOptions': 'EntNo',
        'ctl00$cntMain$txtSearchCIPC': 'K2018288262',
        'ctl00$cntMain$wtmkSearch_ClientState': '',
        # The search icon link is named as the control that triggers the postback.
        '__EVENTTARGET': 'ctl00$cntMain$lnkSearchIcon',
        '__ASYNCPOST': 'true'
    }
    # The AJAX headers are added only now, so the GET above was sent without them.
    s.headers.update(headers)
    res = s.post(link,data=payload)
    # Dump the raw response body for inspection.
    print(res.text)

How can I parse anything from the result page using the requests module?

Upvotes: 1

Views: 46

Answers (1)

Andrej Kesely
Andrej Kesely

Reputation: 195573

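Try sending two POST requests in the same session: first a postback for the dropdown selection, then the search itself, each built from the inputs of the previous response: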

import requests
from bs4 import BeautifulSoup

url = "https://eservices.cipc.co.za/Search.aspx"

# Enterprise number to look up.
ENTERPRISE_NO = "K2018288262"  # <-- change no. here

# Seconds to wait for the server; without a timeout a request can block forever.
TIMEOUT = 30


def _build_postback(soup, event_target, search_text):
    """Build the form data for one postback of the search page.

    Args:
        soup: Parsed HTML of the most recent response; the current value of
            every named ``<input>`` is copied from it.
        event_target: Name of the control reported as triggering the postback.
        search_text: Text to submit in the search box.

    Returns:
        A dict of form field names to values, suitable for ``data=``.
    """
    # Requiring [name] as well as [value] skips unnamed inputs, which would
    # otherwise raise KeyError on inp["name"].
    form = {
        inp["name"]: inp["value"]
        for inp in soup.select("input[name][value]")
    }

    form["ctl00$cntMain$ScriptManager1"] = (
        "ctl00$cntMain$Updatepanel1|" + event_target
    )
    form["ctl00$cntMain$drpSearchOptions"] = "EntNo"
    form["ctl00$cntMain$txtSearchCIPC"] = search_text
    form["ctl00$cntMain$wtmkSearch_ClientState"] = ""
    form["__EVENTTARGET"] = event_target
    form["__EVENTARGUMENT"] = ""
    form["__LASTFOCUS"] = ""

    # The home button must not be submitted; pop() tolerates it being absent
    # from the page instead of raising KeyError.
    form.pop("ctl00$btnHome", None)
    return form


with requests.Session() as s:
    soup = BeautifulSoup(s.get(url, timeout=TIMEOUT).content, "html.parser")

    # Step 1: post back with the dropdown as the event target and an empty
    # search box. The response provides the form state used for the search.
    step1 = _build_postback(soup, "ctl00$cntMain$drpSearchOptions", "")
    soup = BeautifulSoup(
        s.post(url, data=step1, timeout=TIMEOUT).content, "html.parser"
    )

    # Step 2: post back with the search icon as the event target and the
    # enterprise number in the search box.
    step2 = _build_postback(soup, "ctl00$cntMain$lnkSearchIcon", ENTERPRISE_NO)
    soup = BeautifulSoup(
        s.post(url, data=step2, timeout=TIMEOUT).content, "html.parser"
    )

    # print some info:
    name_tag = soup.select_one(
        "#ctl00_cntMain_TabContainer1_TabPanel1_lblEntName"
    )
    address_tag = soup.select_one(
        "#ctl00_cntMain_TabContainer1_TabPanel1_lblPhysAddress"
    )

    # select_one() returns None when nothing matches; fail with a readable
    # message rather than an AttributeError on None.
    if name_tag is None or address_tag is None:
        raise SystemExit(
            "Result fields not found in the response for " + ENTERPRISE_NO
        )

    print(name_tag.text)
    print(address_tag.get_text(strip=True, separator="\n"))

Prints:

REVOLUTIONIZING ENTERPRISES IN AFRICA
UNIT 261 KIKUYU WATERFALL
CNR PRETORIA MAIN ROAD AND MAXWELL
JOHANNESBURG
GAUTENG
2090

Upvotes: 1

Related Questions