Reputation: 164
I'm trying to scrape some fields from a webpage using the requests module. To generate the desired result, it is necessary to choose an option from the dropdown (Enterprise No), put the search item K2018288262 in the search box, and finally hit the search button. This image represents what I tried to detail. However, when I tried to mimic the same steps using the following script, I do not get the required content in the response.
This is how I've tried:
import requests
from bs4 import BeautifulSoup
link = 'https://eservices.cipc.co.za/Search.aspx'

# Extra headers added to the session only after the initial GET, so they
# accompany the POST alone.
headers = {'X-MicrosoftAjax': 'Delta=true', 'X-Requested-With': 'XMLHttpRequest'}

# Hidden inputs whose current values are copied from the landing page into
# the POST body (looked up by element id).
STATE_FIELDS = ('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION')

with requests.Session() as session:
    session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'

    # Fetch the search page and parse it to read the hidden state values.
    landing = session.get(link)
    soup = BeautifulSoup(landing.text, "lxml")

    # Start with the state fields taken from the page, then append the fixed
    # search fields in the same order the form data was originally built.
    payload = {
        field: soup.select_one('#' + field)["value"] for field in STATE_FIELDS
    }
    payload.update({
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
        'ctl00$cntMain$ScriptManager1': 'ctl00$cntMain$Updatepanel1|ctl00$cntMain$lnkSearchIcon',
        'ctl00$cntMain$drpSearchOptions': 'EntNo',
        'ctl00$cntMain$txtSearchCIPC': 'K2018288262',
        'ctl00$cntMain$wtmkSearch_ClientState': '',
        '__EVENTTARGET': 'ctl00$cntMain$lnkSearchIcon',
        '__ASYNCPOST': 'true',
    })

    # Submit the search and dump the raw response body.
    session.headers.update(headers)
    res = session.post(link, data=payload)
    print(res.text)
How can I parse anything from the result page using the requests module?
Upvotes: 1
Views: 46
Reputation: 195573
Try:
import requests
from bs4 import BeautifulSoup
url = "https://eservices.cipc.co.za/Search.aspx"

# Seconds to wait on each request before giving up; without a timeout a
# stalled server would hang the script indefinitely.
REQUEST_TIMEOUT = 30

# Value typed into the search box for the final request.
ENTERPRISE_NO = "K2018288262"  # <-- change no. here


def build_postback_fields(soup, event_target, search_text, search_option="EntNo"):
    """Return the form data for a postback raised by *event_target*.

    Every ``<input>`` in *soup* that has both a ``name`` and a ``value`` is
    copied (this carries over the hidden state fields served with the page),
    then the search-specific fields are set on top.

    Args:
        soup: Parsed page; only its ``select()`` method is used.
        event_target: Name of the control that triggers the postback.
        search_text: Text to submit in the search box.
        search_option: Value to submit for the search-type dropdown.

    Returns:
        A dict mapping field names to the values to POST.
    """
    # Requiring [name] skips nameless inputs, which cannot be submitted and
    # would otherwise raise KeyError on inp["name"].
    fields = {
        inp["name"]: inp["value"] for inp in soup.select("input[name][value]")
    }
    fields.update(
        {
            "ctl00$cntMain$ScriptManager1": (
                "ctl00$cntMain$Updatepanel1|" + event_target
            ),
            "ctl00$cntMain$drpSearchOptions": search_option,
            "ctl00$cntMain$txtSearchCIPC": search_text,
            "ctl00$cntMain$wtmkSearch_ClientState": "",
            "__EVENTTARGET": event_target,
            "__EVENTARGUMENT": "",
            "__LASTFOCUS": "",
        }
    )
    # The Home button's value must not be submitted (presumably the server
    # would treat it as a click on that button -- TODO confirm). pop() with
    # a default tolerates pages that do not contain the button at all.
    fields.pop("ctl00$btnHome", None)
    return fields


def _parse(response):
    """Return *response* parsed as HTML, raising on an HTTP error status."""
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def main():
    """Look up ENTERPRISE_NO on the search page and print its name and address."""
    with requests.Session() as s:
        # Load the search page to obtain the initial form state.
        soup = _parse(s.get(url, timeout=REQUEST_TIMEOUT))

        # First postback: choose the search option in the dropdown. The page
        # returned supplies the form state used by the search itself.
        d = build_postback_fields(soup, "ctl00$cntMain$drpSearchOptions", "")
        soup = _parse(s.post(url, data=d, timeout=REQUEST_TIMEOUT))

        # Second postback: submit the search via the search icon.
        d = build_postback_fields(
            soup, "ctl00$cntMain$lnkSearchIcon", ENTERPRISE_NO
        )
        soup = _parse(s.post(url, data=d, timeout=REQUEST_TIMEOUT))

    name_tag = soup.select_one(
        "#ctl00_cntMain_TabContainer1_TabPanel1_lblEntName"
    )
    address_tag = soup.select_one(
        "#ctl00_cntMain_TabContainer1_TabPanel1_lblPhysAddress"
    )
    # select_one() returns None when the element is absent; fail with a
    # readable message instead of an AttributeError on None.
    if name_tag is None or address_tag is None:
        raise SystemExit(
            "Enterprise details not found in the response for "
            + ENTERPRISE_NO
        )

    # print some info:
    print(name_tag.text)
    print(address_tag.get_text(strip=True, separator="\n"))


if __name__ == "__main__":
    main()
Prints:
REVOLUTIONIZING ENTERPRISES IN AFRICA
UNIT 261 KIKUYU WATERFALL
CNR PRETORIA MAIN ROAD AND MAXWELL
JOHANNESBURG
GAUTENG
2090
Upvotes: 1