Reputation:
I have tried using a CSS selector to send a value into the search box with send_keys() and then submit it, so that I get the table of doctors for a particular year, but I am getting the error below: "ElementNotInteractableException: Message: element not interactable"
Below is the code I have written:
from selenium import webdriver
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)
driver.find_element_by_xpath("//a[contains(text(),'Year of Registration')]").click()
driver.find_elements_by_css_selector("input[type='text']")[-1].send_keys("2015")
driver.find_element_by_css_selector("input[value='Submit']").click()
next_page = True
while next_page == True:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table', {'id': 'doct_info2'})
    try:
        rows = table1.find_all('tr')
        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')
                link = data[6].click()
                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table', {'id': 'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
        pass
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print('No more pages')
        next_page = False
driver.close()
Upvotes: 3
Views: 372
Reputation: 11505
The code below fetches only the first 10 results. You can change length from 10 to 42354, which is the maximum for year 2015, and out.csv will be written almost immediately. The second file, data.csv, will include the internal doctor details.
The page is rendered via JavaScript, so I located the XHR request it makes (under the Network tab of your browser's Developer Tools) and query that JSON endpoint directly.
import pandas as pd
import csv
import re
import requests


def Table():
    # The XHR endpoint returns the paginated table as JSON; the rows live under 'data'.
    table = pd.read_json("https://mciindia.org/MCIRest/open/getPaginatedData?service=getPaginatedDoctor&draw=1&columns[0][data]=0&columns[0][name]=&columns[0][searchable]=true&columns[0][orderable]=true&columns[0][search][value]=&columns[0][search][regex]=false&columns[1][data]=1&columns[1][name]=&columns[1][searchable]=true&columns[1][orderable]=true&columns[1][search][value]=&columns[1][search][regex]=false&columns[2][data]=2&columns[2][name]=&columns[2][searchable]=true&columns[2][orderable]=true&columns[2][search][value]=&columns[2][search][regex]=false&columns[3][data]=3&columns[3][name]=&columns[3][searchable]=true&columns[3][orderable]=true&columns[3][search][value]=&columns[3][search][regex]=false&columns[4][data]=4&columns[4][name]=&columns[4][searchable]=true&columns[4][orderable]=true&columns[4][search][value]=&columns[4][search][regex]=false&columns[5][data]=5&columns[5][name]=&columns[5][searchable]=true&columns[5][orderable]=true&columns[5][search][value]=&columns[5][search][regex]=false&columns[6][data]=6&columns[6][name]=&columns[6][searchable]=true&columns[6][orderable]=true&columns[6][search][value]=&columns[6][search][regex]=false&order[0][column]=0&order[0][dir]=asc&start=0&length=10&search[value]=&search[regex]=false&year=2015&_=1577626804003")['data']
    with open('out.csv', 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(
            ['Year Of The Info', 'Registration#', 'State Medical Councils', 'Name', 'FatherName'])
        data = []
        for item in table:
            writer.writerow(item[1:6])
            # The last column holds an openDoctorDetailsnew('<doctorId>', '<regdNoValue>') call;
            # pull out the two arguments for the detail requests.
            required = item[6]
            match = re.search(
                r"openDoctorDetailsnew\('([^']*)', '([^']*)'", required)
            data.append(match.group().split("'")[1:4:2])
    print("Data Saved Into out.csv")
    return data


def Details():
    names = []
    items = []
    for doc, val in Table():
        print(f"Extracting DoctorID# {doc}, RegValue# {val}")
        json = {'doctorId': doc, 'regdNoValue': val}
        r = requests.post(
            "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr", json=json).json()
        if r.keys() not in names:
            names.append(r.keys())  # collect the header row only once
        items.append(r.values())
    print("Done")
    return names, items


def Save():
    with open('data.csv', 'w', newline="") as d:
        writer = csv.writer(d)
        n, i = Details()
        writer.writerows(n)
        writer.writerows(i)


Save()
Check the output samples out.csv and data.csv.
Note: have a look at concurrent.futures if you are going to do a huge scrape of the internal data; a rough sketch follows.
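As an illustration of that note (not part of the original answer), here is a minimal sketch of running the per-doctor detail requests from Details() concurrently with ThreadPoolExecutor. The endpoint and the (doctorId, regdNoValue) pairs are the ones used above; the worker count and helper names are assumptions:
from concurrent.futures import ThreadPoolExecutor
import requests

DETAILS_URL = "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr"


def fetch_details(pair):
    # pair is a (doctorId, regdNoValue) tuple, as produced by Table() above
    doc, val = pair
    r = requests.post(DETAILS_URL, json={'doctorId': doc, 'regdNoValue': val})
    return r.json()


def fetch_all(pairs, workers=10):
    # executor.map returns results in the same order as the input pairs
    with ThreadPoolExecutor(max_workers=workers) as executor:
        return list(executor.map(fetch_details, pairs))

# hypothetical usage: details = fetch_all(Table())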
Upvotes: 1
Reputation: 84465
You could do the whole thing more quickly with just requests. Change the start param (and bump draw) to get different pages; you can dynamically add or remove the timestamp param '_'. Change 'year' for a different year. The initial JSON response gives a record count, so it is easy enough to calculate the end point for a loop over all results in batches of 500 (a sketch of that loop follows the code below). Use a Session object for efficiency of TCP connection re-use across multiple requests in a loop.
import requests
import pandas as pd
params = (
    ('service', 'getPaginatedDoctor'),
    ('draw', '1'),
    ('columns[0][data]', '0'),
    ('columns[0][name]', ''),
    ('columns[0][searchable]', 'true'),
    ('columns[0][orderable]', 'true'),
    ('columns[0][search][value]', ''),
    ('columns[0][search][regex]', 'false'),
    ('columns[1][data]', '1'),
    ('columns[1][name]', ''),
    ('columns[1][searchable]', 'true'),
    ('columns[1][orderable]', 'true'),
    ('columns[1][search][value]', ''),
    ('columns[1][search][regex]', 'false'),
    ('columns[2][data]', '2'),
    ('columns[2][name]', ''),
    ('columns[2][searchable]', 'true'),
    ('columns[2][orderable]', 'true'),
    ('columns[2][search][value]', ''),
    ('columns[2][search][regex]', 'false'),
    ('columns[3][data]', '3'),
    ('columns[3][name]', ''),
    ('columns[3][searchable]', 'true'),
    ('columns[3][orderable]', 'true'),
    ('columns[3][search][value]', ''),
    ('columns[3][search][regex]', 'false'),
    ('columns[4][data]', '4'),
    ('columns[4][name]', ''),
    ('columns[4][searchable]', 'true'),
    ('columns[4][orderable]', 'true'),
    ('columns[4][search][value]', ''),
    ('columns[4][search][regex]', 'false'),
    ('columns[5][data]', '5'),
    ('columns[5][name]', ''),
    ('columns[5][searchable]', 'true'),
    ('columns[5][orderable]', 'true'),
    ('columns[5][search][value]', ''),
    ('columns[5][search][regex]', 'false'),
    ('columns[6][data]', '6'),
    ('columns[6][name]', ''),
    ('columns[6][searchable]', 'true'),
    ('columns[6][orderable]', 'true'),
    ('columns[6][search][value]', ''),
    ('columns[6][search][regex]', 'false'),
    ('order[0][column]', '0'),
    ('order[0][dir]', 'asc'),
    ('start', '0'),
    ('length', '500'),
    ('search[value]', ''),
    ('search[regex]', 'false'),
    ('name', ''),
    ('registrationNo', ''),
    ('smcId', ''),
    ('year', '2015'),
    ('_', '1577634512046'),
)
table_headers = ['Sl. No.','Year of Info','Registration Number','State Medical Councils','Name','Father Name','Action']
r = requests.get('https://mciindia.org/MCIRest/open/getPaginatedData', params=params)
df = pd.DataFrame(r.json()['data'], columns = table_headers)
print(df)
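To make the batching idea concrete, here is a minimal sketch that reuses the params tuple and table_headers from the snippet above and pages through all results 500 at a time with a Session. The recordsTotal key is assumed to hold the record count (the usual DataTables field), and the timestamp param is simply dropped:
url = 'https://mciindia.org/MCIRest/open/getPaginatedData'

with requests.Session() as s:
    p = dict(params)           # copy the params above into a mutable dict
    p.pop('_', None)           # the timestamp param is optional
    first = s.get(url, params=p).json()
    total = int(first['recordsTotal'])   # assumed key for the total record count
    rows = first['data']
    # fetch the remaining pages in batches of 500, bumping start and draw each time
    for draw, start in enumerate(range(500, total, 500), start=2):
        p['draw'], p['start'] = str(draw), str(start)
        rows += s.get(url, params=p).json()['data']

df = pd.DataFrame(rows, columns=table_headers)
print(df.shape)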
Upvotes: 1
Reputation: 33384
To enter a value in the textbox, you need to induce WebDriverWait() and wait for element_to_be_clickable(), then use send_keys().
To get the table, you need to induce WebDriverWait() and wait for the table to be visible with visibility_of_element_located().
Code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[text()='Year of Registration']"))).click()
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//input[@id='doctor_year']"))).send_keys("2015")
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//button[@id='doctor_year_details']"))).click()
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CSS_SELECTOR,"table.table.table-bordered.dataTable.no-footer")))
next_page = True
while next_page == True:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table', {'id': 'doct_info2'})
    try:
        rows = table1.find_all('tr')
        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')
                link = data[6].click()
                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table', {'id': 'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
        pass
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print('No more pages')
        next_page = False
driver.close()
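Note that the data[6].click() lines above come from the question and will not open the detail pop-up: data[6] is a BeautifulSoup tag, not a live element, so the call raises and is swallowed by the bare except. As a rough, untested sketch of reading that pop-up with Selenium itself in place of the inner loop (the seventh-column link location and the doctorBiodata id come from the code above, while the close-button selector is a guess):
view_links = driver.find_elements_by_xpath("//table[@id='doct_info2']//td[7]/a")
for link in view_links:
    link.click()
    WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.ID, "doctorBiodata")))
    bio = bs(driver.page_source, 'html.parser').find('table', {'id': 'doctorBiodata'})
    print(bio.find_all('tr'))
    # dismiss the pop-up before the next click; this close-button selector is an assumption
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button.close"))).click()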
Upvotes: 1