Reputation: 13
I want to scrape data from the following website: http://b2b.godrejinterio.com/GodrejInterio/dealer.aspx?id=29&menuid=2458&business=2
Here, the data is generated dynamically on the same page, without any change in the URL. Only after you select an option from the 1st dropdown menu does the 2nd dropdown become active and let you select an option from it, and so on for the 3rd and 4th dropdown menus.
After selecting an option in every dropdown menu, you have to click the search button; only then is the data generated on the same page.
I need to scrape the data for all possible selections in one go. Below is the code I tried, but it doesn't work as desired. I am using Python with BeautifulSoup and Selenium. Help me with this!!
Mike67, I have used your suggestion and improved the code, but I am still unable to iterate over the options and save the results to a dataframe. Help me with this!! Code:
# Scrape dealer records from the Godrej Interio dealer locator by walking every
# sub-range / state / city / area combination of its cascading dropdowns and
# collecting the result tables shown after each search.

DEALER_URL = "http://b2b.godrejinterio.com/GodrejInterio/dealer.aspx?id=29&menuid=2458&business=2"
ID_PREFIX = "ucInterioDealerLocatorNewRight_"
PLACEHOLDER = '0'   # value of the non-selectable first entry in a dropdown
SHORT_WAIT = 2      # seconds to wait after the initial load / range selection
POSTBACK_WAIT = 10  # seconds to wait for the page to refresh after a change


def _option_values(driver, dropdown_id):
    """Return the option values of a dropdown as plain strings.

    The values are copied out immediately so the caller never holds on to
    WebElement objects: the page is refreshed after every selection, and
    elements located before a refresh may raise a stale-element error.
    """
    dropdown = driver.find_element_by_id(ID_PREFIX + dropdown_id)
    return [
        option.get_attribute("value")
        for option in dropdown.find_elements_by_tag_name("option")
    ]


def _choose(driver, dropdown_id, value, pause):
    """Select ``value`` in a dropdown, then sleep ``pause`` seconds.

    The dropdown is located afresh on every call instead of reusing an
    earlier ``Select`` wrapper, which may be stale after a page refresh.
    """
    dropdown = driver.find_element_by_id(ID_PREFIX + dropdown_id)
    Select(dropdown).select_by_value(value)
    time.sleep(pause)


def _dealer_records(html):
    """Extract one ';'-delimited string per dealer table found in ``html``.

    Each record has the form ``";cell1;cell2;...;"`` where the cells are the
    text of the first ``<td>`` of every row in the dealer's inner table.
    Returns an empty list when the result container or first table is absent.
    """
    soup = BeautifulSoup(html, 'html5lib')
    container = soup.find('div', attrs={'class': 'dealer_search_maindiv'})
    if container is None:
        return []

    records = []
    index = 0
    while True:
        # Tables are numbered ctl00, ctl01, ...; stop at the first gap.
        # NOTE(review): ids beyond ctl09 are assumed to continue as ctl10,
        # ctl11, ... - confirm against a search with more than ten dealers.
        table_id = f"{ID_PREFIX}dlDealer_ctl{index:02d}_tblDealer"
        outer = container.find('table', {'id': table_id})
        if outer is None:
            break
        index += 1

        inner = outer.find('table')
        if inner is None:
            continue

        data = ";"
        for row in inner.find_all('tr'):
            cell = row.find('td')
            if cell is None:
                continue
            print(cell.text)
            data = data + cell.text + ';'
        print(data)
        records.append(data)
    return records


options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
# The options must be handed to the driver, otherwise the flags above are ignored.
driver = webdriver.Chrome("C:/Users/Downloads/chromedriver", options=options)
rec = []

try:
    driver.get(DEALER_URL)
    time.sleep(SHORT_WAIT)
    _choose(driver, "ddlRange", "Institutional Furniture", SHORT_WAIT)

    # NOTE(review): this assumes the parent dropdowns keep their selection
    # after a search; verify on the live page.
    for sub_range in _option_values(driver, "ddlSubRange"):
        print(sub_range)
        if sub_range == PLACEHOLDER:
            continue
        _choose(driver, "ddlSubRange", sub_range, POSTBACK_WAIT)

        for state in _option_values(driver, "ddlState"):
            print(state)
            if state == PLACEHOLDER:
                continue
            _choose(driver, "ddlState", state, POSTBACK_WAIT)

            for city in _option_values(driver, "ddlCity"):
                print(city)
                if city == PLACEHOLDER:
                    continue
                _choose(driver, "ddlCity", city, POSTBACK_WAIT)

                for area in _option_values(driver, "ddlArea"):
                    print(area)
                    if area == PLACEHOLDER:
                        continue
                    _choose(driver, "ddlArea", area, POSTBACK_WAIT)

                    driver.find_element_by_id(ID_PREFIX + "imgBtnSearch").click()
                    time.sleep(POSTBACK_WAIT)

                    found = _dealer_records(driver.page_source)
                    if not found:
                        print("no record for this selection")
                    rec.extend(found)
            print("state done")
    print("all subrange completed")
finally:
    # quit() also stops the chromedriver process, even when scraping fails.
    driver.quit()

print(len(rec))
df = pd.DataFrame({'Record': rec})
Upvotes: 1
Views: 543
Reputation: 11342
If you call time.sleep between each dropdown change, the page works:
# Load the dealer locator and give the page a moment to finish rendering.
driver.get("http://b2b.godrejinterio.com/GodrejInterio/dealer.aspx?id=29&menuid=2458&business=2")
time.sleep(2)

# 1st dropdown: product range. Pause afterwards before touching the next one.
range_element = driver.find_element_by_id("ucInterioDealerLocatorNewRight_ddlRange")
s1 = Select(range_element)
s1.select_by_value("Institutional Furniture")
first_range_option = s1.options[0]
print(first_range_option.text)
time.sleep(2)

# 2nd dropdown: sub-range, again followed by a pause.
sub_range_element = driver.find_element_by_id("ucInterioDealerLocatorNewRight_ddlSubRange")
s2 = Select(sub_range_element)
s2.select_by_value("Desking")
time.sleep(2)

# 3rd dropdown: state.
state_element = driver.find_element_by_id("ucInterioDealerLocatorNewRight_ddlState")
s3 = Select(state_element)
s3.select_by_value("Delhi")
first_state_option = s3.options[0]
print(first_state_option.text)

# Submit the search with the current selections.
search_button = driver.find_element_by_id("ucInterioDealerLocatorNewRight_imgBtnSearch")
search_button.click()
Upvotes: 1