Reputation:
I have written this code, but it is not moving on to the next page; it keeps fetching data from the same page repeatedly.
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Print the href of every doctor link on each result page, following the
# "Next" pager link until it can no longer be followed.

url = "http://www.4docsearch.com/Delhi/Doctors"
RESULTS_ID = "ContentPlaceHolder1_divResult"
# NOTE(review): assumes the pager marks a disabled "Next" link with
# class="aspNetDisabled" -- confirm against the live page. If it does not,
# the staleness timeout below still terminates the loop.
NEXT_LINK_XPATH = (
    "//a[@id='ContentPlaceHolder1_lnkNext' and not(@class='aspNetDisabled')]"
)
PAGE_TIMEOUT = 20  # seconds to wait for a page change before giving up

driver = webdriver.Chrome(r'C:\chromedriver.exe')
try:
    driver.get(url)
    wait = WebDriverWait(driver, PAGE_TIMEOUT)
    while True:
        # Make sure the result container is in the DOM before parsing, so a
        # half-loaded page is never scraped.
        try:
            wait.until(EC.presence_of_element_located((By.ID, RESULTS_ID)))
        except TimeoutException:
            print('No more pages')
            break

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        div = soup.find('div', {"id": RESULTS_ID})
        if div is not None:
            for heads in div.find_all('h2'):
                links = heads.find('a')
                # Skip headings without a usable anchor instead of crashing.
                if links is not None and links.has_attr('href'):
                    print(links['href'])

        try:
            next_link = driver.find_element(By.XPATH, NEXT_LINK_XPATH)
        except NoSuchElementException:
            print('No more pages')
            break

        next_link.click()
        # The click triggers a postback that replaces the page. Wait until
        # the link that was just clicked goes stale; otherwise the next
        # iteration would re-read the old page source and print the same
        # results again.
        try:
            wait.until(EC.staleness_of(next_link))
        except TimeoutException:
            print('No more pages')
            break
finally:
    # quit() ends the whole driver session, even if scraping failed midway.
    driver.quit()
Upvotes: 0
Views: 270
Reputation: 193338
To browse to the Next page, since the desired element is a JavaScript-enabled element that uses __doPostBack(), you have to:
- Induce WebDriverWait for staleness_of() the element first.
- Induce WebDriverWait for element_to_be_clickable() the element next.
You can use the following Locator Strategies:
Code Block:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Click the enabled "Next" pager link repeatedly until it stops being
# clickable, printing "Next" after each successful page change.

# Matches the "Next" link only while it does not carry the aspNetDisabled class.
NEXT_LINK = (
    By.XPATH,
    "//a[@id='ContentPlaceHolder1_lnkNext' and not(@class='aspNetDisabled')]",
)

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("start-maximized")
driver = webdriver.Chrome(options=chrome_options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
try:
    driver.get("http://www.4docsearch.com/Delhi/Doctors")
    wait = WebDriverWait(driver, 20)
    while True:
        try:
            next_link = wait.until(EC.element_to_be_clickable(NEXT_LINK))
            next_link.click()
            # Wait on the element that was actually clicked. Re-locating the
            # link here could pick up the one on the already-reloaded page,
            # which never goes stale and would end the loop early.
            wait.until(EC.staleness_of(next_link))
        except TimeoutException:
            # No enabled link appeared, or the page did not change in time.
            print("No more pages")
            break
        print("Next")
    print("Exiting")
finally:
    # Always end the browser session, even on an unexpected error.
    driver.quit()
Console Output
Next
Next
Next
.
.
.
No more pages
Exiting
Upvotes: 0