Reputation: 43
I spent the afternoon reading a lot of threads dealing with this issue, but unfortunately I am running out of ideas :(
I am trying to scrape this website: https://www.kumon.co.uk/find-a-tutor/
I use the code below to collect the URL of every store. To do so, I have to click through to the next page repeatedly until I reach the last page.
Here is the code I use:
def get_urls(url) -> list:
    """Collect the href of every 'Choose Centre' link over the paginated results.

    Opens ``url`` in Chrome, types 'london' into the element with id
    ``centre_search`` and presses ENTER, then gathers the link hrefs from the
    first result page and from each page reached by clicking the last item of
    the pagination list.

    Returns:
        The list of collected href values.
    """
    # Get all URLs to the store pages
    options = Options()
    # options.add_argument('--headless')
    # The chromedriver binary is looked up in externals/, two directories
    # above this file.
    path_chromedriver = Path(__file__).parent.parent.joinpath('externals/chromedriver')
    browser = webdriver.Chrome(str(path_chromedriver), chrome_options=options)
    browser.get(url)
    # Submit the search for 'london' through the search box.
    inputElement = browser.find_element_by_id("centre_search")
    inputElement.send_keys('london')
    inputElement.send_keys(Keys.ENTER)
    store_url = []
    # Links found on the first result page.
    links = browser.find_elements_by_link_text('Choose Centre')
    for link in links:
        href = link.get_attribute('href')
        store_url.append(href)
    # NOTE(review): find_element_by_xpath presumably raises
    # NoSuchElementException instead of returning a falsy value when nothing
    # matches, so this condition may never end the loop cleanly -- confirm
    # against the Selenium documentation.
    while browser.find_element_by_xpath("//ul[@class='pagination']//li[last()]/a/small"):
        WebDriverWait(browser, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//ul[@class='pagination']//li[last()]/a/small"))).click()
        # NOTE(review): nothing waits for the next page to replace the current
        # one after the click, so the links looked up below may still belong
        # to the old page. This is the likely origin of the reported
        # StaleElementReferenceException -- confirm.
        links = browser.find_elements_by_link_text('Choose Centre')
        for link in links:
            href = link.get_attribute('href')
            store_url.append(href)
    return store_url
Unfortunately I get a
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Using a try/except does not seem like a good solution to me; I'm looking for something robust. Should I switch from Chrome to Firefox?
Thanks in advance, Nicolas.
Upvotes: 3
Views: 838
Reputation: 52665
Not sure why you think that try/except is not a good solution, but this is exactly what you need:
from selenium.common.exceptions import WebDriverException
def get_urls(url) -> list:
    """Return the href of every 'Choose Centre' link across all result pages.

    Opens ``url`` in Chrome, searches for 'london' through the element with id
    ``centre_search``, collects the links of the first result page and then
    keeps clicking the "next page" pagination link, collecting the links of
    each new page, until that link can no longer be clicked.

    The browser is always quit before returning, including when an exception
    propagates, so no Chrome/chromedriver process is left behind.

    Args:
        url: Address of the search page to open.

    Returns:
        The collected href values in page order; an empty list when no
        'Choose Centre' link shows up after the search.
    """
    # Imported locally so the block does not depend on a file-level import
    # beyond the ones it already relied on.
    from selenium.common.exceptions import TimeoutException

    options = Options()
    # options.add_argument('--headless')
    path_chromedriver = Path(__file__).parent.parent.joinpath('externals/chromedriver')
    browser = webdriver.Chrome(str(path_chromedriver), chrome_options=options)
    try:
        browser.get(url)
        search_box = browser.find_element_by_id("centre_search")
        search_box.send_keys('london')
        search_box.send_keys(Keys.ENTER)

        # Wait for the first result page instead of reading it immediately:
        # right after ENTER the list may still be empty, and an empty list
        # would make links[-1] below raise an uncaught IndexError.
        try:
            links = WebDriverWait(browser, 10).until(
                EC.presence_of_all_elements_located((By.LINK_TEXT, 'Choose Centre')))
        except TimeoutException:
            return []
        store_url = [link.get_attribute("href") for link in links]

        while True:
            try:
                # The XPath only matches a last pagination item without a
                # class value; when the wait fails the loop ends.
                WebDriverWait(browser, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, "//li[last()][not(normalize-space(@class))]/a[@data-page]"))).click()
                # Block until the previous page's last link is detached, so
                # the next lookup cannot return elements of the old page.
                WebDriverWait(browser, 10).until(EC.staleness_of(links[-1]))
            except WebDriverException:
                break
            links = WebDriverWait(browser, 10).until(
                EC.visibility_of_all_elements_located((By.LINK_TEXT, 'Choose Centre')))
            store_url.extend([link.get_attribute("href") for link in links])
        return store_url
    finally:
        # hrefs are plain strings by now, so closing the session is safe.
        browser.quit()
Upvotes: 1