Reputation: 37
I've scraped the elements I want from the first page with requests + Beautiful Soup and am trying to use Selenium to click a button that loads another page with the rest of the data I want to scrape. The new Selenium window opens the first page, then the second, but it always ends in some sort of selenium.common.exceptions error:

  File "c:\Users\ArkPr\yelp\yelp_learner.py", line 122, in <module>
    WebDriverWait(driver, 10).until(
  File "C:\Users\ArkPr\AppData\Roaming\Python\Python39\site-packages\selenium\webdriver\support\wait.py", line 80, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
I've searched around and tried implicit and explicit waits, time.sleep(), locating the element by class name and by XPath, and fetching the data with requests instead, and I don't know what to try next.
I'm trying to scrape data from this site:

url = "https://www.yelp.co.uk/search?find_desc=Restaurants&find_loc=London&ns=1"
next_page = driver.find_element_by_class_name("css-ac8spe")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "css-ac8spe"))
    )
    # element.clear()
    element.click()
except TimeoutException as u:
    print(u)

# if EC.presence_of_element_located((By.CLASS_NAME, "css-ac8spe")) == True:
#     next_page.click()
#     next_page.back()

for li in ul:
    # driver.implicitly_wait(3)
    WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located((By.CLASS_NAME, 'stickySidebar__373c0__3MPss border-color--default__373c0__r305k')))
        # EC.presence_of_element_located((By.CLASS_NAME, 'stickySidebar__373c0__3MPss border-color--default__373c0__r305k')))
    sidebar = driver.find_element_by_class_name('stickySidebar__373c0__3MPss border-color--default__373c0__r305k')
    if sidebar != None:
        for side in sidebar:
            print(side)
            if side != None:
                try:
                    address = side.find("p", {"class": " css-chtywg"})
                    # address = side.find_element_by_class_name("css-chtywg")
                    print(address.text)
                except TimeoutException as t:
                    print(t)
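To make this easier to reproduce, here is the failing wait boiled down to a minimal script (chromedriver on PATH is assumed, and css-ac8spe is just the class I currently see on the pagination button, so it may differ for you):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()
driver.get("https://www.yelp.co.uk/search?find_desc=Restaurants&find_loc=London&ns=1")

try:
    # same wait as above, isolated from the rest of the scraping loop
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "css-ac8spe"))
    )
    element.click()
except TimeoutException as e:
    print(e)  # this is what fires after 10 seconds
finally:
    driver.quit()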
Upvotes: 1
Views: 117
Reputation: 3433
This code worked for me.
driver.implicitly_wait(10)
driver.get("https://www.yelp.co.uk/search?find_desc=Restaurants&find_loc=London&ns=1")
# count the restaurant cards once, then re-find them on every pass
res_leng = len(driver.find_elements_by_xpath("//li/div[contains(@class,'container__09f24')]"))
for i in range(res_leng):
    restaurants = driver.find_elements_by_xpath("//li/div[contains(@class,'container__09f24')]")
    # 'more' opens the restaurant's own page, where the name and address live
    restaurants[i].find_element_by_xpath(".//a[text()='more']").click()
    name = driver.find_element_by_tag_name("h1").text
    address = driver.find_element_by_xpath("//p[@class=' css-chtywg']").text
    print("{} : {}".format(name, address))
    time.sleep(2)
    driver.back()
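If you are on Selenium 4, the find_element(s)_by_* helpers used above have been removed, so the same logic needs the By-based calls instead; a sketch of that translation (same XPaths as above, not re-tested against the live page):

import time
from selenium.webdriver.common.by import By

driver.implicitly_wait(10)
driver.get("https://www.yelp.co.uk/search?find_desc=Restaurants&find_loc=London&ns=1")

res_leng = len(driver.find_elements(By.XPATH, "//li/div[contains(@class,'container__09f24')]"))
for i in range(res_leng):
    # re-find the cards on every pass because driver.back() invalidates the old references
    restaurants = driver.find_elements(By.XPATH, "//li/div[contains(@class,'container__09f24')]")
    restaurants[i].find_element(By.XPATH, ".//a[text()='more']").click()
    name = driver.find_element(By.TAG_NAME, "h1").text
    address = driver.find_element(By.XPATH, "//p[@class=' css-chtywg']").text
    print("{} : {}".format(name, address))
    time.sleep(2)
    driver.back()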
Upvotes: 1
Reputation: 29362
You need to click on more on every post; it navigates you to a new page where you can extract the address, and then you come back to the original page.
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(50)
driver.get("https://www.yelp.co.uk/search?find_desc=Restaurants&find_loc=London&ns=1")
wait = WebDriverWait(driver, 50)

length = len(wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//li[contains(@class,'border-color')]/div[contains(@class, 'container')]"))))
for i in range(length):
    # re-locate the cards on every pass, since navigating back stales the old references
    elements = wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//li[contains(@class,'border-color')]/div[contains(@class, 'container')]")))
    ActionChains(driver).move_to_element(elements[i]).perform()
    time.sleep(1)
    # the 'more' link inside this card leads to the restaurant's own page
    elements[i].find_element(By.XPATH, ".//descendant::a[text()='more']").click()
    time.sleep(1)
    print(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "p.css-chtywg"))).text)
    driver.execute_script("window.history.go(-1)")
    time.sleep(2)
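Two notes on the choices above: the cards are re-located on every pass because navigating back invalidates the previously found WebElement references (otherwise you hit StaleElementReferenceException), and driver.back() would do the same job as the window.history.go(-1) script call. On Selenium 4 you can also trade the sleep before the click for an explicit wait, since element_to_be_clickable there accepts a WebElement directly; a sketch of that variation of the click step:

    # Selenium 4 only: wait on the card's 'more' link itself instead of sleeping
    more_link = elements[i].find_element(By.XPATH, ".//descendant::a[text()='more']")
    wait.until(EC.element_to_be_clickable(more_link)).click()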
Upvotes: 1