Reputation: 1
This is my crawling practice code.
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
chrome_options = Options()
chrome_options.add_argument("--headless")
browser = webdriver.Chrome('C:/chromedriver_win32/chromedriver')
browser.implicitly_wait(5)
browser.set_window_size(1024, 768) # maximize_window(), minimize
browser.get('http://prod.danawa.com/list/?cate=112758&15main_11_02')
WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="dlMaker_simple"]/dd/div[2]/button[1]'))).click()
WebDriverWait(browser, 3).until(EC.presence_of_element_located((By.XPATH,'//*[@id="selectMaker_simple_priceCompare_A"]/li[15]/label'))).click()
time.sleep(2)
# current page
cur_page = 1
# crawling page all
target_crawl_num = 7
while cur_page <= target_crawl_num:
soup = BeautifulSoup(browser.page_source, 'html.parser')
# selecting main product list
pro_list = soup.select('div.main_prodlist.main_prodlist_list > ul.product_list > li')
# checkig product list
# print(pro_list)
# current page print
print('****** Current Page : {}'.format(cur_page), '******')
print()
for v in pro_list:
if not v.find('div', class_ = "ad_header"):
print(v.select('p.prod_name > a')[0].text.strip())
# print(v.select('a.thumb_link > img')[0]['src']) << if I using this code, I get error message 'indexError: list index out of range' why??
print(v.select('p.price_sect > a')[0].text.strip())
print()
print()
cur_page += 1
if cur_page > target_crawl_num:
print('Crawling Succeed')
break
del soup
# next page clike
# XPATH
WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="productListArea"]/div[5]/div/div/a[{}]'.format(cur_page)))).click()
# CSS_SELECTOR
# WebDriverWait(browser, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,'div.number_warp > a:nth-child[{}]'.format(cur_page)))).click()
# wait 3sec
time.sleep(3)
# close browser
browser.close()
When I operating this code, I succeess from page1 to page2. but, when the page2 finished, I get error message like this
Traceback (most recent call last):
File "C:\python_crawl\.vscode\section06-3.py", line 107, in <module>
WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="productListArea"]/div[5]/div/div/a[{}]'.format(cur_page)))).click()
File "C:\python_crawl\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
The terminal process "C:\WINDOWS\System32\WindowsPowerShell\v1.0\powershell.exe -Command python C:\python_crawl\.vscode\section06-3.py" terminated with exit code: 1.
Is there anything I can do?
Upvotes: 0
Views: 81
Reputation: 33361
The error means that Selenium is waiting for the element you specified there but it couldn't find it.
Possibly you are using wrong locator or maybe the element is on another page or inside iframe.
Upvotes: 1