박성은
박성은

Reputation: 1

When I practice crawling, I get this error message 'selenium.common.exceptions.TimeoutException: Message:'

This is my crawling practice code.

from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.chrome.options import Options 
from bs4 import BeautifulSoup

chrome_options = Options()
chrome_options.add_argument("--headless")
browser = webdriver.Chrome('C:/chromedriver_win32/chromedriver')
browser.implicitly_wait(5)
browser.set_window_size(1024, 768) # maximize_window(), minimize
browser.get('http://prod.danawa.com/list/?cate=112758&15main_11_02')

WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="dlMaker_simple"]/dd/div[2]/button[1]'))).click()

WebDriverWait(browser, 3).until(EC.presence_of_element_located((By.XPATH,'//*[@id="selectMaker_simple_priceCompare_A"]/li[15]/label'))).click()

time.sleep(2)

# current page
cur_page = 1
# crawling page all 
target_crawl_num = 7 

while cur_page <= target_crawl_num:
    soup = BeautifulSoup(browser.page_source, 'html.parser')

    # selecting main product list 
    pro_list = soup.select('div.main_prodlist.main_prodlist_list > ul.product_list > li')

    # checkig product list 
    # print(pro_list)

    # current page print 
    print('****** Current Page : {}'.format(cur_page), '******')
    print()
    for v in pro_list:
        if not v.find('div', class_ = "ad_header"):
    
            print(v.select('p.prod_name > a')[0].text.strip()) 
            # print(v.select('a.thumb_link > img')[0]['src']) << if I using this code, I get error message 'indexError: list index out of range' why??
            print(v.select('p.price_sect > a')[0].text.strip())
        print()
    print() 
    cur_page += 1 

    if cur_page > target_crawl_num: 
        print('Crawling Succeed')
        break
    
    del soup

    # next page clike
    # XPATH 
    WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="productListArea"]/div[5]/div/div/a[{}]'.format(cur_page)))).click()
    # CSS_SELECTOR 
    # WebDriverWait(browser, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR,'div.number_warp > a:nth-child[{}]'.format(cur_page)))).click()

    # wait 3sec
    time.sleep(3)

# close browser 
browser.close()

When I operating this code, I succeess from page1 to page2. but, when the page2 finished, I get error message like this

Traceback (most recent call last):
  File "C:\python_crawl\.vscode\section06-3.py", line 107, in <module>
    WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.XPATH,'//*[@id="productListArea"]/div[5]/div/div/a[{}]'.format(cur_page)))).click()        
  File "C:\python_crawl\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:

The terminal process "C:\WINDOWS\System32\WindowsPowerShell\v1.0\powershell.exe -Command python C:\python_crawl\.vscode\section06-3.py" terminated with exit code: 1.

Is there anything I can do?

Upvotes: 0

Views: 81

Answers (1)

Prophet
Prophet

Reputation: 33361

The error means that Selenium is waiting for the element you specified there but it couldn't find it.
Possibly you are using wrong locator or maybe the element is on another page or inside iframe.

Upvotes: 1

Related Questions