Reputation: 25
I have this link that I have to scrape parts from: https://partsurfer.hp.com.
To get to each part I first need to cycle through a series of clicks to make the elements visible. An example is the image below:
My code, however, gets stuck after going through one cycle; I cannot seem to get it to go through several times. Please take a look at my code. I would appreciate any ideas to help me write the loop properly.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from time import sleep
# XPaths for the links clicked at each step, in the order the script visits them.
CATEGORY_XPATH = "//table[@width='650']/tbody/tr/td/a"
SERIES_1_XPATH = "//div[@id='ctl00_BodyContentPlaceHolder_HierarchyTreen1Nodes']/table/tbody/tr//a"
SERIES_2_XPATH = "//div[@id='ctl00_BodyContentPlaceHolder_HierarchyTreen2Nodes']/table/tbody/tr//a"
SERIES_3_XPATH = "//div[@id='ctl00_BodyContentPlaceHolder_HierarchyTreen3Nodes']/table/tbody/tr//a"
SERIES_4_XPATH = "//div[@id='ctl00_BodyContentPlaceHolder_HierarchyTreen4Nodes']/table/tbody/tr//a"
MODELS_XPATH = "//table[@class='table_sortable']/tbody//a"
PRODUCT_ROWS_XPATH = "//div[@id='ctl00_BodyContentPlaceHolder_dvProdinfo']/table/tbody/tr"


def _count(xpath):
    """Return how many elements currently match *xpath* on the live page."""
    return len(driver.find_elements_by_xpath(xpath))


def _nth(xpath, index):
    """Return the 0-based *index*-th element currently matching *xpath*.

    The page is queried on every call instead of reusing a list collected
    before earlier clicks: if a click re-renders the page, elements located
    beforehand raise StaleElementReferenceException when used again.
    Returns None when the page currently has fewer than ``index + 1`` matches.
    """
    matches = driver.find_elements_by_xpath(xpath)
    return matches[index] if index < len(matches) else None


options1 = webdriver.ChromeOptions()
options1.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(executable_path="~~chromedriver.exe", options=options1)
url = 'https://partsurfer.hp.com'
driver.get(url)
sleep(5)

# deal with popup
driver.find_element_by_xpath("//button[@id='onetrust-accept-btn-handler']").click()

# select country and reload the page
dropdowns = Select(driver.find_element_by_id('ctl00_BodyContentPlaceHolder_ddlCountry'))
dropdowns.select_by_value('ZA')
driver.refresh()
sleep(5)

# click hierarchy and cycle through the list to the parts
# (click() returns None, so its result is not stored)
driver.find_element_by_xpath("//a[@id='ctl00_BodyContentPlaceHolder_aHierarchy']/span[@class='ie_bg']/span").click()
sleep(5)

# Each level is walked by index and its element is looked up again right
# before use; a missing element (None) means the page no longer shows that
# level, so the loop for that level stops instead of raising.
for a_index in range(_count(CATEGORY_XPATH)):
    a = _nth(CATEGORY_XPATH, a_index)
    if a is None:
        break
    breadcrumb1 = a.text
    print(breadcrumb1)
    a.click()
    sleep(2)

    for b_index in range(_count(SERIES_1_XPATH)):
        b = _nth(SERIES_1_XPATH, b_index)
        if b is None:
            break
        breadcrumb2 = b.text
        print(breadcrumb2)
        b.click()
        sleep(2)

        for c_index in range(_count(SERIES_2_XPATH)):
            c = _nth(SERIES_2_XPATH, c_index)
            if c is None:
                break
            breadcrumb3 = c.text
            print(breadcrumb3)
            c.click()
            sleep(2)

            for d_index in range(_count(SERIES_3_XPATH)):
                d = _nth(SERIES_3_XPATH, d_index)
                if d is None:
                    break
                breadcrumb4 = d.text
                print(breadcrumb4)
                d.click()
                sleep(2)

                for e_index in range(_count(SERIES_4_XPATH)):
                    e = _nth(SERIES_4_XPATH, e_index)
                    if e is None:
                        break
                    breadcrumb5 = e.text
                    print(breadcrumb5)
                    e.click()
                    sleep(2)

                    for model_index in range(_count(MODELS_XPATH)):
                        model = _nth(MODELS_XPATH, model_index)
                        if model is None:
                            break
                        model_num = model.text
                        print(model_num)
                        model.click()
                        sleep(5)
                        # model number = //span[@id='ctl00_BodyContentPlaceHolder_lblProductNumber']
                        # No click happens between this lookup and the loop
                        # below, so the row list can be iterated directly.
                        table_rows = driver.find_elements_by_xpath(PRODUCT_ROWS_XPATH)
                        for row in table_rows:
                            print(row.text)
                        # NOTE(review): if clicking a model leaves the listing
                        # page, the next lookup returns None and the loops
                        # end early; navigating back (e.g. driver.back()) may
                        # be needed here - confirm against the site.
My code isn't complete, so I would love to get any input to improve its efficiency.
Upvotes: 0
Views: 466
Reputation: 9969
# Explicit wait: each wait.until(...) below polls for up to 10 seconds
# before raising TimeoutException.
wait = WebDriverWait(driver, 10)
driver.get('https://partsurfer.hp.com')

# deal with popup
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()

# select country
dropdowns = Select(driver.find_element_by_id('ctl00_BodyContentPlaceHolder_ddlCountry'))
dropdowns.select_by_value('ZA')

# click hierarchy and cycle through the top-level categories
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='ctl00_BodyContentPlaceHolder_aHierarchy']/span[@class='ie_bg']/span"))).click()

category_xpath = "//table[@width='650']/tbody/tr/td/a"
# Wait for the category links to exist before counting them; reading the
# list immediately after the click can yield an empty list.
category_list = wait.until(EC.presence_of_all_elements_located((By.XPATH, category_xpath)))

# XPath positions are 1-based, so the range must run from 1 to
# len(category_list) inclusive to reach the last category.
for i in range(1, len(category_list) + 1):
    # Locate the i-th category afresh on every pass rather than reusing an
    # element found before earlier clicks.
    wait.until(EC.element_to_be_clickable((By.XPATH, "(" + category_xpath + ")[" + str(i) + "]"))).click()
    # Click the root tree node (presumably the "Top Hierarchy" link) to get
    # back to the category list before the next pass.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ctl00_BodyContentPlaceHolder_HierarchyTreet0"))).click()
Here's a small demo to go through the top level. You want to use the Top Hierarchy to reset yourself.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Upvotes: 1