Reputation: 133
All the data in is populated from the first table. I cannot move to the next div and get the data of the td
for each tr
.
The site: https://asd.com/page/
Below is the code that I have written.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://asd.com/page/asd"
driver.get(my_url)
boxes = driver.find_elements(By.CLASS_NAME, "col-md-4")
companies = []
company = {}
for box in boxes:
header = box.find_element(By.CLASS_NAME,"text-primary.text-uppercase")
company['name']= header.text
td= box
company['Type']= td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[1]/td").text
company['Capital']= td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[2]/td").text
company['Address'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[3]/td").text
company['Owner'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[4]/td").text
company['Co-Owner'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[5]/td").text
company['Duration'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[6]/td").text
company['Place'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[7]/td").text
company['Company ID'] = td.find_element(By.XPATH,"//div/div/div/table/tbody/tr[8]/td").text
companies.append(company)
print(company)
Upvotes: 1
Views: 230
Reputation: 33361
There are several issues here:
driver.get(my_url)
and boxes = driver.find_elements(By.CLASS_NAME, "col-md-4")
to let the elements loaded before getting the list of all of them.text-primary.text-uppercase
is actually 2 class names: text-primary
and text-uppercase
so you should use XPATH or CSS_SELECTOR to locate element by 2 class names, not by CLASS_NAME
..
//div/div/div/table/tbody/tr[1]/td
are absolute while they should be calculated based on the parent box
element.td
element, you can use the existing box
element here.//div/div/div/table/tbody/tr[1]/td
can and should be improved.company = {}
should be defined inside the loop.from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://monentreprise.bj/page/annonces"
driver.get(my_url)
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "col-md-4")))
time.sleep(2)
boxes = driver.find_elements(By.CLASS_NAME, "col-md-4")
companies = []
for box in boxes:
actions.move_to_element(box).perform()
time.sleep(0.3)
company = {}
header = box.find_element(By.XPATH,".//h5[@class='text-primary text-uppercase']")
company['name']= header.text
company['Objet']= box.find_element(By.XPATH,".//tr[1]/td").text
company['Capital']= box.find_element(By.XPATH,".//tr[2]/td").text
company['Siège Social'] = box.find_element(By.XPATH,".//tr[3]/td").text
company['Gérant'] = box.find_element(By.XPATH,".//tr[4]/td").text
company['Co-Gérant'] = box.find_element(By.XPATH,".//tr[5]/td").text
company['Durée'] = box.find_element(By.XPATH,".//tr[6]/td").text
company['Dépôt'] = box.find_element(By.XPATH,".//tr[7]/td").text
company['Immatriculation RCCM'] = box.find_element(By.XPATH,".//tr[8]/td").text
companies.append(company)
print(company)
Upvotes: 1