Reputation: 7
I want to extract the name, website, phone, and email of every company on the site, but the code keeps printing the first company name on the page over and over, and it crashes if I try to find the website, phone, and email.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
# Question script: opens the DMCC business-search page in Firefox and tries to
# print name / website / phone / e-mail for every listed company, page by page.
# NOTE(review): the indentation of this snippet was lost when it was pasted, so
# the nesting of the two `for` loops below has to be inferred by the reader.
url='https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b'
driver = webdriver.Firefox()
driver.get(url)
# Shared explicit wait; each wait.until() call below blocks for up to 50 seconds.
wait=WebDriverWait(driver,50)
# Click "#hs-eu-confirmation-button" once it is clickable
# (presumably the cookie-consent banner -- confirm against the page).
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#hs-eu-confirmation-button"))).click()
# The results live inside an iframe; every later lookup runs inside that frame.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,'#pym-0 > iframe')))
# All visible elements with class 'searched-list '.
# NOTE(review): the name `list` shadows the Python builtin.
list=wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME,'searched-list ')))
# Pager link located by a positional XPath (presumably the "next page" link,
# since it is clicked once per loop iteration below).
button = wait.until(EC.element_to_be_clickable((By.XPATH,'./html/body/div[5]/div/ul/li[13]/a')))
# Hard-coded number of loop iterations / pages to visit.
numOfPages=1161
# Seconds passed to time.sleep(); increased periodically further down.
counter=4
for i in range(numOfPages):
driver.execute_script("arguments[0].scrollIntoView();", button)
for e in list:
# NOTE(review): an XPath that starts with "/" is absolute -- it is evaluated
# from the document root even though it is called on `e`. Every iteration
# therefore resolves to the same <h4>, which matches the reported symptom of
# the first company name being printed over and over.
name = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4').text
print(name)
# The three lookups below use absolute XPaths as well; find_element_* raises
# when nothing matches (presumably the crash described in the question).
website = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[1]/td[2]/a').text
print(website)
phone = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[2]/td[2]/a').text
print(phone)
email = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[3]/td[2]/a').text
print(email)
# Pause, click the pager link, then re-collect the result elements.
time.sleep(counter)
button.click()
list=wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME,'searched-list ')))
# Whenever i is a multiple of 40 (including i == 0) the pause grows by one second.
if i%40==0:
counter+=1
My problem is with these lines of code:
# Excerpt of the loop the question is about: collect the result elements, then
# print four fields for each of them.
# NOTE(review): indentation was lost in the paste; the lines after `for e in list:`
# are presumably the loop body.
list=wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME,'searched-list ')))
for e in list:
# NOTE(review): each XPath below starts with "/", so it is resolved from the
# document root rather than relative to `e`; every iteration finds the same nodes.
name = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4').text
print(name)
website = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[1]/td[2]/a').text
print(website)
phone = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[2]/td[2]/a').text
print(phone)
email = e.find_element_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[9]/div/div[2]/div[2]/table/tbody/tr[3]/td[2]/a').text
print(email)
Upvotes: 0
Views: 89
Reputation: 668
I would suggest using other element-finding functions to make your code more readable. I've made a couple of changes to your code; I hope that helps you get the data:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# --- Browser setup -----------------------------------------------------------
# Open the DMCC business-search page in Firefox and prepare one explicit wait
# (50-second timeout) that every lookup below goes through.
url = "https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b"
driver = webdriver.Firefox()
driver.get(url)
wait = WebDriverWait(driver, 50)

# Click the confirmation button as soon as it becomes clickable
# (presumably the cookie-consent banner -- confirm against the page).
confirm_locator = (By.CSS_SELECTOR, "#hs-eu-confirmation-button")
confirm_button = wait.until(EC.element_to_be_clickable(confirm_locator))
confirm_button.click()

# The search results are rendered inside an iframe; switch into it so that
# all subsequent lookups are resolved within that frame.
frame_locator = (By.CSS_SELECTOR, "#pym-0 > iframe")
wait.until(EC.frame_to_be_available_and_switch_to_it(frame_locator))

# Result rows of the first page. The name `list` (which shadows the builtin)
# is kept because the scraping loop further down reads it.
rows_locator = (By.CLASS_NAME, "list-title ")
list = wait.until(EC.visibility_of_all_elements_located(rows_locator))

# Pager link whose ng-click advances to the next page.
next_locator = (By.CSS_SELECTOR, "a[ng-click='setPage(pager.currentPage + 1)']")
button = wait.until(EC.element_to_be_clickable(next_locator))

# Seconds to sleep after each page change.
counter = 4
def getText(element, default="---"):
    """Return the text of *element*, or *default* when that text is empty.

    Args:
        element: Any object exposing a ``text`` attribute (for example a
            Selenium ``WebElement``).
        default: Placeholder returned when ``element.text`` is falsy
            (empty string or ``None``). Defaults to ``"---"``, the value
            that used to be hard-coded.

    Returns:
        ``element.text`` when it is non-empty, otherwise *default*.
    """
    text = element.text
    return text if text else default
def getContactInfo(parent):
    """Return the first ``contact-info`` descendant of *parent*, or ``None``.

    The lookup uses ``find_elements`` (plural), which yields an empty list
    when nothing matches, so "no contact section" is reported as ``None``
    without catching any exception. Unlike the previous bare ``except``,
    unrelated failures (stale element, lost browser session, Ctrl-C) are no
    longer silently swallowed and propagate to the caller.

    Args:
        parent: Element to search in; must provide
            ``find_elements(by, value)`` as Selenium's ``WebElement`` does.

    Returns:
        The first matching element, or ``None`` when there is none.
    """
    # "class name" is the locator-strategy string that By.CLASS_NAME stands for.
    matches = parent.find_elements("class name", "contact-info")
    return matches[0] if matches else None
def _print_optional_text(parent, css_selector):
    """Print the text of the first match of *css_selector*, or "---" if absent."""
    # find_elements returns [] instead of raising when a company has no such link.
    matches = parent.find_elements(By.CSS_SELECTOR, css_selector)
    print(getText(matches[0]) if matches else "---")


def _next_page_is_disabled():
    """Return True when the pager's "next" item carries the ``disabled`` class."""
    pager_item = driver.find_element(
        By.CSS_SELECTOR,
        "li[ng-class='{disabled:pager.currentPage === pager.totalPages}']",
    )
    return "disabled" in (pager_item.get_attribute("class") or "").split()


# Scrape every result page: print name, website, phone and e-mail of each
# company, then advance with the pager button until the last page was handled.
# `list` is the collection of first-page rows produced by the setup code above;
# it is copied to a local name so the builtin is not reused any further.
entries = list
try:
    while True:
        driver.execute_script("arguments[0].scrollIntoView();", button)
        for entry in entries:
            print(getText(entry.find_element(By.TAG_NAME, "h4")))
            account_info = entry.find_element(
                By.CSS_SELECTOR, "div.account-Info.large-12.columns.ng-scope"
            )
            contact_info = getContactInfo(account_info)
            if contact_info is not None:
                _print_optional_text(contact_info, "a.website.ng-binding.ng-scope")
                _print_optional_text(contact_info, "a.telephone.ng-binding")
                _print_optional_text(contact_info, "a.emailid.ng-binding")
            print("*******\n")

        # Check for the last page only AFTER scraping it; testing this in the
        # loop condition skipped the final page's companies.
        if _next_page_is_disabled():
            break

        button.click()
        # Give the page time to re-render before collecting the new rows.
        time.sleep(counter)
        entries = wait.until(
            EC.visibility_of_all_elements_located((By.CLASS_NAME, "list-title "))
        )
finally:
    # Always close the browser, even when a lookup or wait fails mid-run.
    driver.quit()
Upvotes: 1