Reputation: 43
I'm using selenium in python to webscrape information from a website, but I'm running into a problem, that after I click on the website to get more rows from a table, the rows that appear have a hidden-xs hidden-sm
and I can't seem to find a way to get those elements. My code is below. Is there any way you can help me?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd
flight_Code=[]
Date=[]
Departure=[]
Arrival=[]
aircraft_code=[]
Code=["ph-bfy"]
headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
chrome_path= "C:/Users/hugol/Documents/chromedriver.exe"
chrome_options=Options()
#chrome_options.add_argument({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'})
chrome_options.add_argument("--no-sandbox")
driver=webdriver.Chrome(chrome_path, options=chrome_options)
url="https://www.flightradar24.com/"
driver.get(url)
login_button=WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'premiumOverlay')))
login_button.click()
username=WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'fr24_SignInEmail')))
username.send_keys(*******)
password=WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'fr24_SignInPassword')))
password.send_keys(*******)
login_button=WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'fr24_SignIn')))
login_button.click()
time.sleep(2)
for i in Code:
new_url="https://www.flightradar24.com/data/aircraft/"+i
driver.get(new_url)
more_button=WebDriverWait(driver, 1).until(EC.presence_of_element_located((By.ID, 'btn-load-earlier-flights')))
more_button.click()
# WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'tbl-datatable')))
for row in driver.find_elements_by_class_name("data-row"):
try:
flight_code=row.find_element_by_class_name("fbold").text
except NoSuchElementException:
flight_code=''
try:
flight_date=row.find_element_by_class_name("row").text
except NoSuchElementException:
flight_date=''
try:
flight_departure=row.find_elements_by_class_name("details")[4].text
except NoSuchElementException:
flight_departure=''
try:
flight_arrival=row.find_elements_by_class_name("details")[3].text
except NoSuchElementException:
flight_arrival=''
flight_Code.append(flight_code)
Date.append(flight_date)
Departure.append(flight_departure)
Arrival.append(flight_arrival)
aircraft_code.append(i)
df=pd.DataFrame({'Code': flight_Code,'Date': Date, 'Departure': Departure, 'Arrival': Arrival, 'Aircraft':aircraft_code})
And the website html looks like this:
Thanks guys!!!
Upvotes: 2
Views: 3525
Reputation: 33384
Instead of element.text
use element.get_attribute("textContent")
flight_code=row.find_element_by_class_name("fbold").get_attribute("textContent")
Update:
After click on more button you need to wait for element to be visible.Use Explicit wait.
WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".data-row")))
Upvotes: 8