Reputation: 311
Hello, I'm trying to scrape some info from the following page: http://verify.sos.ga.gov/verification/
My code is the following:
import sys
reload(sys)
sys.setdefaultencoding('utf8')

import csv
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

url = 'http://verify.sos.ga.gov/verification/'
def init_Selenium(driver_path="/Users/rodrigopeniche/Downloads/chromedriver"):
    """Start a Chrome WebDriver session and open the verification page.

    The driver is stored in the module-level ``driver`` global so the
    other helpers in this script can reuse the same browser session.

    Args:
        driver_path: Filesystem path to the chromedriver binary.
            Defaults to the original hard-coded location so existing
            callers are unaffected.
    """
    global driver
    driver = webdriver.Chrome(driver_path)
    driver.get(url)
def select_profession():
    """Walk every profession option (skipping the index-0 placeholder and
    the final entry) and run the license-type search for each one."""
    profession_dropdown = Select(
        driver.find_element_by_name('t_web_lookup__profession_name'))
    option_count = len(profession_dropdown.options)
    for option_index in range(1, option_count - 1):
        # Re-locate the <select> on every pass: the page contents change
        # after each search, invalidating the earlier element reference.
        fresh_dropdown = Select(
            driver.find_element_by_name('t_web_lookup__profession_name'))
        fresh_dropdown.select_by_index(option_index)
        select_license_type()
def select_license_type():
    """For each license-type option (skipping the index-0 placeholder and
    the final entry), trigger the search and scrape the result table."""
    license_dropdown = Select(
        driver.find_element_by_name('t_web_lookup__license_type_name'))
    option_count = len(license_dropdown.options)
    for option_index in range(1, option_count - 1):
        # Fresh lookup each pass — the previous reference goes stale once
        # the page updates.
        fresh_dropdown = Select(
            driver.find_element_by_name('t_web_lookup__license_type_name'))
        fresh_dropdown.select_by_index(option_index)
        # Click via JavaScript: works even if the button is obscured.
        submit_button = driver.find_element_by_id('sch_button')
        driver.execute_script('arguments[0].click();', submit_button)
        scrap_licenses_results()
def scrap_licenses_results():
table_rows = driver.find_elements_by_tag_name('tr')
for index, row in enumerate(table_rows):
if index < 9:
continue
else:
attributes = row.find_elements_by_xpath('td')
try:
name = attributes[0].text
license_number = attributes[1].text
profession = attributes[2].text
license_type = attributes[3].text
status = attributes[4].text
address = attributes[5].text
license_details_page_link = attributes[0].find_element_by_id('datagrid_results__ctl3_name').get_attribute('href')
driver.get(license_details_page_link)
data_rows = driver.find_elements_by_class_name('rdata')
issued_date = data_rows[len(data_rows) - 3].text
expiration_date = data_rows[len(data_rows) - 2].text
last_renewal_day = data_rows[len(data_rows) - 1].text
print name, license_number, profession, license_type, status, address, issued_date, expiration_date, last_renewal_day
driver.back()
except:
pass
# Guard the entry point so importing this module does not launch a
# browser as a side effect.
if __name__ == '__main__':
    init_Selenium()
    select_profession()
When I execute the script, it works for the first iteration but fails in the second one. The exact place where the error is raised is in the scrap_licenses_results() function, at the attributes = row.find_elements_by_xpath('td')
line.
Any help will be appreciated
Upvotes: 0
Views: 260
Reputation: 1804
The StaleElementReferenceException is caused by gathering the list of rows before the loop starts. Initially, you created a list of all rows, named table_rows.
table_rows = driver.find_elements_by_tag_name('tr')
Now in the loop, during the first iteration, your first row element is fresh and can be found by the driver. At the end of the first iteration you call driver.back(),
which changes/refreshes the HTML DOM. All the previously gathered references are lost at that point — every row in your table_rows list is now stale. Hence, in the 2nd iteration you face this exception.
You have to move the find-row operation into the loop, so that a fresh reference is fetched from the target application on every iteration. The pseudocode shall do something like this.
total_rows = len(driver.find_elements_by_tag_name('tr'))
for i in range(total_rows):
    row = driver.find_element_by_xpath('//tr[%d]' % (i + 1))
    # ... further code ...
Upvotes: 1