Reputation: 341
I have this block of code that works fine when I use regular Chrome. However, when I switch to headless Chrome, I get selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
and it does not return anything. Any ideas why?
Code:
class IOLSpider(scrapy.Spider):
    """Spider for iolproperty.co.za Western Cape listings.

    A single headless Chrome session is opened when the spider is created
    and is used by ``parse_property`` to click the "Show email address"
    link on a property page and read the revealed address.
    """

    name = 'iol'
    start_urls = [
        'http://www.iolproperty.co.za/all_properties/For_Sale/Western_Cape',
        'http://www.iolproperty.co.za/all_properties/Rentals/Western_Cape',
    ]

    def __init__(self, *args, **kwargs):
        """Create the spider and start the headless Chrome driver.

        Positional and keyword arguments are forwarded unchanged to
        ``scrapy.Spider.__init__`` so spider arguments keep working.
        """
        super().__init__(*args, **kwargs)
        # path to driver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        # NOTE(review): recent Selenium releases expect the driver path to be
        # supplied through a Service object -- confirm against the installed
        # Selenium version.
        self.driver = webdriver.Chrome('/path/chromedriver', options=chrome_options)

    def closed(self, reason):
        """Quit the browser when the spider is closed.

        Without this the Chrome process started in ``__init__`` is never
        shut down.
        """
        self.driver.quit()

    def parse(self, response):
        ...  # body omitted in the original post

    def parse_area(self, response):
        ...  # body omitted in the original post

    def parse_property(self, response):
        """Load a property page in Chrome and read the agent's e-mail address.

        ``response.url`` is the property link, for example
        https://www.iolproperty.co.za/view-property.jsp?PID=2000026825
        """
        item = response.meta.get('item')
        self.driver.get(response.url)
        # Hide the footer element before clicking the link below.
        self.driver.execute_script("document.getElementById('footcont').setAttribute('hidden', true)")
        elem = WebDriverWait(self.driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//a[text()="Show email address"]')))
        elem.click()
        # Wait explicitly for the revealed mailto anchor instead of looking it
        # up immediately after the click; the immediate lookup races with the
        # page update and is where the stale element reference came from.
        contact_email = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//span[@id="viewagmail" and @style="display: block;"]/a[starts-with(@href, "mailto")]'))).text
        # NOTE(review): the original post ends here; whatever the callback
        # does with ``item`` and ``contact_email`` is not shown.
Upvotes: 0
Views: 603
Reputation: 1432
The site you are trying to scrape is detecting that you are using a headless browser and not playing nice. This has worked for me in the past, but you might have to adjust based on your specific needs.
# Property page whose agent e-mail address is revealed and read below.
url = "https://www.iolproperty.co.za/view-property.jsp?PID=2000026825"

# User-agent string sent in place of the browser's default one.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'
)

# Locators for the link to click and for the anchor it reveals.
show_email_locator = (By.XPATH, '//a[text()="Show email address"]')
email_anchor_locator = (
    By.XPATH,
    '//span[@id="viewagmail" and @style="display: block;"]/a[starts-with(@href, "mailto")]',
)

options = Options()
for flag in ('--no-sandbox', "--headless"):
    options.add_argument(flag)
# Drop the "enable-automation" switch and turn the automation extension off.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# The context manager shuts the browser down when the block exits.
with webdriver.Chrome(options=options) as driver:
    driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": user_agent})
    driver.get(url)
    # Hide the footer element before clicking the link.
    driver.execute_script("document.getElementById('footcont').setAttribute('hidden', true)")

    # Wait up to 10 seconds for the link to become clickable, then click it.
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(show_email_locator)).click()

    # Wait up to 10 seconds for the mailto anchor to be present, then read its text.
    email_anchor = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(email_anchor_locator))
    contact_email = email_anchor.text
Upvotes: 1