saraherceg
saraherceg

Reputation: 341

Grab all next pages using Selenium and Scrapy

I am trying to get all the "next pages", and continue scraping those pages as well, by clicking on the button "Next" at the bottom of the page, using Selenium. I would like to get all of them (pages 2,3,4 etc..). However, I am not sure what I am doing wrong here, but I am unable to get the 'click' option to work.

Here is my code:

import scrapy
import re
import math
from selenium import webdriver
import time

class PropertyFoxSpider(scrapy.Spider):
    name = 'property_fox'
    start_urls = [
        'https://propertyfox.co.za/listing-search?currentpage=1&term_id=62515&keywords=Western+Cape&orderby=createddate:desc&status%5B%5D=Active'
    ]


    def __init__(self):
        #path to driver
        self.driver = webdriver.Chrome('my_path_here')
    

    def parse(self, response):
        self.driver.get(response.url)
        for prop in response.css('div.property-item'):
            link = prop.css('a::attr(href)').get()
            banner = prop.css('div.property-figure-icon div::text').get()
            sold_tag = None
            if banner:
                banner = banner.strip()
                sold_tag = 'sold' if 'sold' in banner.lower() else None

            yield scrapy.Request(
                link,
                callback=self.parse_property,
                meta={'item': {
                    'agency': self.name,
                    'url': link,
                    'offering': 'buy',
                    'banners': banner,
                    'sold_tag':  sold_tag,
                }},
            )
        elem = self.driver.find_element_by_id('pagerNext')
        #elem = self.driver.find_element_by_xpath('//*[@id="pagerNext"]')
        elem.click()
        time.sleep(0.2)

     def parse_property(self, response):
         item = response.meta.get('item')
         . . .

Upvotes: 0

Views: 76

Answers (1)

JaSON
JaSON

Reputation: 4869

Try to wait until element is clickable:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

elem = WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable((By.ID, "pagerNext")))
elem.click()

Upvotes: 1

Related Questions