Reputation: 117
import scrapy
from selenium import webdriver
class QuotesSpider(scrapy.Spider):
    """Scrape brand links from the listing page and follow pagination.

    NOTE(review): mixing Scrapy responses with a separate Selenium driver is
    fragile here -- the driver never loads ``response.url``, so the XPath
    lookup below runs against whatever page the browser happens to show.
    Confirm this is intentional before relying on it.
    """

    name = "quotes"
    start_urls = [
        'http://ozhat-turkiye.com/en/brands/a',
    ]

    def __init__(self):
        # Bug fix: the original assigned ``self.drivers`` (plural) but read
        # ``self.driver`` in parse(), raising AttributeError at runtime.
        # Also, the original passed 'C:/Program Files (x86)\Mozilla Firefox'
        # as the first positional argument, which Selenium interprets as a
        # Firefox *profile*, not the browser binary.  Configure the binary or
        # geckodriver location via the proper keyword arguments if needed.
        self.driver = webdriver.Firefox()

    def parse(self, response):
        """Yield one item per brand link, then follow the pagination link."""
        for title in response.css('div.tabledivinlineblock a.tablelink50::attr(href)').extract():
            yield {'title': title,
                   'response': response.url
                   }
        # The "next page" anchor the asker wants to click.  Renamed from
        # ``next`` to avoid shadowing the builtin.
        next_link = self.driver.find_element_by_xpath('//*[@id="maincontent_DataPager"]/a[last()]')
        # Bug fix: extract_first() returns None on the last page; the original
        # called .strip() on it *before* the None check, which would raise
        # AttributeError.  Guard first, strip after.
        next_page = response.css('span#maincontent_DataPager a:last-child::attr(href)').extract_first()
        if next_page is not None:
            yield response.follow(next_page.strip(), callback=self.parse)
Upvotes: 2
Views: 3476
Reputation: 22440
The following script should fetch all the required items, exhausting every click on the next-page link. You can't use response.follow()
here because there is no href to follow — the next page is reachable only by clicking the link.
import time
import scrapy
from selenium import webdriver
class QuotesSpider(scrapy.Spider):
    """Drive a real browser through every "next page" click, yielding one
    item per brand title on each page.

    ``response.follow()`` is not usable here: pagination is triggered by a
    JavaScript click, not a plain href.
    """

    name = "quotes"
    start_urls = [
        'http://ozhat-turkiye.com/en/brands/a',
    ]

    def __init__(self):
        self.driver = webdriver.Firefox()

    def parse(self, response):
        # Function-scope imports keep selenium.webdriver.support out of
        # module scope; selenium itself is already imported by this file.
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        self.driver.get(response.url)
        wait = WebDriverWait(self.driver, 10)
        while True:
            # Explicit wait replaces the original hardcoded time.sleep(5),
            # as the answer itself recommends: block until the title links
            # are present (up to 10s) instead of sleeping unconditionally.
            wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, 'div.tabledivinlineblock a.tablelink50')))
            for anchor in self.driver.find_elements_by_css_selector(
                    'div.tabledivinlineblock a.tablelink50'):
                yield {'title': anchor.text, 'response': response.url}
            try:
                self.driver.find_element_by_css_selector(
                    'span#maincontent_DataPager a:last-child').click()
            except Exception:
                # No clickable "next" link left -- pagination exhausted.
                break

    def closed(self, reason):
        # Scrapy calls this hook when the spider finishes; quit the browser
        # so we don't leak a Firefox process (the original never closed it).
        self.driver.quit()
I used a hardcoded wait within the script, which is not recommended at all. You should replace it with an Explicit Wait.
Upvotes: 1