Reputation:
The problem I am facing is strange and has cost me a lot of time.
In theory, this should return the link:
next_page = response.xpath('//ul[@class="pagination justify-content-center"]/li[6]/a/@href').get()
but the output I got is http://moviful.com/browseNone
The "None" part should contain the link "?page=2".
So, after wasting hours, I finally tried this:
next_page = response.xpath('//ul[@class="pagination justify-content-center"]/li[6]/a/@href').getall()
The output I got was http://moviful.com/browse['?page=2']. Where are the [ ] coming from? If you evaluate
'//ul[@class="pagination justify-content-center"]/li[6]/a/@href' there are no [], only ?page=2.
So I tried this:
next_page = response.xpath('//ul[@class="pagination justify-content-center"]/li[6]/a/@href').extract()
and got the same output: http://moviful.com/browse['?page=2']
Finally, I tried again:
next_page = response.xpath('//ul[@class="pagination justify-content-center"]/li[6]/a/@href').get()
This time I got data from page 2. Why?
What happened to http://moviful.com/browseNone? Why am I getting data this time?
But it didn't move on to the 3rd page; it just stops at the 2nd page. Also, why does the position of "li" keep changing on every page?
import scrapy
from scraper_api import ScraperAPIClient
# ScraperAPI client; the spider calls client.scrapyGet(url=..., render=True)
# to build the URL it hands to scrapy.Request in start_requests.
client = ScraperAPIClient('API-KEY-HIDDEN')
class DbmoviesSpider(scrapy.Spider):
    """Collect movie links and prices from the moviful.com browse listing.

    Starts at page 1 of the listing and keeps following the pagination link
    found on each page for as long as one is present.
    """

    name = "browse2"
    allowed_domains = ["moviful.com"]

    def start_requests(self):
        """Yield the request for the first listing page, built via ``client.scrapyGet``."""
        start_urls = ["https://moviful.com/browse?page=1"]
        for start_url in start_urls:
            yield scrapy.Request(
                client.scrapyGet(url=start_url, render=True),
                callback=self.parse,
            )

    def parse(self, response):
        """Yield one item per movie card, then a request for the next page.

        Each item is a dict with the keys ``'Movie Links'`` (absolute URL, or
        ``None`` when the card has no link) and ``'Price'`` (text of the
        card's first anchor, or ``None``).
        """
        movies = response.xpath(
            '//div[@class="video-grid d-flex flex-wrap justify-content-around"]/div/div/div'
        )
        for movie in movies:
            href = movie.xpath('.//a/@href').get()
            yield {
                # Do not format a missing href into the URL: that would
                # produce the bogus string "http://moviful.comNone".
                'Movie Links': f"http://moviful.com{href}" if href is not None else None,
                'Price': movie.xpath('.//a/text()').get(),
            }

        # .get() returns the first match as a string (or None when nothing
        # matches). .getall() returns a list, and formatting a list into the
        # URL is what produced ".../browse['?page=2']".
        # NOTE(review): li[6] is a fixed position; if the pagination bar has a
        # different number of items on other pages this will select the wrong
        # entry (or nothing) -- confirm against the page markup.
        next_page = response.xpath(
            '//ul[@class="pagination justify-content-center"]/li[6]/a/@href'
        ).get()

        # Build the URL only after confirming a link exists; building it
        # first is what produced ".../browseNone".
        if next_page:
            next_url = f"http://moviful.com/browse{next_page}"
            self.logger.debug("Following next page: %s", next_url)
            # NOTE(review): unlike start_requests, this request is not built
            # with client.scrapyGet(render=True) -- confirm whether the
            # pagination markup is present without rendering.
            yield scrapy.Request(url=next_url, callback=self.parse)
Upvotes: -2
Views: 42
Reputation: 104
import scrapy
from scraper_api import ScraperAPIClient
# ScraperAPI client; the spider calls client.scrapyGet(url=..., render=True)
# to build the URL it hands to scrapy.Request in start_requests.
client = ScraperAPIClient('API-KEY-HIDDEN')
class DbmoviesSpider(scrapy.Spider):
    """Collect movie links and prices from every moviful.com browse page.

    Instead of following pagination links, this spider requests each listing
    page directly by number, from 1 through ``last_page``.
    """

    name = "browse2"
    allowed_domains = ["moviful.com"]
    url = "https://moviful.com/browse?page={i}"
    # Number of the last listing page to request (there are 209 pages on the
    # website). Kept as an attribute so it can be overridden, e.g. with the
    # spider argument ``-a last_page=50``.
    last_page = 209

    def start_requests(self):
        """Yield one request per listing page, each built via ``client.scrapyGet``."""
        # Spider arguments arrive as strings, hence the int() conversion.
        for page in range(1, int(self.last_page) + 1):
            yield scrapy.Request(
                client.scrapyGet(url=self.url.format(i=page), render=True),
                callback=self.parse,
            )

    def parse(self, response):
        """Yield one item per movie card found on the page.

        Each item is a dict with the keys ``'Movie Links'`` (absolute URL, or
        ``None`` when the card has no link) and ``'Price'`` (text of the
        card's first anchor, or ``None``).
        """
        movies = response.xpath(
            '//div[@class="video-grid d-flex flex-wrap justify-content-around"]/div/div/div'
        )
        for movie in movies:
            href = movie.xpath('.//a/@href').get()
            yield {
                # Do not format a missing href into the URL: that would
                # produce the bogus string "http://moviful.comNone".
                'Movie Links': f"http://moviful.com{href}" if href is not None else None,
                'Price': movie.xpath('.//a/text()').get(),
            }
Upvotes: 0