Zero
Zero

Reputation: 15

Scrapy doesn't return any results

I have this code:

import scrapy

class AstroSpider(scrapy.Spider):
    """Spider registered as "Astro" that starts from the astrolighting.com products page."""

    # NOTE(review): the parse callbacks that follow this class are written at
    # module level (column 0), so they are not methods of the spider --
    # confirm whether they were meant to be indented into the class body.

    # Identifier used to select this spider (e.g. `scrapy crawl Astro`).
    name = "Astro"
    # Hosts the crawl is allowed to request.
    allowed_domains = ["www.astrolighting.com"]
    # First URL(s) fetched when the spider starts.
    start_urls = ["https://www.astrolighting.com/products"]



def parse(self, response, **kwargs):
    """Yield one follow-up request per product link in *response*.

    Every ``href`` matched by ``article.product-listing-item a`` is passed to
    ``response.follow`` with ``self.parse_items`` as the callback. Extra
    keyword arguments are accepted and ignored.
    """
    product_hrefs = response.css('article.product-listing-item a::attr(href)')
    yield from (
        response.follow(href.get(), callback=self.parse_items)
        for href in product_hrefs
    )

def parse_items(self, response):
    """Yield one follow-up request per variant link in *response*.

    Every ``href`` matched by ``div.variants.variants--large a`` is passed to
    ``response.follow`` with ``self.parse_item`` as the callback.
    """
    variant_hrefs = response.css('div.variants.variants--large a::attr(href)')
    yield from (
        response.follow(href.get(), callback=self.parse_item)
        for href in variant_hrefs
    )

def parse_item(self, response):
    """Yield a single item dict scraped from a product detail page.

    The item has three keys:

    * ``name`` -- first text node of ``div.detail__right h1`` (``None`` when
      nothing matches).
    * ``material`` -- first text node of ``div.detail__right p span``.
    * ``id`` -- second text node of the same selector, stripped of
      surrounding whitespace.

    ``material`` and ``id`` fall back to ``None`` when the page has fewer
    matching spans than expected, mirroring how ``name`` behaves, instead of
    raising ``IndexError`` and dropping the whole item.
    """
    # Debug marker showing that this callback was reached.
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!")

    # Run the span selector once and reuse the result for both fields.
    span_texts = response.css('div.detail__right p span::text').getall()
    material = span_texts[0] if span_texts else None
    item_id = span_texts[1].strip() if len(span_texts) > 1 else None

    yield {
        'name': response.css('div.detail__right h1::text').get(),
        'material': material,
        'id': item_id,
    }

So the parsing results are just empty. Why? It seems like the function "parse_item" is never called.

Upvotes: 1

Views: 59

Answers (1)

Ahmed Ellaban
Ahmed Ellaban

Reputation: 156

I didn't test your code, but according to @alexpdev you need to pass dont_filter=True to response.follow:


class AstroSpider(scrapy.Spider):
    """Spider registered as "Astro" that starts from the astrolighting.com products page."""

    # NOTE(review): the parse callbacks that follow this class are written at
    # module level (column 0), so they are not methods of the spider --
    # confirm whether they were meant to be indented into the class body.

    # Identifier used to select this spider (e.g. `scrapy crawl Astro`).
    name = "Astro"
    # Hosts the crawl is allowed to request.
    allowed_domains = ["www.astrolighting.com"]
    # First URL(s) fetched when the spider starts.
    start_urls = ["https://www.astrolighting.com/products"]



def parse(self, response, **kwargs):
    """Yield one follow-up request per product link in *response*.

    Every ``href`` matched by ``article.product-listing-item a`` is passed to
    ``response.follow`` with ``self.parse_items`` as the callback and
    ``dont_filter=True``. Extra keyword arguments are accepted and ignored.
    """
    anchors = response.css('article.product-listing-item a::attr(href)')
    for anchor in anchors:
        target = anchor.get()
        yield response.follow(
            target,
            callback=self.parse_items,
            dont_filter=True,
        )

def parse_items(self, response):
    """Yield one follow-up request per variant link in *response*.

    Every ``href`` matched by ``div.variants.variants--large a`` is passed to
    ``response.follow`` with ``self.parse_item`` as the callback and
    ``dont_filter=True``.
    """
    anchors = response.css('div.variants.variants--large a::attr(href)')
    for anchor in anchors:
        target = anchor.get()
        yield response.follow(
            target,
            callback=self.parse_item,
            dont_filter=True,
        )

def parse_item(self, response):
    """Yield a single item dict scraped from a product detail page.

    The item has three keys:

    * ``name`` -- first text node of ``div.detail__right h1`` (``None`` when
      nothing matches).
    * ``material`` -- first text node of ``div.detail__right p span``.
    * ``id`` -- second text node of the same selector, stripped of
      surrounding whitespace.

    ``material`` and ``id`` fall back to ``None`` when the page has fewer
    matching spans than expected, mirroring how ``name`` behaves, instead of
    raising ``IndexError`` and dropping the whole item.
    """
    # Debug marker showing that this callback was reached.
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!")

    # Run the span selector once and reuse the result for both fields.
    span_texts = response.css('div.detail__right p span::text').getall()
    material = span_texts[0] if span_texts else None
    item_id = span_texts[1].strip() if len(span_texts) > 1 else None

    yield {
        'name': response.css('div.detail__right h1::text').get(),
        'material': material,
        'id': item_id,
    }

However, I also suggest checking which links parse_items actually finds by printing them inside that callback:

# Debug aid: print every href matched by the variant-link selector on `response`.
print(response.css('div.variants.variants--large a::attr(href)').getall())

Upvotes: 1

Related Questions