Reputation: 41
I hope you are doing well. I need your help, please: I'm getting this error but I don't know why:
File "C:\Users\Luis\Amazon\mercado\spiders\spider.py", line 14
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
^IndentationError: expected an indented block
# -*- coding: utf-8 -*-
import scrapy
import urllib
from mercado.items import MercadoItem
# Spider as posted in the question; this is the code that raises the
# IndentationError reported above.
class MercadoSpider(CrawlSpider):
name = 'mercado'
item_count = 0
allowed_domain = ['https://www.amazon.es']
start_urls = ['https://www.amazon.es/s/ref=sr_pg_2rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1 535314254']
# Requests search-results page 1, then pages 2..399; every response is
# passed to parse_item.
def start_requests(self):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page=1&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
for i in range(2,400):
yield scrapy.Request("https://www.amazon.es/s/ref=sr_pg_2?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi&page="+str(i)+"&keywords=febi&ie=UTF8&qid=1535314254",self.parse_item)
# Fills a MercadoItem from the elements with ids "productTitle" and
# "priceblock_ourprice", counts it, and yields it.
def parse_item(self, response):
ml_item = MercadoItem()
# product info
ml_item['articulo'] = response.xpath('normalize-space(//*[@id="productTitle"])').extract()
ml_item['precio'] = response.xpath('normalize-space(//*[@id="priceblock_ourprice"])').extract()
self.item_count += 1
yield ml_item
Do you know why? I've added the code here to make it easier to check.
Upvotes: 0
Views: 127
Reputation: 10666
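You have an indentation error: the body of `start_requests` (the `yield` statements and the `for` loop) must be indented under its `def` line. Corrected code: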
# -*- coding: utf-8 -*-
import scrapy
import urllib
from mercado.items import MercadoItem
class MercadoSpider(scrapy.Spider):
    """Spider that requests Amazon.es search-result pages for "febi".

    Every response is handed to ``parse_item``, which yields one
    ``MercadoItem`` per response.

    The base class is ``scrapy.Spider`` because ``CrawlSpider`` was never
    imported (NameError at class definition) and no crawl rules are used.
    """

    name = 'mercado'
    # Running count of items yielded by parse_item.
    item_count = 0
    # Scrapy reads ``allowed_domains`` (plural) and expects bare domain
    # names rather than URLs.
    allowed_domains = ['amazon.es']
    # Search-results URL; ``{page}`` is the 1-based results page number.
    search_url = (
        "https://www.amazon.es/s/ref=sr_pg_2"
        "?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi"
        "&page={page}&keywords=febi&ie=UTF8&qid=1535314254"
    )
    # Not used for scheduling because start_requests is overridden; kept
    # as a well-formed copy of the first page URL.
    start_urls = [search_url.format(page=1)]

    def start_requests(self):
        """Yield one request per results page (1 to 399) for ``parse_item``."""
        for page in range(1, 400):
            yield scrapy.Request(
                self.search_url.format(page=page),
                self.parse_item,
            )

    def parse_item(self, response):
        """Build a ``MercadoItem`` from ``response`` and yield it.

        ``articulo`` and ``precio`` are taken from the elements whose ids
        are ``productTitle`` and ``priceblock_ourprice``; each value is
        the list returned by ``extract()``.
        """
        ml_item = MercadoItem()
        # product info
        ml_item['articulo'] = response.xpath(
            'normalize-space(//*[@id="productTitle"])'
        ).extract()
        ml_item['precio'] = response.xpath(
            'normalize-space(//*[@id="priceblock_ourprice"])'
        ).extract()
        self.item_count += 1
        yield ml_item
UPDATE: Right now you have code (not optimal) that requests the pagination pages and code that parses a details page. You still need to add code that parses each pagination page and extracts the detail link for each item:
def start_requests(self):
    """Yield one request per search-results page (1 to 399).

    Each response is handled by ``self.parse_search``. Page 1 is produced
    by the same loop as the other pages; the resulting URLs and their
    order are the same as issuing it separately.
    """
    url_template = (
        "https://www.amazon.es/s/ref=sr_pg_2"
        "?rh=n%3A1951051031%2Cn%3A2424922031%2Ck%3Afebi"
        "&page={page}&keywords=febi&ie=UTF8&qid=1535314254"
    )
    for page in range(1, 400):
        yield scrapy.Request(url_template.format(page=page), self.parse_search)
def parse_search(self, response):
    """Yield a request for every item detail link found on a results page.

    Links are read from ``a`` elements whose class contains
    ``s-access-detail-page`` inside ``ul#s-results-list-atf``; each
    response is handled by ``self.parse_item``.
    """
    detail_links = response.xpath(
        '//ul[@id="s-results-list-atf"]'
        '//a[contains(@class, "s-access-detail-page")]/@href'
    ).extract()
    for item_link in detail_links:
        # An href may be relative; scrapy.Request rejects URLs without a
        # scheme, so resolve it against the page URL first.
        yield scrapy.Request(response.urljoin(item_link), self.parse_item)
def parse_item(self, response):
    """Build a ``MercadoItem`` from a details-page response and yield it.

    ``articulo`` and ``precio`` are taken from the elements whose ids are
    ``productTitle`` and ``priceblock_ourprice``; each value is the list
    returned by ``extract()``. Also increments ``self.item_count``.
    """
    ml_item = MercadoItem()
    # product info
    ml_item['articulo'] = response.xpath(
        'normalize-space(//*[@id="productTitle"])'
    ).extract()
    ml_item['precio'] = response.xpath(
        'normalize-space(//*[@id="priceblock_ourprice"])'
    ).extract()
    self.item_count += 1
    yield ml_item
Upvotes: 1