Reputation: 143
I'm coding a Scrapy project. I've tested everything, but when I parse a page it returns TypeError: Argument must be bytes or unicode, got 'list'
I've tested everything in the shell using this link. And I can't seem to find where it's having a problem. All of my shell commands returned only one item (i.e. there was no comma.)
Does anyone know why this might be the case?
from scrapy.spiders import Spider
from scrapy.selector import HtmlXPathSelector
from scrapy.loader import XPathItemLoader
from scrapy.loader.processors import Join, MapCompose
from scraper_app.items import Grailed
class GrailedSpider(Spider):
name = "grailed"
allowed_domains = ["grailed.com"]
base_url = "https://www.grailed.com/listings/"
start_urls = ["https://www.grailed.com/listings/100"]
for i in range(100, 150):
start_urls.append(base_url + str(i))
item_fields = {
'created': '//ul[@class = "horizontal-list listing-metadata-list clearfix"]/li[@class="horizontal-list-item listing-metadata-item"][1]/span[2]/text()',
'title_size': '//h1[@class = "designer"]/div/text()',
'original_price': '//ul[@class = "horizontal-list price-drops clearfix"]/li/text()',
'followers': '//div[@class = "listing-followers"]/p/text()',
'shipping_price': '//div[@class = "listing-shipping"]/p/text()',
'sellers_wardrobe': '//div[@class = "user-widget medium"]/a/text()',
'bought_and_sold': '//div[@class = "user-widget-bottom"]/p[@class= "bought-and-sold"]/text()[1]',
'feedback_score': '//div[@class = "green seller-score-top"]/text()[2]'
}
def parse(self, response):
selector = HtmlXPathSelector(response)
# iterate over urls
for url in selector.xpath(self.start_urls):
loader = XPathItemLoader(Grailed(), selector=url)
# define processors
loader.default_input_processor = MapCompose(unicode.strip)
loader.default_output_processor = Join()
# iterate over fields and add xpaths to the loader
for field, xpath in self.item_fields.iteritems():
loader.add_xpath(field, xpath)
yield loader.load_item()
The traceback shows
ERROR: Spider error processing <GET https://www.grailed.com/listings/144> (referer: None)
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 28, in process_spider_output
for x in result:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/referer.py", line 22, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/depth.py", line 54, in <genexpr>
return (r for r in result or () if _filter(r))
File "/Users/phillipblack/Projects/scrape_workspace/grailed/scraper_app/spiders/grailed_spider.py", line 55, in parse
for url in selector.xpath(self.start_urls):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/selector/unified.py", line 97, in xpath
smart_strings=self._lxml_smart_strings)
File "lxml.etree.pyx", line 1507, in lxml.etree._Element.xpath (src/lxml/lxml.etree.c:52198)
File "xpath.pxi", line 295, in lxml.etree.XPathElementEvaluator.__call__ (src/lxml/lxml.etree.c:151999)
File "apihelpers.pxi", line 1391, in lxml.etree._utf8 (src/lxml/lxml.etree.c:27100)
TypeError: Argument must be bytes or unicode, got 'list'
Upvotes: 1
Views: 4716
Reputation: 18799
The problem is in this line:
for url in selector.xpath(self.start_urls):
selector.xpath
should receive an string, with an xpath command. I see that you want to get urls, so maybe something like //a/@href
:
selector.xpath('//a/@href')
Upvotes: 1