Reputation: 61
import scrapy
class ExampleSpider(scrapy.Spider):
name = "example"
allowed_domains = ["dmoz.org"]
start_urls = [
"http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
"http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
]
def parse(self, response):
for sel in response.xpath('//ul/li'):
title = sel.xpath('a/text()').extract()
link = sel.xpath('a/@href').extract()
desc = sel.xpath('text()').extract()
print title, link, desc
However, when I try to call up the spider, I get the following error message:
[example] ERROR: Spider error processing <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
Traceback (most recent call last):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/base.py", line 1178, in mainLoop
self.runUntilCurrent()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/base.py", line 800, in runUntilCurrent
call.func(*call.args, **call.kw)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/defer.py", line 368, in callback
self._startRunCallbacks(result)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/defer.py", line 464, in _startRunCallbacks
self._runCallbacks()
--- <exception caught here> ---
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/defer.py", line 551, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/Users/andy2/Documents/Python/tutorial/tutorial/spiders/example.py", line 18, in parse
print title, link, desc
exceptions.NameError: global name 'link' is not defined
Is there anything I can do to make this code work?
Can anyone help me?
Thank you!!!
Upvotes: 3
Views: 439
Reputation: 473753
You need to instantiate a Selector
and pass response
as an argument. Also, your imports are not correct. Here's the fixed version of the spider:
from scrapy.selector import Selector
from scrapy.spider import Spider
class ExampleSpider(Spider):
name = "example"
allowed_domains = ["dmoz.org"]
start_urls = [
"http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
"http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
]
def parse(self, response):
sel = Selector(response)
for li in sel.xpath('//ul/li'):
title = li.xpath('a/text()').extract()
link = li.xpath('a/@href').extract()
desc = li.xpath('text()').extract()
print title, link, desc
Upvotes: 3