naman jogani
naman jogani

Reputation: 3

TypeError: Cannot mix str and non-str arguments

from scrapy import Spider
from scrapy.http import Request


class CourseSpider(Spider):
    name = 'course'
    allowed_domains = ['coursera.org']
    start_urls = ['https://coursera.org/about/partners']

    def parse(self, response):
        listings = response.xpath('//div[@class="rc-PartnerBox vertical-box"]')
        for listing in listings:
            title = listing.xpath('.//div[@class="partner-box-wrapper card-one-clicker flex-1"]/p').extract_first()
            relative_url = listing.xpath('.//a/@href').extract_first()
            absolute_url = response.urljoin(relative_url)

            yield Request(response.urljoin(relative_url), callback = self.parse_listing,meta={'title':title,'absolute_url':absolute_url})

    def parse_listing(self,response):
        titles = response.meta.get('title')
        absolute_url = response.meta.get('absolute_url')
        titles_course =  response.xpath('//div[@class="name headline-1-text"]/text()').extract()
        url_link = response.xpath('//div[@class="rc-Course"]/a/@href').extract()
        abs_url = response.urljoin(url_link)

        yield {'title':title,
        'titles':title,
        'absolute_url':absolute_url,
        'titles_course':titles_course,
        'abs_url':abs_url}

However, upon running the script through the cmd. I am getting errors. These errors suggest that I cannot mix str and non-str arguments and I am confused over how to deal with this problem. Any help would be appreciated.

Traceback (most recent call last):
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 117, in iter_errback
    yield next(it)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 338, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\Naman Jogani\Desktop\Udemy\udemy\spiders\course.py", line 28, in parse_listing
    yield {'title':title,
NameError: name 'title' is not defined
2020-08-05 00:08:48 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.coursera.org/checkpoint> (referer: https://www.coursera.org/about/partners)
Traceback (most recent call last):
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 117, in iter_errback
    yield next(it)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 338, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\Naman Jogani\Desktop\Udemy\udemy\spiders\course.py", line 26, in parse_listing
    abs_url = response.urljoin(url_link)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\http\response\text.py", line 80, in urljoin
    return urljoin(get_base_url(self), url)
  File "c:\users\naman jogani\anaconda3\lib\urllib\parse.py", line 504, in urljoin
    base, url, _coerce_result = _coerce_args(base, url)
  File "c:\users\naman jogani\anaconda3\lib\urllib\parse.py", line 120, in _coerce_args
    raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
2020-08-05 00:08:48 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.coursera.org/casewesternreserve> (referer: https://www.coursera.org/about/partners)
2020-08-05 00:08:48 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.coursera.org/casewesternreserve> (referer: https://www.coursera.org/about/partners)
Traceback (most recent call last):
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\defer.py", line 117, in iter_errback
    yield next(it)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 338, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\Naman Jogani\Desktop\Udemy\udemy\spiders\course.py", line 26, in parse_listing
    abs_url = response.urljoin(url_link)
  File "c:\users\naman jogani\anaconda3\lib\site-packages\scrapy\http\response\text.py", line 80, in urljoin
    return urljoin(get_base_url(self), url)
  File "c:\users\naman jogani\anaconda3\lib\urllib\parse.py", line 504, in urljoin
    base, url, _coerce_result = _coerce_args(base, url)
  File "c:\users\naman jogani\anaconda3\lib\urllib\parse.py", line 120, in _coerce_args
    raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
2020-08-05 00:08:48 [scrapy.core.engine] INFO: Closing spider (finished)

I tried adding the extract() function since it was mentioned on some previous stackoverflow question on the listings container to get rid of that error but then my xpath is not getting the desired output.

Upvotes: 0

Views: 1693

Answers (1)

mdaniel
mdaniel

Reputation: 33223

You are looking for .extract_first() or its new name .get(), because .extract() produces a list, which one cannot use in .urljoin

Upvotes: 1

Related Questions