Reputation: 13
def parse():
    """Scrape every listing page under URL and print the collected phones.

    Fetches URL once to read the page count via pages(), then requests each
    page with the ``p`` query parameter and accumulates what get_content()
    returns for it.

    Returns:
        The accumulated list of phones, or an empty list when the initial
        request does not come back with HTTP 200.
    """
    html = get_html(URL)
    if html.status_code != 200:
        print('Error')
        return []

    phones = []
    pages_count = pages(html.text)
    for page in range(1, pages_count + 1):
        print(f'Parsing a page {page} from {pages_count}...')
        html = get_html(URL, params={'p': page})
        # Apply the same status check as for the initial request, so that an
        # error page is never handed to the parser.
        if html.status_code != 200:
            print(f'Error: page {page} returned status {html.status_code}')
            continue
        phones.extend(get_content(html.text))
    print(phones)
    return phones
Hi, I want to list the items, but I get an error:
File "C:/Users/User/PycharmProjects/Parser/parser.py", line 52, in <module>
parse()
File "C:/Users/User/PycharmProjects/Parser/parser.py", line 46, in parse
phones.extend(get_content(html.text))
TypeError: 'NoneType' object is not iterable
This is all the code:
import requests
from bs4 import BeautifulSoup
URL = 'https://comfy.ua/smartfon/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
'accept': '*/*'}
def get_html(url, params=None, timeout=10):
    """Send a GET request for *url* and return the response object.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The response returned by requests.get().

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def pages(html):
    """Return the number of listing pages advertised by the pager.

    Reads the text of the second-to-last ``<li class="pager__number">``
    element and converts it to an int. When the page has no such elements,
    a single page is assumed and 1 is returned.
    """
    soup = BeautifulSoup(html, 'html.parser')
    pager_items = soup.find_all('li', class_='pager__number')
    if not pager_items:
        return 1
    # NOTE(review): presumably the last pager item is not a page number
    # (e.g. a "next" control), hence the [-2] index - confirm against the
    # site's markup.
    last_page_label = pager_items[-2].get_text()
    return int(last_page_label)
def get_content(html):
    """Collect title, link and price for every product card in *html*.

    NOTE: the list is built and printed, but the function has no return
    statement, so the caller receives None.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Every <div class="product-item__i"> is treated as one product card.
    items = soup.find_all('div', class_="product-item__i")
    phone = []
    for item in items:
        phone.append({
            'title': item.find('p', class_="product-item__name").get_text(strip=True),
            'link': item.find('a', class_="product-item__name-link js-gtm-product-title").get('href'),
            # Non-breaking spaces in the price text become regular spaces.
            'price': item.find('div', class_="price-box__content-i").get_text(strip=True).replace(u'\xa0', u' ')
        })
    print(phone)
def parse():
    """Scrape every listing page under URL and print the collected phones.

    Fetches URL once to read the page count via pages(), then requests each
    page with the ``p`` query parameter and accumulates what get_content()
    returns for it.

    Returns:
        The accumulated list of phones, or an empty list when the initial
        request does not come back with HTTP 200.
    """
    html = get_html(URL)
    if html.status_code != 200:
        print('Error')
        return []

    phones = []
    pages_count = pages(html.text)
    for page in range(1, pages_count + 1):
        print(f'Parsing a page {page} from {pages_count}...')
        html = get_html(URL, params={'p': page})
        # Apply the same status check as for the initial request, so that an
        # error page is never handed to the parser.
        if html.status_code != 200:
            print(f'Error: page {page} returned status {html.status_code}')
            continue
        phones.extend(get_content(html.text))
    print(phones)
    return phones
# Run the scraper.
parse()
I get an empty list, but I should get the phones. I also get an error.
Upvotes: 0
Views: 40
Reputation: 9978
phones.extend(get_content(html.text))
TypeError: 'NoneType' object is not iterable
This error is telling you that you're trying to iterate over `None`. Since `extend()` takes an iterable, this means that `get_content()` is returning `None`. This often happens when a function returns nothing at all: having no `return` statement is equivalent to `return None` in Python.
Sure enough, your code for `get_content()` doesn't have a `return` statement. You need to add it:
def get_content(html):
    """Collect title, link and price for every product card in *html*.

    Returns:
        A list with one dict per product card, each holding the keys
        'title', 'link' and 'price'.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Every <div class="product-item__i"> is treated as one product card.
    items = soup.find_all('div', class_="product-item__i")
    phone = []
    for item in items:
        phone.append({
            'title': item.find('p', class_="product-item__name").get_text(strip=True),
            'link': item.find('a', class_="product-item__name-link js-gtm-product-title").get('href'),
            # Non-breaking spaces in the price text become regular spaces.
            'price': item.find('div', class_="price-box__content-i").get_text(strip=True).replace(u'\xa0', u' ')
        })
    print(phone)
    return phone  # <--- add this
Upvotes: 1