Reputation: 59
I would like to know your advice on how to optimize my code. More precisely, I want to parse all elements with this class 'value-decrease'. Code:
import requests
from bs4 import BeautifulSoup
URL = 'https://finance.i.ua/nbu/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.163 Safari/537.36', 'accept': '*/*'}
def get_html(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
items = soup.find_all('div', class_="data_container")
currency = []
for item in items:
currency.append({
item.find_all('span', class_='value-decrease').get_text(strip=True)
})
print(f"1 usd = {currency} uah")
def parse():
html = get_html(URL)
if html.status_code == 200:
get_content(html.text)
else:
print('Error')
parse()
The output should be like this(The numbers are approximate.):
1 usd = 27 uah
1 eur = 29 uah
Upvotes: 1
Views: 220
Reputation: 178179
currency
is a list that grows with each iteration, so it isn't what you want to print. Also value-decrease
is value -descrease
(extra space). The span
is also nested in another span. Here's the fixes:
import requests
from bs4 import BeautifulSoup
URL = 'https://finance.i.ua/nbu/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.163 Safari/537.36', 'accept': '*/*'}
def get_html(url, params=None):
r = requests.get(url, headers=HEADERS, params=params)
return r
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
rows = soup.find_all('tr')[1:] # find table rows and throw away header
for row in rows:
data = row.find('span',class_='value -decrease') # is the row a decrease?
if data:
currency = row.th.get_text().lower()
value = data.span.get_text()
print(f'1 {currency} = {value} uah')
def parse():
html = get_html(URL)
if html.status_code == 200:
get_content(html.text)
else:
print('Error')
parse()
Output:
1 usd = 27.2022 uah
1 eur = 29.6341 uah
Upvotes: 1
Reputation: 2328
I realized the country changes so I updated my code.
def get_content(html):
soup = BeautifulSoup(html, 'html.parser')
currency = []
elems = soup.findAll("span", {"class": "value -decrease"})
countries = [x.parent.previous_sibling.previous_sibling.get_text(strip=True) for x in elems]
for i in range(len(elems)):
cur = elems[i].get_text(strip=True).split('.')[0]
currency.append(cur)
print(f"1 {countries[i]} = {cur} uah")
Upvotes: 0