Reputation: 51
I have some code that should give me a list of shops' data, but the list comes back empty and no errors are shown. Any ideas how to fix it?
import requests
from bs4 import BeautifulSoup
import pandas as pd
def get_page_data(number):
    """Fetch one page of the BRW store listing and return its addresses.

    Args:
        number: Page number substituted into the ``page`` query parameter.

    Returns:
        A list of one-element lists ``[adres]``, one per store entry that
        has an address element. The list is empty when the returned HTML
        has no ``lista-salonow`` container or no ``salon-kontener`` entries.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    print('number:', number)
    url = 'https://www.brw.pl/siec-sprzedazy/?page={}'.format(number)
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    container = soup.find(class_='lista-salonow')
    if container is None:
        # find() returns None when nothing matches; calling find_all() on it
        # would raise AttributeError, so report "no entries" instead.
        return []

    dane = []
    for item in container.find_all(class_='salon-kontener'):
        adres = item.find(class_='salon-szczegoly-adres')
        if adres is None:
            # Skip entries that carry no address element.
            continue
        dane.append([adres.get_text(strip=True)])
    return dane
# Gather the address rows from result pages 1 and 2 into a single list.
wszystkie_dane = []
for number in (1, 2):
    wszystkie_dane += get_page_data(number)

# Save the rows as a one-column ("adres") CSV file without the index column.
dane = pd.DataFrame(wszystkie_dane, columns=['adres'])
dane.to_csv('brw.csv', index=False)
Upvotes: 0
Views: 61
Reputation: 986
Try the approach below using requests: it is clean and reliable, and it needs less code to fetch the desired result straight from the website.
You can open the URL in a browser to see which columns come back in the result, and then use whichever ones you need. Right now I'm printing only five of them; you can fetch the other columns in the same way.
import json
import requests
from urllib3.exceptions import InsecureRequestWarning
# The request below is sent with verify=False, so suppress the
# InsecureRequestWarning category up front.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def scrap_shops_data():
    """Download the shop list from the site's AJAX endpoint and print it.

    The endpoint's response is decoded as JSON and iterated; for every shop
    entry five fields are printed, framed by a 100-character dashed line
    before and after.
    """
    # Endpoint that serves the shop data as JSON.
    api_url = 'https://www.brw.pl/ajax/zpLIv5maeKSYy8KP07immqanj-PVnJO6mQ/'
    separator = '-' * 100
    printed_fields = (
        'nazwa_salonu',
        'adres',
        'kod_pocztowy',
        'miejscowosc',
        'email',
    )

    # Certificate verification is disabled for this request.
    response = requests.get(api_url, verify=False)
    for shop in response.json():
        print(separator)
        for field in printed_fields:
            print(shop[field])
        print(separator)


scrap_shops_data()
Upvotes: 1