Reputation: 151
I am trying to get some information from a website (a webshop) with Python.
I tried this one:
# Asker's first attempt, kept exactly as posted; the NOTE(review) lines mark
# the problems visible in this snippet.
# NOTE(review): the URL is not wrapped in quotes, so this line is not valid Python.
my_url = requests.get(https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL)
data = my_url.json()
# Read the name, gross price and link of the first product only.
name = data['MainContent'][0]['contents'][0]['productList']['products'][0]['productModel']["displayName"]
price = data['MainContent'][0]['contents'][0]['productList']['products'][0]['priceInfo']['priceItemSale']["gross"]
url= data['MainContent'][0]['contents'][0]['productList']['products'][0]['productModel']["url"]
# Attempt to visit every product.
# NOTE(review): the loop bodies carry no indentation as posted.
for mc in data['MainContent']:
# NOTE(review): the lookups above go through mc['contents']; this iterates mc itself.
for co in mc:
for prod in co['productList']['products']:
name = prod['productModel']['displayName']
# NOTE(review): the first-product lookup above goes through 'priceInfo'; this one skips it.
price = prod['priceItemSale']['gross']
url = prod['productModel']['url']
filename = "test.csv"
# NOTE(review): csv_writer is created but never used in the lines below.
csv_writer = csv.writer(open(filename, 'w'))
headers = "Name, Price, Link\n"
# NOTE(review): `f` is not defined anywhere in this snippet.
f.write(headers)
f.close()
In this webshop there are a lot of products with the "productModel" attribute. How can I get all of them and write them into a CSV? I want to scrape the name, the price and the URL of each product on this page into different cells, but it isn't working.
* EDIT:
def proba():
    """Download the webshop product list and save it as a CSV file.

    Requests the category-results JSON, takes the products of the first
    product list, and for each one collects the display name, a gross
    price and the product URL.  The rows are written to
    ``/usr/src/Python-2.7.13/test.csv`` with the columns ``Name``,
    ``Price`` and ``Url``.

    The sale price is used when the product has one; otherwise the
    ``priceItemToBase`` price is used.
    """
    my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
    data = my_url.json()
    results = []
    products = data['MainContent'][0]['contents'][0]['productList']['products']
    for product in products:
        name = product['productModel']['displayName']
        try:
            priceGross = product['priceInfo']['priceItemSale']['gross']
        except (KeyError, TypeError):
            # Only a missing or null sale price triggers the fallback; a bare
            # ``except`` would also hide Ctrl-C and unrelated bugs.
            priceGross = product['priceInfo']['priceItemToBase']['gross']
        url = product['productModel']['url']
        results.append([name, priceGross, url])
    df = pd.DataFrame(results, columns=['Name', 'Price', 'Url'])
    df.to_csv(r'/usr/src/Python-2.7.13/test.csv', sep=',', encoding='utf-8-sig', index=False)
# Run forever: print the current wall-clock time and refresh the CSV.
# NOTE(review): there is no pause between iterations, so proba() is called
# back-to-back; confirm whether a delay between requests is wanted.
while True:
    mytime = datetime.now().strftime("%H:%M:%S")
    # Zero-padded "HH:MM:SS" strings compare in chronological order, so the
    # refresh is skipped only during the last second of the day.
    if mytime >= "23:59:59":
        continue
    # The call form of print works on both Python 2 and Python 3.
    print(mytime)
    proba()
Upvotes: 0
Views: 48
Reputation: 503
import csv

import requests

# Download the category-results JSON for the product listing.
my_url = requests.get("https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL")
data = my_url.json()
datas = data["MainContent"][0]["contents"]

# Collect one [name, price, url] row per product of every content entry.
finaldata = []
for mc in datas:
    plist = mc["productList"]["products"]
    for p in plist:
        name = p['productModel']['displayName']
        try:
            price = p['priceInfo']['priceItemToBase']['gross']
        except (KeyError, TypeError):
            # Fall back to the sale price only when the base price is missing
            # or null; a bare ``except`` would also hide Ctrl-C and real bugs.
            price = p['priceInfo']['priceItemSale']['gross']
        url = p['productModel']['url']
        finaldata.append([name, price, url])

columns = ['Name', 'Price', 'Url']
# The ``with`` block guarantees the file is flushed and closed.  csv.writer
# quotes any field that contains a comma or a quote, which a plain
# ','.join / str.format would silently corrupt.  lineterminator="\n" keeps
# the same line endings the hand-written version produced.
with open("data.csv", 'w') as files:
    writer = csv.writer(files, lineterminator="\n")
    writer.writerow(columns)
    writer.writerows(finaldata)
Upvotes: 0
Reputation: 84465
Not all the items have the same keys, so you cannot use prod['priceItemSale']['gross']
for every product. You need to decide where to get the value from when that key is not present. Example below:
import requests
import pandas as pd
# Download the category-results JSON for the product listing.
my_url = requests.get('https://www.telekom.hu/shop/categoryresults/?N=10994&contractType=list_price&instock_products=1&Ns=sku.sortingPrice%7C0%7C%7Cproduct.displayName%7C0&No=0&Nrpp=9&paymentType=FULL')
data = my_url.json()

# Build one [name, price, url] row per product of the first product list.
results = []
products = data['MainContent'][0]['contents'][0]['productList']['products']
for product in products:
    name = product['productModel']['displayName']
    try:
        priceGross = product['priceInfo']['priceItemSale']['gross']
    except (KeyError, TypeError):
        # No sale price on this product: use the base price instead.  Only a
        # missing or null entry is caught, so Ctrl-C and unrelated errors
        # still surface.
        priceGross = product['priceInfo']['priceItemToBase']['gross']
    url = product['productModel']['url']
    results.append([name, priceGross, url])

df = pd.DataFrame(results, columns=['Name', 'Price', 'Url'])
# print(df)  # uncomment to inspect the table before writing it
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8-sig', index=False)
Output:
Upvotes: 1