Reputation: 67
Can someone give me a hint on how to fix the error `TooManyRedirects: Exceeded 30 redirects.`?
import requests
from bs4 import BeautifulSoup

# Step 1: collect the product-page URLs from the steam-locomotive listing.

baseurl = 'https://www.roco.cc/'

# The HTTP header is spelled 'User-Agent'; the former 'UserAgent' key would
# have been sent as an unrelated header, and the dict was never passed to a
# request at all.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
}

productlinks = []

for x in range(1, 2):  # listing pages to scan; currently only page 1
    r = requests.get(
        f'https://www.roco.cc/ren/products/locomotives/steam-locomotives.html?p={x}&verfuegbarkeit_status=41%2C42%2C43%2C45%2C44',
        headers=headers,
        timeout=30,  # requests waits forever by default if the server stalls
    )
    soup = BeautifulSoup(r.content, 'lxml')
    productlist = soup.find_all('li', class_='item product product-item')
    for item in productlist:
        # href=True filters out anchors that carry no link target.
        productlinks.extend(
            anchor['href']
            for anchor in item.find_all('a', class_='product-item-link', href=True)
        )
# Step 2: visit every product page and print one record per locomotive.


def _node_text(node):
    """Return the stripped text of a parsed node, or '' when *node* is None."""
    return '' if node is None else node.text.strip()


def _spec(page, label):
    """Return the text of the <td> whose ``data-th`` attribute equals *label*.

    Yields '' when the page has no such cell, so a missing specification
    never aborts the scrape.
    """
    return _node_text(page.find('td', {'data-th': label}))


for link in productlinks:
    try:
        r = requests.get(link, headers={'user-agent': 'My app'}, timeout=30)
    except requests.exceptions.TooManyRedirects:
        # A product URL that redirects in a loop should cost us that one
        # product, not the whole run.
        print(f'Skipping {link}: too many redirects')
        continue

    soup = BeautifulSoup(r.content, 'lxml')

    # Field name -> scraped value. (The earlier version had keys and values
    # swapped, so every empty field collapsed onto the same '' key.)
    Locomotives = {
        'Manufacturer_name': 'Roco',
        'Reference': _node_text(soup.find('span', class_='product-head-artNr')),
        'Price': _spec(soup, 'Price'),
        'Type': 'Steam locomotive',
        'Scale': _spec(soup, 'Scale'),
        # Was soup.find('Current'), which looks for a <current> *tag* and so
        # never matched. Assumes the spec table labels this row 'Current'
        # like its siblings - TODO confirm against a live product page.
        'Current': _spec(soup, 'Current'),
        'Control': _spec(soup, 'Control'),
        'Interface': _spec(soup, 'Interface'),
        'Digital_decoder': _spec(soup, 'Digital decoder'),
        'Decoder_Type': _spec(soup, 'Decoder-Type'),
        'Motor': _spec(soup, 'Motor'),
        'Flywheel': _spec(soup, 'Flywheel'),
        'Minimum_radius': _spec(soup, 'Minimum radius'),
        'Length_over_buffer': _spec(soup, 'Length over buffer'),
        'Number_of_driven_axles': _spec(soup, 'Number of driven axles'),
        'Number_of_axles_with_traction_tyres': _spec(
            soup, 'Number of axles with traction tyres'),
        'Coupling': _spec(soup, 'Coupling'),
        'LED_lighting': _spec(soup, 'LED lighting'),
        'Head_light': _spec(soup, 'Head light'),
        'LED_head_light': _spec(soup, 'LED head light'),
        'Country': _spec(soup, 'Original (country)'),
        'Railway_company': _spec(soup, 'Railway Company'),
        'Epoch': _spec(soup, 'Epoch'),
        'Description': _node_text(
            soup.find('div', class_='product-add-form-text')),
    }
    print(Locomotives)
Upvotes: 0
Views: 795
Reputation: 67
import requests
from bs4 import BeautifulSoup
import pandas as pd
import xlsxwriter
# Step 1: collect the product-page URLs from the steam-locomotive listing.

baseurl = 'https://www.roco.cc/'

# Browser-like request headers. 'sdch' was dropped from Accept-Encoding:
# requests cannot decode it, so it must not be advertised to the server.
headers = {
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

productlinks = []

for x in range(1, 2):  # listing pages to scan; currently only page 1
    r = requests.get(
        f'https://www.roco.cc/ren/products/locomotives/steam-locomotives.html?p={x}&verfuegbarkeit_status=41%2C42%2C43%2C45%2C44',
        headers=headers,  # previously built but never sent
        timeout=30,  # requests waits forever by default if the server stalls
    )
    soup = BeautifulSoup(r.content, 'lxml')
    productlist = soup.find_all('li', class_='item product product-item')
    for item in productlist:
        # href=True filters out anchors that carry no link target.
        productlinks.extend(
            anchor['href']
            for anchor in item.find_all('a', class_='product-item-link', href=True)
        )
# Step 2: visit every product page and build one record per locomotive.


def _node_text(node):
    """Return the stripped text of a parsed node, or '' when *node* is None."""
    return '' if node is None else node.text.strip()


def _spec(page, label):
    """Return the text of the <td> whose ``data-th`` attribute equals *label*.

    Yields '' when the page has no such cell, so a missing specification
    never aborts the scrape.
    """
    return _node_text(page.find('td', {'data-th': label}))


Loco_list = []

for link in productlinks:
    # allow_redirects=False sidesteps the "Exceeded 30 redirects" failure: the
    # first response is parsed as-is. If that response is itself a redirect,
    # none of the lookups below match and the record's fields stay empty.
    r = requests.get(link, headers=headers, allow_redirects=False, timeout=30)
    soup = BeautifulSoup(r.content, 'lxml')

    Locomotives = {
        'Manufacturer_name': 'Roco',
        'Reference': _node_text(soup.find('span', class_='product-head-artNr')),
        'Price': _spec(soup, 'Price'),
        'Type': 'Steam locomotive',
        'Scale': _spec(soup, 'Scale'),
        # Was soup.find('Current'), which looks for a <current> *tag* and so
        # never matched. Assumes the spec table labels this row 'Current'
        # like its siblings - TODO confirm against a live product page.
        'Current': _spec(soup, 'Current'),
        'Control': _spec(soup, 'Control'),
        'Interface': _spec(soup, 'Interface'),
        'Digital_decoder': _spec(soup, 'Digital decoder'),
        'Decoder_Type': _spec(soup, 'Decoder-Type'),
        'Motor': _spec(soup, 'Motor'),
        'Flywheel': _spec(soup, 'Flywheel'),
        'Minimum_radius': _spec(soup, 'Minimum radius'),
        'Length_over_buffer': _spec(soup, 'Length over buffer'),
        'Number_of_driven_axles': _spec(soup, 'Number of driven axles'),
        'Number_of_axles_with_traction_tyres': _spec(
            soup, 'Number of axles with traction tyres'),
        'Coupling': _spec(soup, 'Coupling'),
        'LED_lighting': _spec(soup, 'LED lighting'),
        'Head_light': _spec(soup, 'Head light'),
        'LED_head_light': _spec(soup, 'LED head light'),
        'Country': _spec(soup, 'Original (country)'),
        'Railway_company': _spec(soup, 'Railway Company'),
        'Epoch': _spec(soup, 'Epoch'),
        'Description': _node_text(
            soup.find('div', class_='product-add-form-text')),
    }
    Loco_list.append(Locomotives)
# Step 3: write the scraped records to an Excel workbook.
df1 = pd.DataFrame(Loco_list)
# df2 = pd.DataFrame()
# df3 = pd.DataFrame()
# df4 = pd.DataFrame()

# The context manager saves and closes the workbook on exit, even if a
# to_excel call raises. ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('Roco - locomotives.xlsx', engine='xlsxwriter') as writer:
    df1.to_excel(writer, sheet_name='Model')
    # df2.to_excel(writer, sheet_name='Spare parts')
    # df3.to_excel(writer, sheet_name='Documents')
    # df4.to_excel(writer, sheet_name='Photos')

print('Saved to file')
Upvotes: 0