Reputation: 111
I'm having trouble with my web scraper not getting the "Odds" values and not sure what is wrong. For each piece of information, I am using a try/except to see if the element is available. I'm not sure what is wrong with getting the Odds values though. Thanks for the help
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
url = 'https://www.ncaagamesim.com/college-basketball-predictions.asp'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
# Get column names
headers = table.find_all('th')
cols = [x.text for x in headers]
# Get all rows in table body
table_rows = table.find_all('tr')
rows = []
# Grab the text of each td, and put into a rows list
for each in table_rows[1:]:
odd_avail = True
data = each.find_all('td')
time = data[0].text.strip()
# Get matchup and odds
try:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = float(odds.split('by')[-1].strip())
except:
matchup = data[1].text.strip()
odd_margin = '-'
odd_avail = False
# Get favored team
try:
odd_team_win = data[1].find_all('img')[-1]['title']
except:
odd_team_win = '-'
odd_avail = False
# Get simulation winner
try:
sim_team_win = data[2].find('img')['title']
except:
sim_team_win = '-'
odd_avail = False
awayTeam = matchup.split('@')[0].strip()
homeTeam = matchup.split('@')[1].strip()
# Get simulation margin
try:
sim_margin = float(re.findall("\d+\.\d+", data[2].text)[-1])
except:
sim_margin = '-'
odd_avail = False
# If all variables available, determine odds, simulation margin points, and optimal bet
if odd_avail == True:
if odd_team_win == sim_team_win:
diff = abs(sim_margin - odd_margin)
if sim_margin > odd_margin:
bet = odd_team_win
else:
if odd_team_win == homeTeam:
bet = awayTeam
else:
bet = homeTeam
else:
diff = odd_margin + sim_margin
bet = sim_team_win
else:
diff = -1
bet = '-'
# Create table
row = {cols[0]: time, 'Matchup': matchup, 'Odds Winner': odd_team_win, 'Odds': odd_margin,
'Simulation Winner': sim_team_win, 'Simulation Margin': sim_margin, 'Diff': diff, 'Bet' : bet}
rows.append(row)
df = pd.DataFrame(rows)
df = df.sort_values(by = ['Diff'], ascending = False)
print (df.to_string())
# df.to_csv('odds.csv', index=False)
When I run this code everything works perfectly and gets all other values but all the odds values in the table are '-'.
Upvotes: 0
Views: 169
Reputation: 28630
I added a few things into the code, to account for
As far as the odds not showing. Check the csv file to see if it's there. If it is, might just be a preference you need to change in pycharm (might be just cutting off some of the string)
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
url = 'https://www.ncaagamesim.com/college-basketball-predictions.asp'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
# Get column names
headers = table.find_all('th')
cols = [x.text for x in headers]
# Get all rows in table body
table_rows = table.find_all('tr')
rows = []
# Grab the text of each td, and put into a rows list
for each in table_rows[1:]:
odd_avail = True
data = each.find_all('td')
time = data[0].text.strip()
# Get matchup and odds
try:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = float(odds.split('by')[-1].strip())
except:
matchup = data[1].text.strip()
if 'Even' in matchup:
matchup, odds = data[1].text.strip().split('\xa0')
odd_margin = 0
else:
odd_margin = '-'
odd_avail = False
awayTeam = matchup.split('@')[0].strip()
homeTeam = matchup.split('@')[1].strip()
# Get favored team
try:
odd_team_win = data[1].find_all('img')[-1]['title']
except:
odd_team_win = '-'
odd_avail = False
# Get simulation winner
try:
sim_team_win = data[2].find('img')['title']
except:
if 'wins' in data[2].text:
sim_team_win = data[2].text.split('wins')[0].strip()
else:
sim_team_win = '-'
odd_avail = False
# Get simulation margin
try:
sim_margin = float(re.findall("\d+\.\d+", data[2].text)[-1])
except:
sim_margin = '-'
odd_avail = False
# If all variables available, determine odds and simulation margin points
if odd_avail == True:
if odd_team_win == sim_team_win:
diff = abs(sim_margin - odd_margin)
else:
diff = odd_margin + sim_margin
else:
diff = '-'
# Create table
row = {cols[0]: time, 'Away Team': awayTeam, 'Home Team':homeTeam, 'Odds Winner': odd_team_win, 'Odds': odd_margin,
'Simulation Winner': sim_team_win, 'Simulation Margin': sim_margin, 'Diff': diff}
rows.append(row)
df = pd.DataFrame(rows)
print (df.to_string())
# df.to_csv('odds.csv', index=False)
Upvotes: 1