Reputation: 807
The script I have scrapes the NHL API; it is supposed to collect every game in a season, with game IDs ranging from 2017020001 to 2017021271. I just finished it and realised that only the last game gets appended to each respective list: in my case only game 2017021271, and none of the 1270 games before it.
My code looks like this, what have I done terribly wrong?
I know it has to do with my badly structured looping, but I don't know how to fix it. Thank you for understanding!
#Importing Libraries
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
#Create Empty lists
# NOTE(review): the indentation of this script was lost when it was pasted, so
# the nesting of the loops below cannot be read off this text.
# player_id / goalie_id map 'home'/'away' to whatever the boxscore stores under
# its 'skaters' / 'goalies' key; the remaining lists are appended to further down.
player_id = {}
goalie_id = {}
person = []
position = []
skaterstats = []
goalie_person=[]
goalie_position=[]
goalie_stats=[]
team = []
team_goals = []
matchid = []
#Connect to NHL-API
# Requests the live feed of each game id in the range and parses the response
# body as JSON. Note that range() excludes its stop value (2017020100).
for game_id in range(2017020001, 2017020100, 1):
url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
r = requests.get(url)
game_data = r.json()
#Get Keys for Players/Goalies
# NOTE(review): presumably everything from here down to the DataFrame section
# is meant to run once per game, i.e. inside the for loop above -- confirm
# against the original indentation.
for homeaway in ['home','away']:
player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
player_id[homeaway] = player_dict
for homeaway in ['home','away']:
goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
goalie_id[homeaway] = goalie_dict
#Get PlayerStats/TeamStats
# For every skater id collected above, look up the entry stored under the key
# 'ID<id>' in that side's 'players' mapping.
for homeaway in player_id:
for playerID in player_id[homeaway]:
play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
play_dict_gameid = game_data.get('gamePk')
play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')
#Append TeamStats to Empty list
team.append(play_dict_teamname)
team_goals.append(play_dict_teamgoals)
matchid.append(play_dict_gameid)
#Append PlayerStats to Empty list
person.append(play_dict_person)
position.append(play_dict_position)
if play_dict_skaterstats:
skaterstats.append(play_dict_skaterstats)
# When the skater has no (or empty) skaterStats, append a placeholder dict with
# every stat key set to None instead.
if not play_dict_skaterstats:
play_dict_skaterstats = {}
play_dict_skaterstats['timeOnIce'] = None
play_dict_skaterstats['assists'] = None
play_dict_skaterstats['goals'] = None
play_dict_skaterstats['shots'] = None
play_dict_skaterstats['hits'] = None
play_dict_skaterstats['powerPlayGoals'] = None
play_dict_skaterstats['powerPlayAssists'] = None
play_dict_skaterstats['penaltyMinutes'] = None
play_dict_skaterstats['faceOffPct'] = None
play_dict_skaterstats['faceOffWins'] = None
play_dict_skaterstats['faceoffTaken'] = None
play_dict_skaterstats['takeaways'] = None
play_dict_skaterstats['giveaways'] = None
play_dict_skaterstats['shortHandedGoals'] = None
play_dict_skaterstats['shortHandedAssists'] = None
play_dict_skaterstats['blocked'] = None
play_dict_skaterstats['plusMinus'] = None
play_dict_skaterstats['evenTimeOnIce'] = None
play_dict_skaterstats['powerPlayTimeOnIce'] = None
play_dict_skaterstats['shortHandedTimeOnIce'] = None
skaterstats.append(play_dict_skaterstats)
#Get GoalieStats
for homeaway in goalie_id:
for goalieID in goalie_id[homeaway]:
play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')
play_dict_gameid = game_data.get('gamePk')
# NOTE(review): each of the three chained assignments below also rebinds
# play_dict_person, which therefore ends up holding the 'goalieStats' value.
goalie_dict_person = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
goalie_dict_position = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
goalie_dict_stats = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')
#Append GoalieStats to Empty list
goalie_person.append(goalie_dict_person)
goalie_position.append(goalie_dict_position)
# Unlike the skater branch, nothing is appended to goalie_stats when
# goalie_dict_stats is falsy.
if goalie_dict_stats:
goalie_stats.append(goalie_dict_stats)
#Append TeamStats to Empty list
team.append(play_dict_teamname)
team_goals.append(play_dict_teamgoals)
matchid.append(play_dict_gameid)
#Create DataFrames for all lists
# Each accumulated list is wrapped in its own DataFrame.
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)
df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)
df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)
Upvotes: 0
Views: 65
Reputation: 28630
Fixed the indentation so that everything you need runs inside your initial for loop. See if this fixes your issue:
#Importing Libraries
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
#Create Empty lists
# Module-level accumulators. They are filled inside the per-game loop below, so
# they keep growing across every game that is fetched.
player_id = {}   # 'home'/'away' -> skater ids of the game currently processed
goalie_id = {}   # 'home'/'away' -> goalie ids of the game currently processed
person = []
position = []
skaterstats = []
goalie_person = []
goalie_position = []
goalie_stats = []
team = []
team_goals = []
matchid = []

# Keys of the placeholder row appended for a skater without skaterStats, so
# that skaterstats stays row-aligned with person and position.
_SKATER_STAT_KEYS = (
    'timeOnIce',
    'assists',
    'goals',
    'shots',
    'hits',
    'powerPlayGoals',
    'powerPlayAssists',
    'penaltyMinutes',
    'faceOffPct',
    'faceOffWins',
    'faceoffTaken',
    'takeaways',
    'giveaways',
    'shortHandedGoals',
    'shortHandedAssists',
    'blocked',
    'plusMinus',
    'evenTimeOnIce',
    'powerPlayTimeOnIce',
    'shortHandedTimeOnIce',
)

#Connect to NHL-API
# Everything that reads game_data is indented under this loop: that is what
# makes every game (not only the last one) end up in the lists.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    # A timeout keeps one stalled connection from hanging the whole scrape.
    r = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing lookup error further
    # down when an error payload lacks the expected keys.
    r.raise_for_status()
    game_data = r.json()

    # Loop-invariant lookups, done once per game instead of once per player.
    play_dict_gameid = game_data.get('gamePk')
    teams = game_data['liveData']['boxscore']['teams']

    #Get Keys for Players/Goalies
    for homeaway in ['home', 'away']:
        # "or []" lets a side without the key be skipped instead of crashing.
        player_id[homeaway] = teams[homeaway].get('skaters') or []
        goalie_id[homeaway] = teams[homeaway].get('goalies') or []

    #Get PlayerStats/TeamStats
    for homeaway in player_id:
        side = teams[homeaway]
        play_dict_teamname = side['team'].get('name')
        play_dict_teamgoals = side['teamStats']['teamSkaterStats'].get('goals')
        players = side['players']

        for playerID in player_id[homeaway]:
            # Player entries are stored under the key 'ID<player id>'.
            player = players['ID' + str(playerID)]

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

            #Append PlayerStats to Empty list
            person.append(player.get('person'))
            position.append(player.get('position'))
            play_dict_skaterstats = (player.get('stats') or {}).get('skaterStats')
            # Missing or empty stats get a fresh all-None placeholder row.
            skaterstats.append(
                play_dict_skaterstats or dict.fromkeys(_SKATER_STAT_KEYS)
            )

    #Get GoalieStats
    for homeaway in goalie_id:
        side = teams[homeaway]
        play_dict_teamname = side['team'].get('name')
        play_dict_teamgoals = side['teamStats']['teamSkaterStats'].get('goals')
        players = side['players']

        for goalieID in goalie_id[homeaway]:
            goalie = players['ID' + str(goalieID)]
            goalie_dict_person = goalie.get('person')
            goalie_dict_position = goalie.get('position')
            goalie_dict_stats = (goalie.get('stats') or {}).get('goalieStats')

            #Append GoalieStats to Empty list
            goalie_person.append(goalie_dict_person)
            goalie_position.append(goalie_dict_position)
            # Always append a row (an empty dict becomes an all-NaN DataFrame
            # row) so goalie_stats stays aligned with goalie_person and
            # goalie_position; appending only truthy stats shifted later rows.
            goalie_stats.append(goalie_dict_stats or {})

            #Append TeamStats to Empty list
            # The team lists therefore hold the skater rows of a side followed
            # by its goalie rows, game after game.
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Built once, after the loop, from the fully accumulated lists.
df_person = pd.DataFrame(person)
df_position = pd.DataFrame(position)
df_skaterstats = pd.DataFrame(skaterstats)
df_team = pd.DataFrame(team)
df_teamgoals = pd.DataFrame(team_goals)
df_gameID = pd.DataFrame(matchid)
df_goalie_per = pd.DataFrame(goalie_person)
df_goalie_pos = pd.DataFrame(goalie_position)
df_goalie_stats = pd.DataFrame(goalie_stats)
Upvotes: 2