MisterButter
MisterButter

Reputation: 807

How to fix loops so that all scraped data gets appended to their lists?

The script I have scrapes the NHL API; it is supposed to scrape all games in a season, with game IDs ranging from 2017020001 to 2017021271. I just finished it and realised that only the last game gets appended to each respective list. So in my case only game 2017021271 is stored, and not the other 1270 games before it.

My code looks like this, what have I done terribly wrong?

I know it has to do with the badly structured way I have written my loops, but I don't know how to fix it. Thank you for understanding!

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# Skater / goalie ID collections, stored under the keys 'home' and 'away'.
player_id, goalie_id = {}, {}

# One entry per skater.
person, position, skaterstats = [], [], []

# One entry per goalie.
goalie_person, goalie_position, goalie_stats = [], [], []

# Team name, team goals and game ID accumulators.
team, team_goals, matchid = [], [], []

#Connect to NHL-API
# NOTE: only the three indented lines below belong to this loop. Each pass
# rebinds game_data, and every statement after the loop starts at column 0,
# so the extraction code further down runs once, after the loop has finished,
# and only ever sees the game_data of the final iteration.
for game_id in range(2017020001, 2017020100, 1):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    r = requests.get(url)
    game_data = r.json()

#Get Keys for Players/Goalies
# Stores the 'skaters' collection of each side under player_id['home'/'away'].
for homeaway in ['home','away']:
    player_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('skaters')
    player_id[homeaway] = player_dict

# Same as above for the 'goalies' collection of each side.
for homeaway in ['home','away']:
    goalie_dict = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('goalies')
    goalie_id[homeaway] = goalie_dict 

#Get PlayerStats/TeamStats
for homeaway in player_id:
    for playerID in player_id[homeaway]:
    # NOTE: as pasted, the statements below are at the same indentation as the
    # inner `for`, so that loop has no body and Python rejects the script with
    # an IndentationError. They use playerID, so presumably they are meant to
    # be nested one level deeper and run once per player.
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # Player entries are looked up under the key 'ID' + the player's ID.
    play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('person')
    play_dict_position = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('position')
    play_dict_skaterstats = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(playerID)).get('stats').get('skaterStats')

     #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

    #Append PlayerStats to Empty list
    person.append(play_dict_person)
    position.append(play_dict_position)
    if play_dict_skaterstats: 
        skaterstats.append(play_dict_skaterstats)
    # When 'skaterStats' is missing or empty, build a placeholder row with
    # every stat set to None.
    if not play_dict_skaterstats:
        play_dict_skaterstats = {}
        play_dict_skaterstats['timeOnIce'] = None
        play_dict_skaterstats['assists'] = None
        play_dict_skaterstats['goals'] = None
        play_dict_skaterstats['shots'] = None
        play_dict_skaterstats['hits'] = None
        play_dict_skaterstats['powerPlayGoals'] = None
        play_dict_skaterstats['powerPlayAssists'] = None
        play_dict_skaterstats['penaltyMinutes'] = None
        play_dict_skaterstats['faceOffPct'] = None
        play_dict_skaterstats['faceOffWins'] = None
        play_dict_skaterstats['faceoffTaken'] = None
        play_dict_skaterstats['takeaways'] = None
        play_dict_skaterstats['giveaways'] = None
        play_dict_skaterstats['shortHandedGoals'] = None
        play_dict_skaterstats['shortHandedAssists'] = None
        play_dict_skaterstats['blocked'] = None
        play_dict_skaterstats['plusMinus'] = None
        play_dict_skaterstats['evenTimeOnIce'] = None
        play_dict_skaterstats['powerPlayTimeOnIce'] = None
        play_dict_skaterstats['shortHandedTimeOnIce'] = None

# NOTE: this append is at column 0, outside every loop, so it runs a single
# time with whatever play_dict_skaterstats last held; the placeholder rows
# built above are not appended from inside the `if not` branch.
skaterstats.append(play_dict_skaterstats)

#Get GoalieStats
for homeaway in goalie_id:
    for goalieID in goalie_id[homeaway]:
    # NOTE: same indentation problem as in the skater section above: the
    # statements below are not nested under the inner `for`.
    play_dict_teamname = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('team').get('name')
    play_dict_teamgoals = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('teamStats').get('teamSkaterStats').get('goals')                
    play_dict_gameid = game_data.get('gamePk')

    # The chained assignments also rebind play_dict_person on every line;
    # nothing later in the script as shown reads that name again.
    goalie_dict_person = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('person')
    goalie_dict_position = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('position')
    goalie_dict_stats = play_dict_person = game_data.get('liveData').get('boxscore').get('teams').get(homeaway).get('players').get('ID' + str(goalieID)).get('stats').get('goalieStats')

    #Append GoalieStats to Empty list
    goalie_person.append(goalie_dict_person)
    goalie_position.append(goalie_dict_position)
    # No placeholder is added when 'goalieStats' is missing or empty.
    if goalie_dict_stats: 
        goalie_stats.append(goalie_dict_stats)

    #Append TeamStats to Empty list
    team.append(play_dict_teamname)
    team_goals.append(play_dict_teamgoals)
    matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Each accumulator list is wrapped in its own DataFrame.

# Skater frames
df_person, df_position, df_skaterstats = (
    pd.DataFrame(rows) for rows in (person, position, skaterstats)
)

# Team / game frames
df_team, df_teamgoals, df_gameID = (
    pd.DataFrame(rows) for rows in (team, team_goals, matchid)
)

# Goalie frames
df_goalie_per, df_goalie_pos, df_goalie_stats = (
    pd.DataFrame(rows) for rows in (goalie_person, goalie_position, goalie_stats)
)

Upvotes: 0

Views: 65

Answers (1)

chitown88
chitown88

Reputation: 28630

I fixed the indentation so that everything you need is inside your initial for loop. See if this fixes your issue:

#Importing Libraries 
import numpy as np
import pandas as pd
import requests
import json
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

#Create Empty lists
# Skater / goalie ID collections, stored under the keys 'home' and 'away'
# and overwritten for every game.
player_id, goalie_id = {}, {}

# One entry per skater.
person, position, skaterstats = [], [], []

# One entry per goalie.
goalie_person, goalie_position, goalie_stats = [], [], []

# Team name, team goals and game ID accumulators.
team, team_goals, matchid = [], [], []

#Connect to NHL-API
# Stat names used to build an all-None placeholder row for a skater whose
# boxscore entry has no 'skaterStats', so that skaterstats receives exactly one
# row per skater and stays aligned with person/position.
SKATER_STAT_KEYS = (
    'timeOnIce', 'assists', 'goals', 'shots', 'hits',
    'powerPlayGoals', 'powerPlayAssists', 'penaltyMinutes',
    'faceOffPct', 'faceOffWins', 'faceoffTaken',
    'takeaways', 'giveaways',
    'shortHandedGoals', 'shortHandedAssists',
    'blocked', 'plusMinus',
    'evenTimeOnIce', 'powerPlayTimeOnIce', 'shortHandedTimeOnIce',
)

# range() excludes its stop value, so this requests games
# 2017020001 through 2017020099.
for game_id in range(2017020001, 2017020100):
    url = 'https://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id)
    # A timeout keeps one stalled request from hanging the whole run, and
    # raise_for_status() reports an HTTP error directly instead of failing
    # later while digging through an error payload.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    game_data = r.json()

    # Both values are the same for every row of this game; look them up once.
    teams = game_data.get('liveData').get('boxscore').get('teams')
    play_dict_gameid = game_data.get('gamePk')

    #Get Keys for Players/Goalies
    for homeaway in ['home', 'away']:
        player_id[homeaway] = teams.get(homeaway).get('skaters')
        goalie_id[homeaway] = teams.get(homeaway).get('goalies')

    #Get PlayerStats/TeamStats
    for homeaway in player_id:
        team_box = teams.get(homeaway)
        # Team-level values are identical for every player on this side, so
        # they are read once per team rather than once per player.
        play_dict_teamname = team_box.get('team').get('name')
        play_dict_teamgoals = team_box.get('teamStats').get('teamSkaterStats').get('goals')
        players = team_box.get('players')

        for playerID in player_id[homeaway]:
            # Player entries are keyed as 'ID' + the player's ID.
            player = players.get('ID' + str(playerID))
            play_dict_person = player.get('person')
            play_dict_position = player.get('position')
            play_dict_skaterstats = player.get('stats').get('skaterStats')

            #Append TeamStats to Empty list
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

            #Append PlayerStats to Empty list
            person.append(play_dict_person)
            position.append(play_dict_position)
            # Missing or empty stats are replaced by a fresh all-None row.
            # The append happens here, inside the per-player loop, exactly
            # once per skater: previously the placeholder was built but only
            # appended after both loops, once per game.
            if not play_dict_skaterstats:
                play_dict_skaterstats = dict.fromkeys(SKATER_STAT_KEYS)
            skaterstats.append(play_dict_skaterstats)

    #Get GoalieStats
    for homeaway in goalie_id:
        team_box = teams.get(homeaway)
        play_dict_teamname = team_box.get('team').get('name')
        play_dict_teamgoals = team_box.get('teamStats').get('teamSkaterStats').get('goals')
        players = team_box.get('players')

        for goalieID in goalie_id[homeaway]:
            goalie = players.get('ID' + str(goalieID))
            goalie_dict_person = goalie.get('person')
            goalie_dict_position = goalie.get('position')
            goalie_dict_stats = goalie.get('stats').get('goalieStats')

            #Append GoalieStats to Empty list
            goalie_person.append(goalie_dict_person)
            goalie_position.append(goalie_dict_position)
            # NOTE(review): unlike skaters, a goalie without 'goalieStats'
            # gets no placeholder row, so goalie_stats can end up shorter than
            # goalie_person. Kept as in the original; confirm this is intended.
            if goalie_dict_stats:
                goalie_stats.append(goalie_dict_stats)

            #Append TeamStats to Empty list
            # These go into the same team/team_goals/matchid lists as the
            # skater rows above.
            team.append(play_dict_teamname)
            team_goals.append(play_dict_teamgoals)
            matchid.append(play_dict_gameid)

#Create DataFrames for all lists
# Each accumulator list is wrapped in its own DataFrame.

# Skater frames
df_person, df_position, df_skaterstats = (
    pd.DataFrame(rows) for rows in (person, position, skaterstats)
)

# Team / game frames
df_team, df_teamgoals, df_gameID = (
    pd.DataFrame(rows) for rows in (team, team_goals, matchid)
)

# Goalie frames
df_goalie_per, df_goalie_pos, df_goalie_stats = (
    pd.DataFrame(rows) for rows in (goalie_person, goalie_position, goalie_stats)
)

Upvotes: 2

Related Questions