Reputation: 23
I'm scraping and cleaning data off a particular site that collects NFL data so I end up with different lengths of a data frame that looks like this:
Now i'm going to want to summarize data in particular ways starting with team away and home yard averages. So I use a NFL team vector and loop these lines as shown below:
teamList <- as.matrix(c("New England Patriots", "Dallas Cowboys", "Denver Broncos",
"Pittsburgh Steelers", "Seattle Seahawks","Oakland Raiders", "Philadelphia Eagles", "Green Bay Packers"
,"San Francisco 49ers", "New York Giants","Chicago Bears", "Minnesota Vikings", "Washington Redskins",
"Carolina Panthers", "Carolina Panthers", "New Orleans Saints", "St. Louis Rams", "New York Jets",
"Baltimore Ravens", "San Diego Chargers", "Indianapolis Colts", "Houston Texans", "Arizona Cardinals",
"Detroit Lions", "Cleveland Browns", "Atlanta Falcons", "Buffalo Bills", "Jacksonville Jaguars", "Cincinnati Bengals",
"Kansas City Chiefs", "Tampa Bay Buccaneers", "Tennesee Titans", "Miami Dolphins"))
#Calcuating average yards per game vs any opponent for each team (home and away)
for (i in 1:nrow(teamList)){
for (y in 1:nrow(grossM)){
homeV1 <- matrix(0,1000000,1)
awayV1 <- matrix(0,1000000,1)
homeV2 <- matrix(0,1000000,1)
awayV2 <- matrix(0,1000000,1)
if (teamList[i,]==grossM[y,4]&grossM[y,5]=="@")(awayV1[y,1] <- grossM[y,9]) else 0
if (teamList[i,]==grossM[y,4]&grossM[y,5]=="")(homeV1[y,1] <- grossM[y,9]) else 0
if (teamList[i,]==grossM[y,6]&grossM[y,5]=="")(awayV2[y,1] <- grossM[y,11]) else 0
if (teamList[i,]==grossM[y,6]&grossM[y,5]=="@")(homeV2[y,1] <- grossM[y,11]) else 0
....
which is obviously inefficient but I haven't had to worry about the efficiency of my writing until now since the loop takes entirely too long (I haven't actually let it complete since it takes somewhere over an hour). Can someone please point me in the write direction, perhaps some type of matrix operation I'm not thinking of?
Thanks in advance for a response!
edit: just randomly realized I should be using aggregate in some way here right?
structure(list(Week = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c(" 1", " 2", " 3", " 4", " 5", " 6", " 7",
" 8", " 9", "10", "11", "12", "13", "14", "15", "16", "17"), class = "factor"),
Day = structure(c(4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("Mon", "Sat", "Sun", "Thu"), class = "factor"),
Date = structure(c(43L, 44L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 45L, 45L, 46L, 47L, 47L, 47L,
47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 48L,
49L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L, 50L, 51L, 30L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 41L, 42L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L, 33L, 34L, 34L,
34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 35L, 36L,
37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L,
38L, 39L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 21L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L,
28L, 28L, 28L, 29L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 19L, 20L, 22L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 24L, 24L, 24L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L,
12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 14L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
15L, 52L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L
), .Label = c("December 10", "December 13", "December 14",
"December 17", "December 19", "December 20", "December 21",
"December 24", "December 26", "December 27", "December 28",
"December 3", "December 6", "December 7", "January 3", "November 1",
"November 12", "November 15", "November 16", "November 19",
"November 2", "November 22", "November 23", "November 26",
"November 29", "November 30", "November 5", "November 8",
"November 9", "October 1", "October 11", "October 12", "October 15",
"October 18", "October 19", "October 22", "October 25", "October 26",
"October 29", "October 4", "October 5", "October 8", "September 10",
"September 13", "September 14", "September 17", "September 20",
"September 21", "September 24", "September 27", "September 28",
"September 4", "September 7"), class = "factor"), `Winner/tie` = structure(c(19L,
26L, 4L, 22L, 7L, 1L, 5L, 10L, 9L, 29L, 16L, 12L, 17L, 31L,
27L, 2L, 10L, 2L, 19L, 30L, 32L, 5L, 18L, 7L, 8L, 23L, 12L,
1L, 25L, 9L, 15L, 22L, 21L, 13L, 25L, 5L, 4L, 24L, 1L, 2L,
19L, 18L, 23L, 28L, 14L, 7L, 10L, 12L, 3L, 14L, 7L, 26L,
10L, 21L, 2L, 22L, 6L, 20L, 5L, 12L, 29L, 32L, 28L, 14L,
8L, 4L, 7L, 6L, 21L, 24L, 10L, 1L, 2L, 19L, 30L, 12L, 25L,
20L, 17L, 22L, 5L, 10L, 7L, 13L, 27L, 18L, 19L, 12L, 11L,
25L, 24L, 28L, 21L, 23L, 32L, 19L, 5L, 29L, 20L, 2L, 15L,
16L, 18L, 17L, 1L, 19L, 18L, 28L, 29L, 23L, 7L, 10L, 13L,
16L, 1L, 3L, 30L, 20L, 5L, 7L, 24L, 4L, 18L, 22L, 19L, 5L,
21L, 14L, 27L, 31L, 25L, 6L, 4L, 6L, 15L, 1L, 19L, 5L, 30L,
18L, 16L, 17L, 11L, 32L, 25L, 13L, 15L, 14L, 30L, 10L, 28L,
1L, 11L, 5L, 3L, 13L, 9L, 16L, 12L, 19L, 5L, 6L, 11L, 22L,
18L, 16L, 26L, 23L, 10L, 13L, 32L, 7L, 28L, 14L, 1L, 3L,
12L, 30L, 7L, 5L, 1L, 31L, 28L, 16L, 17L, 22L, 10L, 25L,
4L, 24L, 27L, 9L, 1L, 20L, 24L, 16L, 22L, 28L, 15L, 12L,
25L, 5L, 8L, 23L, 19L, 32L, 29L, 21L, 29L, 22L, 28L, 26L,
32L, 18L, 2L, 16L, 1L, 12L, 7L, 19L, 13L, 25L, 5L, 11L, 23L,
32L, 6L, 22L, 14L, 18L, 2L, 4L, 3L, 11L, 29L, 16L, 20L, 13L,
1L, 10L, 25L, 20L, 27L, 11L, 5L, 10L, 24L, 17L, 13L, 32L,
4L, 18L, 14L, 7L, 16L, 28L, 28L, 2L, 13L, 5L, 4L, 27L, 10L,
25L, 7L, 24L, 31L), .Label = c("Arizona Cardinals", "Atlanta Falcons",
"Baltimore Ravens", "Buffalo Bills", "Carolina Panthers",
"Chicago Bears", "Cincinnati Bengals", "Cleveland Browns",
"Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers",
"Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars",
"Kansas City Chiefs", "Miami Dolphins", "Minnesota Vikings",
"New England Patriots", "New Orleans Saints", "New York Giants",
"New York Jets", "Oakland Raiders", "Philadelphia Eagles",
"Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers",
"Seattle Seahawks", "St. Louis Rams", "Tampa Bay Buccaneers",
"Tennessee Titans", "Washington Redskins"), class = "factor"),
V5 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L), .Label = c("", "@"), class = "factor"), `Loser/tie` = structure(c(25L,
11L, 14L, 8L, 23L, 20L, 15L, 3L, 21L, 28L, 13L, 6L, 32L,
30L, 18L, 24L, 16L, 21L, 4L, 20L, 29L, 13L, 11L, 26L, 31L,
3L, 28L, 6L, 27L, 24L, 17L, 14L, 32L, 30L, 29L, 20L, 17L,
22L, 27L, 9L, 15L, 26L, 8L, 6L, 31L, 3L, 11L, 16L, 25L, 15L,
16L, 8L, 18L, 4L, 13L, 17L, 23L, 9L, 30L, 27L, 1L, 24L, 11L,
13L, 3L, 31L, 28L, 16L, 27L, 20L, 23L, 11L, 32L, 9L, 15L,
29L, 26L, 2L, 31L, 32L, 28L, 8L, 4L, 15L, 3L, 16L, 14L, 26L,
6L, 1L, 21L, 27L, 9L, 26L, 30L, 22L, 24L, 8L, 14L, 31L, 4L,
25L, 11L, 13L, 3L, 17L, 6L, 9L, 27L, 22L, 25L, 12L, 31L,
11L, 8L, 26L, 2L, 21L, 14L, 8L, 9L, 17L, 29L, 15L, 32L, 12L,
30L, 10L, 2L, 20L, 23L, 26L, 22L, 29L, 3L, 28L, 21L, 31L,
9L, 23L, 10L, 24L, 12L, 20L, 8L, 7L, 31L, 2L, 24L, 6L, 27L,
7L, 23L, 32L, 29L, 22L, 17L, 26L, 18L, 4L, 9L, 12L, 24L,
17L, 2L, 4L, 15L, 31L, 19L, 20L, 21L, 29L, 25L, 30L, 27L,
8L, 11L, 2L, 8L, 20L, 29L, 15L, 18L, 23L, 3L, 21L, 26L, 14L,
13L, 19L, 6L, 32L, 18L, 30L, 4L, 26L, 31L, 3L, 14L, 9L, 7L,
2L, 27L, 10L, 13L, 6L, 11L, 17L, 30L, 9L, 8L, 17L, 4L, 6L,
15L, 3L, 24L, 23L, 27L, 31L, 14L, 10L, 21L, 20L, 26L, 24L,
30L, 19L, 17L, 21L, 5L, 9L, 25L, 27L, 28L, 8L, 15L, 31L,
12L, 7L, 8L, 2L, 29L, 6L, 30L, 26L, 21L, 19L, 15L, 9L, 22L,
12L, 31L, 3L, 23L, 1L, 12L, 20L, 32L, 30L, 6L, 9L, 14L, 8L,
3L, 15L, 16L), .Label = c("Arizona Cardinals", "Atlanta Falcons",
"Baltimore Ravens", "Buffalo Bills", "Carolina Panthers",
"Chicago Bears", "Cincinnati Bengals", "Cleveland Browns",
"Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers",
"Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars",
"Kansas City Chiefs", "Miami Dolphins", "Minnesota Vikings",
"New England Patriots", "New Orleans Saints", "New York Giants",
"New York Jets", "Oakland Raiders", "Philadelphia Eagles",
"Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers",
"Seattle Seahawks", "St. Louis Rams", "Tampa Bay Buccaneers",
"Tennessee Titans", "Washington Redskins"), class = "factor"),
PtsW = structure(c(18L, 23L, 17L, 21L, 23L, 21L, 10L, 9L,
17L, 24L, 17L, 21L, 7L, 32L, 10L, 16L, 21L, 14L, 30L, 16L,
14L, 14L, 16L, 14L, 18L, 27L, 17L, 37L, 33L, 10L, 13L, 10L,
22L, 9L, 2L, 17L, 31L, 14L, 36L, 29L, 39L, 21L, 17L, 16L,
25L, 18L, 14L, 28L, 13L, 6L, 26L, 20L, 13L, 14L, 37L, 17L,
12L, 16L, 27L, 7L, 14L, 13L, 3L, 17L, 23L, 4L, 17L, 8L, 20L,
29L, 6L, 32L, 15L, 20L, 28L, 14L, 14L, 21L, 28L, 24L, 17L,
16L, 24L, 21L, 15L, 6L, 24L, 17L, 27L, 15L, 17L, 10L, 17L,
27L, 21L, 20L, 17L, 14L, 17L, 1L, 24L, 13L, 18L, 34L, 16L,
26L, 13L, 3L, 17L, 24L, 6L, 19L, 10L, 35L, 24L, 19L, 13L,
40L, 19L, 21L, 23L, 23L, 11L, 18L, 17L, 27L, 22L, 17L, 7L,
24L, 28L, 12L, 12L, 27L, 12L, 29L, 17L, 17L, 1L, 20L, 19L,
10L, 8L, 36L, 20L, 1L, 9L, 14L, 35L, 7L, 19L, 24L, 8L, 34L,
6L, 14L, 14L, 23L, 20L, 10L, 23L, 7L, 35L, 28L, 10L, 20L,
21L, 14L, 20L, 14L, 10L, 21L, 29L, 15L, 9L, 23L, 17L, 13L,
27L, 31L, 17L, 32L, 28L, 24L, 5L, 13L, 7L, 35L, 20L, 25L,
16L, 9L, 13L, 14L, 13L, 1L, 20L, 25L, 39L, 18L, 23L, 28L,
14L, 5L, 17L, 14L, 11L, 21L, 21L, 9L, 20L, 20L, 25L, 28L,
13L, 24L, 30L, 20L, 14L, 23L, 6L, 24L, 28L, 25L, 13L, 28L,
16L, 16L, 8L, 38L, 10L, 6L, 10L, 22L, 13L, 7L, 28L, 24L,
28L, 10L, 18L, 10L, 9L, 14L, 28L, 17L, 25L, 10L, 20L, 24L,
12L, 10L, 20L, 14L, 13L, 26L, 26L, 27L, 7L, 10L, 13L, 18L,
21L, 20L, 13L, 24L, 16L), .Label = c("10", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34",
"35", "36", "37", "38", "39", "40", "41", "42", "43", "44",
"45", "47", "48", "49", "51", "52"), class = "factor"), PtsL = structure(c(12L,
19L, 5L, 2L, 4L, 10L, 34L, 4L, 17L, 23L, 11L, 14L, 2L, 5L,
21L, 15L, 15L, 11L, 24L, 10L, 2L, 8L, 7L, 10L, 5L, 25L, 8L,
14L, 9L, 2L, 11L, 32L, 12L, 34L, 31L, 13L, 5L, 8L, 32L, 19L,
8L, 5L, 11L, 1L, 25L, 15L, 3L, 19L, 11L, 4L, 12L, 18L, 11L,
2L, 12L, 5L, 11L, 11L, 14L, 21L, 13L, 11L, 2L, 11L, 22L,
4L, 15L, 8L, 18L, 8L, 2L, 8L, 10L, 31L, 23L, 2L, 11L, 12L,
2L, 11L, 14L, 14L, 12L, 11L, 11L, 2L, 18L, 11L, 26L, 4L,
32L, 21L, 11L, 20L, 22L, 14L, 7L, 31L, 12L, 32L, 23L, 4L,
10L, 17L, 9L, 32L, 11L, 3L, 31L, 11L, 2L, 2L, 31L, 2L, 11L,
17L, 11L, 30L, 17L, 2L, 18L, 8L, 9L, 14L, 2L, 20L, 9L, 15L,
7L, 19L, 27L, 10L, 8L, 4L, 11L, 24L, 17L, 2L, 31L, 5L, 4L,
10L, 7L, 5L, 34L, 31L, 4L, 12L, 8L, 6L, 4L, 23L, 4L, 7L,
4L, 8L, 5L, 21L, 4L, 4L, 5L, 4L, 5L, 11L, 2L, 13L, 16L, 12L,
15L, 31L, 5L, 32L, 22L, 3L, 4L, 18L, 14L, 10L, 21L, 28L,
21L, 29L, 32L, 11L, 4L, 11L, 21L, 2L, 12L, 19L, 11L, 7L,
11L, 8L, 11L, 21L, 33L, 31L, 7L, 32L, 11L, 1L, 2L, 3L, 31L,
12L, 5L, 15L, 14L, 7L, 4L, 5L, 16L, 8L, 8L, 5L, 8L, 11L,
5L, 7L, 2L, 18L, 27L, 18L, 11L, 15L, 12L, 11L, 3L, 8L, 4L,
31L, 8L, 8L, 8L, 4L, 18L, 31L, 33L, 8L, 3L, 8L, 7L, 11L,
2L, 11L, 22L, 2L, 31L, 14L, 8L, 4L, 15L, 7L, 8L, 31L, 7L,
26L, 31L, 5L, 11L, 8L, 15L, 18L, 7L, 8L, 2L), .Label = c("0",
"10", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25", "26", "27", "28", "29", "3",
"30", "31", "32", "33", "34", "35", "38", "39", "49", "6",
"7", "8", "9"), class = "factor"), YdsW = structure(c(72L,
151L, 58L, 52L, 103L, 122L, 13L, 4L, 130L, 65L, 51L, 45L,
10L, 37L, 102L, 102L, 32L, 106L, 158L, 52L, 83L, 64L, 64L,
97L, 16L, 137L, 72L, 33L, 140L, 70L, 103L, 59L, 74L, 113L,
12L, 126L, 123L, 5L, 136L, 132L, 147L, 21L, 146L, 81L, 88L,
141L, 67L, 137L, 69L, 47L, 135L, 132L, 59L, 34L, 88L, 121L,
81L, 132L, 8L, 73L, 49L, 115L, 60L, 46L, 157L, 3L, 117L,
49L, 165L, 161L, 31L, 60L, 116L, 69L, 79L, 45L, 63L, 94L,
128L, 148L, 93L, 134L, 68L, 92L, 99L, 44L, 115L, 80L, 167L,
38L, 123L, 96L, 23L, 112L, 68L, 66L, 101L, 36L, 138L, 88L,
29L, 87L, 121L, 156L, 114L, 131L, 48L, 46L, 96L, 139L, 30L,
155L, 15L, 57L, 152L, 76L, 24L, 170L, 89L, 81L, 142L, 118L,
27L, 24L, 143L, 122L, 48L, 76L, 41L, 151L, 169L, 136L, 19L,
104L, 11L, 139L, 108L, 34L, 48L, 94L, 34L, 23L, 22L, 160L,
142L, 10L, 36L, 17L, 162L, 97L, 159L, 93L, 85L, 78L, 96L,
75L, 95L, 94L, 43L, 69L, 28L, 24L, 125L, 111L, 76L, 113L,
79L, 109L, 127L, 73L, 109L, 86L, 130L, 54L, 55L, 54L, 39L,
96L, 87L, 154L, 164L, 145L, 127L, 6L, 4L, 144L, 27L, 163L,
98L, 9L, 25L, 41L, 100L, 96L, 62L, 50L, 133L, 120L, 90L,
129L, 67L, 120L, 150L, 1L, 39L, 84L, 40L, 124L, 42L, 82L,
119L, 134L, 126L, 64L, 49L, 18L, 153L, 27L, 7L, 61L, 35L,
87L, 149L, 103L, 20L, 116L, 48L, 123L, 14L, 78L, 83L, 110L,
95L, 81L, 2L, 11L, 166L, 51L, 91L, 98L, 89L, 98L, 141L, 63L,
77L, 156L, 129L, 132L, 106L, 131L, 29L, 7L, 48L, 26L, 56L,
67L, 105L, 168L, 44L, 53L, 71L, 42L, 72L, 156L, 90L, 118L,
107L), .Label = c("126", "205", "209", "219", "231", "232",
"242", "244", "248", "256", "258", "259", "263", "268", "270",
"274", "276", "277", "280", "281", "284", "287", "289", "290",
"291", "292", "293", "294", "295", "296", "297", "299", "300",
"303", "305", "308", "309", "310", "313", "317", "318", "319",
"320", "321", "322", "323", "326", "327", "328", "329", "330",
"333", "334", "336", "337", "339", "340", "342", "344", "345",
"346", "348", "349", "350", "352", "353", "354", "355", "356",
"359", "360", "361", "362", "363", "364", "365", "366", "368",
"369", "370", "371", "372", "373", "374", "375", "376", "377",
"378", "379", "380", "381", "382", "383", "385", "386", "388",
"389", "390", "391", "393", "394", "395", "396", "397", "398",
"402", "405", "406", "407", "408", "411", "412", "413", "414",
"417", "418", "419", "420", "423", "424", "425", "427", "428",
"429", "430", "431", "433", "434", "435", "436", "437", "438",
"439", "442", "445", "446", "448", "449", "451", "453", "458",
"459", "460", "463", "467", "469", "471", "474", "480", "481",
"483", "491", "493", "497", "500", "503", "505", "507", "508",
"510", "519", "521", "522", "524", "525", "537", "546", "568",
"597", "608"), class = "factor"), TOW = structure(c(1L, 4L,
1L, 2L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 1L, 2L, 2L, 2L, 3L, 2L,
1L, 3L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 3L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L,
1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 2L,
4L, 1L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 1L, 4L, 1L, 1L, 4L, 2L,
1L, 3L, 4L, 3L, 4L, 1L, 1L, 1L, 3L, 2L, 1L, 4L, 1L, 5L, 3L,
1L, 1L, 2L, 1L, 4L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 1L, 3L, 4L, 1L, 1L, 1L,
2L, 1L, 3L, 2L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 3L, 2L, 4L, 3L,
1L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 4L, 2L, 1L, 1L, 3L,
1L, 1L, 3L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
3L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 3L, 2L, 3L, 1L, 4L, 1L, 3L,
2L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 1L, 3L, 1L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 3L,
2L, 1L, 2L, 1L, 1L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L,
2L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 4L, 2L,
2L, 1L, 3L, 6L, 3L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 4L, 1L), .Label = c("0",
"1", "2", "3", "4", "5"), class = "factor"), YdsL = structure(c(168L,
69L, 70L, 85L, 32L, 142L, 44L, 6L, 59L, 102L, 135L, 140L,
106L, 50L, 34L, 138L, 79L, 130L, 106L, 87L, 17L, 67L, 87L,
108L, 128L, 172L, 88L, 97L, 143L, 23L, 129L, 102L, 133L,
82L, 41L, 125L, 132L, 87L, 5L, 105L, 63L, 118L, 109L, 4L,
159L, 137L, 60L, 90L, 42L, 157L, 167L, 158L, 89L, 78L, 156L,
23L, 30L, 97L, 144L, 11L, 164L, 84L, 39L, 163L, 124L, 52L,
136L, 57L, 125L, 130L, 58L, 160L, 48L, 43L, 89L, 96L, 141L,
146L, 66L, 22L, 96L, 65L, 117L, 134L, 151L, 91L, 143L, 178L,
163L, 169L, 33L, 3L, 166L, 150L, 171L, 121L, 106L, 114L,
123L, 39L, 122L, 99L, 51L, 86L, 52L, 48L, 71L, 20L, 9L, 116L,
110L, 2L, 15L, 52L, 37L, 120L, 173L, 149L, 111L, 17L, 144L,
136L, 84L, 161L, 36L, 140L, 127L, 75L, 69L, 149L, 162L, 99L,
82L, 56L, 136L, 102L, 152L, 29L, 19L, 120L, 21L, 161L, 121L,
107L, 101L, 39L, 80L, 122L, 127L, 105L, 72L, 124L, 19L, 8L,
17L, 46L, 14L, 12L, 101L, 83L, 14L, 115L, 24L, 95L, 92L,
148L, 151L, 35L, 68L, 47L, 94L, 104L, 177L, 103L, 117L, 98L,
72L, 83L, 50L, 96L, 16L, 127L, 1L, 112L, 122L, 109L, 49L,
28L, 139L, 155L, 114L, 45L, 131L, 61L, 145L, 53L, 62L, 69L,
86L, 48L, 128L, 25L, 21L, 76L, 9L, 124L, 93L, 113L, 175L,
75L, 25L, 26L, 165L, 63L, 119L, 116L, 154L, 121L, 82L, 54L,
10L, 128L, 141L, 138L, 102L, 137L, 131L, 55L, 112L, 113L,
47L, 73L, 74L, 86L, 77L, 117L, 145L, 40L, 7L, 64L, 42L, 147L,
114L, 104L, 129L, 81L, 174L, 11L, 18L, 176L, 67L, 107L, 26L,
100L, 13L, 27L, 38L, 170L, 121L, 43L, 155L, 126L, 142L, 131L,
153L, 72L, 31L), .Label = c("125", "140", "142", "146", "156",
"173", "178", "186", "189", "190", "196", "201", "205", "210",
"211", "212", "213", "215", "216", "220", "221", "225", "226",
"227", "230", "231", "232", "240", "242", "243", "245", "246",
"247", "248", "249", "250", "254", "255", "256", "257", "258",
"263", "264", "265", "266", "267", "268", "270", "272", "273",
"274", "276", "280", "282", "284", "285", "287", "288", "289",
"290", "291", "292", "293", "294", "298", "299", "300", "301",
"302", "304", "305", "306", "307", "308", "309", "310", "312",
"313", "314", "316", "317", "318", "319", "320", "321", "322",
"323", "324", "325", "326", "328", "329", "331", "332", "333",
"334", "335", "338", "339", "341", "342", "343", "344", "345",
"347", "349", "350", "354", "355", "356", "359", "361", "363",
"364", "365", "366", "368", "369", "370", "371", "372", "375",
"376", "377", "380", "382", "383", "385", "386", "388", "389",
"391", "393", "394", "396", "397", "398", "399", "401", "402",
"406", "408", "409", "411", "412", "413", "414", "415", "416",
"417", "420", "422", "423", "424", "427", "428", "431", "432",
"433", "435", "436", "440", "444", "447", "452", "460", "461",
"464", "469", "472", "479", "493", "496", "502", "509", "512",
"538", "548"), class = "factor"), TOL = structure(c(2L, 3L,
4L, 6L, 3L, 2L, 4L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 6L,
2L, 4L, 4L, 1L, 2L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 4L, 1L, 6L,
4L, 2L, 2L, 3L, 4L, 5L, 5L, 2L, 2L, 3L, 3L, 1L, 3L, 2L, 4L,
2L, 1L, 1L, 2L, 2L, 2L, 3L, 5L, 3L, 3L, 1L, 6L, 2L, 4L, 3L,
2L, 3L, 1L, 2L, 2L, 1L, 1L, 5L, 4L, 7L, 3L, 3L, 3L, 5L, 3L,
4L, 5L, 3L, 1L, 4L, 2L, 4L, 3L, 2L, 1L, 2L, 2L, 4L, 4L, 1L,
5L, 3L, 2L, 2L, 2L, 5L, 4L, 3L, 5L, 4L, 1L, 2L, 3L, 3L, 1L,
1L, 1L, 2L, 4L, 1L, 4L, 3L, 3L, 1L, 5L, 2L, 5L, 1L, 2L, 2L,
1L, 5L, 3L, 3L, 4L, 3L, 1L, 3L, 5L, 2L, 5L, 3L, 5L, 2L, 2L,
3L, 2L, 3L, 6L, 2L, 1L, 3L, 4L, 3L, 2L, 5L, 5L, 3L, 1L, 2L,
1L, 6L, 5L, 3L, 2L, 3L, 2L, 2L, 4L, 3L, 2L, 3L, 4L, 3L, 2L,
3L, 2L, 2L, 4L, 4L, 5L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L,
2L, 4L, 3L, 2L, 4L, 4L, 2L, 3L, 2L, 2L, 4L, 1L, 3L, 2L, 2L,
3L, 3L, 2L, 4L, 5L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 5L, 2L, 2L,
1L, 3L, 2L, 4L, 5L, 3L, 5L, 4L, 3L, 3L, 3L, 1L, 2L, 3L, 4L,
2L, 2L, 4L, 2L, 3L, 4L, 2L, 4L, 2L, 3L, 5L, 5L, 2L, 5L, 3L,
1L, 4L, 4L, 2L, 2L, 1L, 5L, 5L, 4L, 3L, 2L, 3L, 2L, 4L, 2L,
3L, 3L, 4L, 4L, 5L, 3L, 1L, 3L, 2L, 4L), .Label = c("0",
"1", "2", "3", "4", "5", "6"), class = "factor")), .Names = c("Week",
"Day", "Date", "Winner/tie", "V5", "Loser/tie", "PtsW", "PtsL",
"YdsW", "TOW", "YdsL", "TOL"), row.names = c(NA, -267L), class = "data.frame")
Upvotes: 2
Views: 106
Reputation: 3364
Here is a summarized solution as an extension of @thepule answer
One of the main problem is that the data is not "tidy" (http://garrettgman.github.io/tidying/). I.e. the team has is a single variable and the outcome is a single variable
#wtf <- structure(.....
# in the previous structure the continous variables were encoded as factors and should be transformed to integers
for(i in 7:11) {
wtf[[i]] <- as.integer(as.character(wtf[[i]]))
}
wtf %>%
tbl_df() %>%
tidyr::gather(key = "outcome", value = "team", 4, 6) %>%
group_by(team, outcome, V5) %>%
summarise(meanYds = mean(ifelse(outcome == "Winner/tie",
(YdsW),
(YdsL))))
Explanation
wtf %>% tbl_df()
simple puts the data.frame
into a nice classtidyr::gather
converts the "flattens" the teams and outcom of the game into two columnsteam
, outcome
, V5
meaning that for each distinct combination of the values for these variables we can compute some measure ...ifelse
will return a vector
of winning or losing yeards and we just compute the mean.Which should result in the table containing for all team, outcome, and home advantage the mean yards across games.
Upvotes: 1
Reputation: 1751
Alright, based on the data frame you dputted it seems that the following should provide you with the information you need, although maybe in a slightly different format.
First I load dplyr
and convert the Yard number in numeric format:
library(dplyr)
df$YdsW <- as.numeric(df$YdsW)
df$YdsL <- as.numeric(df$YdsL)
Than, based on my understanding of the data set:
Avg_win_yard <- df %>%
group_by(`Winner/tie`, V5) %>%
summarise(Total_yard = mean(YdsW))
Looking at the first 4 rows of the data frame, they contain:
head(Avg_win_yard, 4)
Source: local data frame [4 x 3]
Groups: Winner/tie [2]
Winner/tie V5 Total_yard
(fctr) (fctr) (dbl)
1 Arizona Cardinals 109.3333
2 Arizona Cardinals @ 108.0000
3 Atlanta Falcons 111.4000
4 Atlanta Falcons @ 93.7500
The way I interpret this is the Cardinals won running an average 109.3 yards playing home and 108 playing outside of home.
Similarly, for the losing team:
Avg_lose_yard <- df %>%
group_by(`Loser/tie`, V5) %>%
summarise(Total_yard = mean(YdsL))
head(Avg_lose_yard, 4)
Source: local data frame [4 x 3]
Groups: Loser/tie [2]
Loser/tie V5 Total_yard
(fctr) (fctr) (dbl)
1 Arizona Cardinals 169.00
2 Arizona Cardinals @ 95.50
3 Atlanta Falcons 80.75
4 Atlanta Falcons @ 133.50
This I interpret as the cardinals lost running 169 Avg yard playing outside home and 95.50 playing home. Once you have the information, you can dissect it as you want.
Upvotes: 0
Reputation: 10360
Perhaps you try a foreach
loop. Thus, you can use the power of all your CPU kernels. This example uses 4 cores, you should change according to your hardware.
library(foreach)
library(doSNOW)
ncpus <- 4
cl <- makeCluster(ncpus, type = "SOCK")
registerDoSNOW(cl)
f <- foreach(i = 1:nrow(teamList), .combine = rbind, .inorder = F) %dopar% {
for (y in 1:nrow(grossM)){
homeV1 <- matrix(0,1000000,1)
awayV1 <- matrix(0,1000000,1)
homeV2 <- matrix(0,1000000,1)
awayV2 <- matrix(0,1000000,1)
if (teamList[i,]==grossM[y,4]&grossM[y,5]=="@")(awayV1[y,1] <- grossM[y,9]) else 0
if (teamList[i,]==grossM[y,4]&grossM[y,5]=="")(homeV1[y,1] <- grossM[y,9]) else 0
if (teamList[i,]==grossM[y,6]&grossM[y,5]=="")(awayV2[y,1] <- grossM[y,11]) else 0
if (teamList[i,]==grossM[y,6]&grossM[y,5]=="@")(homeV2[y,1] <- grossM[y,11]) else 0
# .... #
}
data.frame(team = teamList[i], awayV1, homeV1, awayV2, homeV2)
}
Upvotes: 0
Reputation: 2797
Forgive me if this isn't clear, but this is my first Stack overflow post.
I assume you are reading this in from pro-football-reference. I cleaned it up as presumably you did.
library(XML)
library(dplyr)
First read in the data
url <- "http://www.pro-football-reference.com/years/2015/games.htm"
nfl_weeks <- XML::readHTMLTable(url)[[1]]
Then clean the data
nfl_weeks <- nfl_weeks[nfl_weeks$Week != "Week",][,-7]
names(nfl_weeks)[5] <- 'vs'
nfl_weeks[,7:ncol(nfl_weeks)] <- apply(nfl_weeks[,7:ncol(nfl_weeks)],2,as.numeric)
Followed by split-apply-combine strategy
# split
s <- split(nfl_weeks, nfl_weeks$`Winner/tie`)[2:33]
YdsW <- lapply(s, function(i) mYdsW = mean(i$YdsW))
YdsW <- do.call("c", YdsW)
# apply
YdsL<- lapply(s, function(i) mYdsW = mean(i$YdsL))
# combine
YdsL <- do.call("c", YdsL)
Upvotes: 1