Reputation: 3515
I have a series of soccer results and wish to find out how many points a team has scored in a particular number of games
Here is the head of a subset with the cumulative points scored during a season since the latest result
I have been wrist=-slapped a couple of times for not using dput so bear with length
allData <- structure(list(team = c("Arsenal", "Tottenham H", "Tottenham H",
"Arsenal", "Arsenal", "Tottenham H"), venue = c("H", "A", "H",
"A", "H", "A"), result = c("W", "D", "W", "L", "W", "D"), GF = c(1L,
0L, 3L, 1L, 3L, 0L), GA = c(0L, 0L, 1L, 2L, 0L, 0L), gameDate = structure(c(1333868400,
1333782000, 1333263600, 1333177200, 1332572400, 1332572400), class = c("POSIXct",
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("2010/2011", "2011/2012"), class = "factor"),
points = c(3, 1, 3, 0, 3, 1), GD = c(1L, 0L, 2L, -1L, 3L,
0L), cumpts = c(3, 1, 4, 3, 6, 5)), .Names = c("team", "venue",
"result", "GF", "GA", "gameDate", "season", "points", "GD", "cumpts"
), row.names = c(NA, 6L), class = "data.frame")
and here is the data for one team during one season
spurs <- structure(list(team = c("Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H",
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H"),
venue = c("A", "H", "A", "H", "A", "H", "A", "H", "A", "H",
"A", "H", "H", "H", "A", "A", "H", "H", "A", "H", "A", "H",
"A", "H", "A", "A", "H", "A", "H", "A", "H", "A"), result = c("D",
"W", "D", "D", "L", "L", "L", "W", "D", "W", "L", "D", "W",
"W", "D", "W", "D", "W", "L", "W", "W", "W", "W", "W", "W",
"D", "W", "W", "W", "W", "L", "L"), GF = c(0L, 3L, 0L, 1L,
0L, 1L, 2L, 5L, 0L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 1L, 0L), GA = c(0L,
1L, 0L, 1L, 1L, 3L, 5L, 0L, 0L, 1L, 3L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 2L, 0L, 1L, 0L, 1L, 1L, 1L, 2L, 1L, 1L, 0L, 0L, 5L,
3L), gameDate = structure(c(1333782000, 1333263600, 1332572400,
1332313200, 1331366400, 1330848000, 1330243200, 1328947200,
1328515200, 1327996800, 1327219200, 1326528000, 1326268800,
1325577600, 1325318400, 1324972800, 1324540800, 1324281600,
1323590400, 1322899200, 1322294400, 1321862400, 1320562800,
1319958000, 1319353200, 1318748400, 1317538800, 1316847600,
1316329200, 1315638000, 1314514800, 1313996400), class = c("POSIXct",
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2010/2011",
"2011/2012"), class = "factor"), points = c(1, 3, 1, 1, 0,
0, 0, 3, 1, 3, 0, 1, 3, 3, 1, 3, 1, 3, 0, 3, 3, 3, 3, 3,
3, 1, 3, 3, 3, 3, 0, 0), GD = c(0L, 2L, 0L, 0L, -1L, -2L,
-3L, 5L, 0L, 2L, -1L, 0L, 2L, 1L, 0L, 2L, 0L, 1L, -1L, 3L,
2L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 4L, 2L, -4L, -3L), cumpts = c(1,
4, 5, 6, 6, 6, 6, 9, 10, 13, 13, 14, 17, 20, 21, 24, 25,
28, 28, 31, 34, 37, 40, 43, 46, 47, 50, 53, 56, 59, 59, 59
)), .Names = c("team", "venue", "result", "GF", "GA", "gameDate",
"season", "points", "GD", "cumpts"), row.names = c(NA, -32L), class = "data.frame")
I then have this code on the spurs dataframe to calculate points scored in specific game lengths(here 5)
gameLength <- 5
seasonLength <- nrow(spurs)
cumPoints <- c()
cumPoints[1] <- spurs[gameLength,]$cumpts
for (i in gameLength+1:seasonLength) {
cumPoints[i-(gameLength-1)] <- ((spurs[i,]$cumpts)-
(spurs[i-gameLength,]$cumpts))
}
cumPoints <- cumPoints[!is.na(cumPoints)] # not sure why throws up NAs
This produces the correct output
[1] 6 5 2 4 4 7 7 8 8 10 8 11 11 11 8 10 10 12 12 15 15
[22] 13 13 13 13 13 12 9
but I need to be able to transform the allData with a column containing this data for every season and team in the dataframe.
I am assuming I should use ddply somehow, unless there is a better alternative
Upvotes: 1
Views: 287
Reputation: 43265
To replicate your output:
library(zoo)
rollapply(spurs$GD, gamelength, sum)
And if you have allData that looks like the spurs data.frame...
rollsum <- function(df, gamelen=gamelength) {
require(zoo)
out <- rollapply(df$points, gamelen, sum)
return(out)
}
ddply(allData, .(team), rollsum)
Upvotes: 1