pssguy
pssguy

Reputation: 3515

How do I create a function for plyr

I have a series of soccer results and wish to find out how many points a team has scored in a particular number of games

Here is the head of a subset with the cumulative points scored during a season since the latest result

I have been wrist=-slapped a couple of times for not using dput so bear with length

allData <- structure(list(team = c("Arsenal", "Tottenham H", "Tottenham H", 
"Arsenal", "Arsenal", "Tottenham H"), venue = c("H", "A", "H", 
"A", "H", "A"), result = c("W", "D", "W", "L", "W", "D"), GF = c(1L, 
0L, 3L, 1L, 3L, 0L), GA = c(0L, 0L, 1L, 2L, 0L, 0L), gameDate = structure(c(1333868400, 
1333782000, 1333263600, 1333177200, 1332572400, 1332572400), class = c("POSIXct", 
"POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("2010/2011", "2011/2012"), class = "factor"), 
 points = c(3, 1, 3, 0, 3, 1), GD = c(1L, 0L, 2L, -1L, 3L, 
0L), cumpts = c(3, 1, 4, 3, 6, 5)), .Names = c("team", "venue", 
"result", "GF", "GA", "gameDate", "season", "points", "GD", "cumpts"
), row.names = c(NA, 6L), class = "data.frame")

and here is the data for one team during one season

spurs <- structure(list(team = c("Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H", 
"Tottenham H", "Tottenham H", "Tottenham H", "Tottenham H"), 
    venue = c("A", "H", "A", "H", "A", "H", "A", "H", "A", "H", 
    "A", "H", "H", "H", "A", "A", "H", "H", "A", "H", "A", "H", 
    "A", "H", "A", "A", "H", "A", "H", "A", "H", "A"), result = c("D", 
    "W", "D", "D", "L", "L", "L", "W", "D", "W", "L", "D", "W", 
    "W", "D", "W", "D", "W", "L", "W", "W", "W", "W", "W", "W", 
    "D", "W", "W", "W", "W", "L", "L"), GF = c(0L, 3L, 0L, 1L, 
    0L, 1L, 2L, 5L, 0L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
    3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 1L, 0L), GA = c(0L, 
    1L, 0L, 1L, 1L, 3L, 5L, 0L, 0L, 1L, 3L, 1L, 0L, 0L, 1L, 0L, 
    1L, 0L, 2L, 0L, 1L, 0L, 1L, 1L, 1L, 2L, 1L, 1L, 0L, 0L, 5L, 
    3L), gameDate = structure(c(1333782000, 1333263600, 1332572400, 
    1332313200, 1331366400, 1330848000, 1330243200, 1328947200, 
    1328515200, 1327996800, 1327219200, 1326528000, 1326268800, 
    1325577600, 1325318400, 1324972800, 1324540800, 1324281600, 
    1323590400, 1322899200, 1322294400, 1321862400, 1320562800, 
    1319958000, 1319353200, 1318748400, 1317538800, 1316847600, 
    1316329200, 1315638000, 1314514800, 1313996400), class = c("POSIXct", 
    "POSIXt"), tzone = ""), season = structure(c(2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2010/2011", 
    "2011/2012"), class = "factor"), points = c(1, 3, 1, 1, 0, 
    0, 0, 3, 1, 3, 0, 1, 3, 3, 1, 3, 1, 3, 0, 3, 3, 3, 3, 3, 
    3, 1, 3, 3, 3, 3, 0, 0), GD = c(0L, 2L, 0L, 0L, -1L, -2L, 
    -3L, 5L, 0L, 2L, -1L, 0L, 2L, 1L, 0L, 2L, 0L, 1L, -1L, 3L, 
    2L, 2L, 2L, 2L, 1L, 0L, 1L, 1L, 4L, 2L, -4L, -3L), cumpts = c(1, 
    4, 5, 6, 6, 6, 6, 9, 10, 13, 13, 14, 17, 20, 21, 24, 25, 
    28, 28, 31, 34, 37, 40, 43, 46, 47, 50, 53, 56, 59, 59, 59
    )), .Names = c("team", "venue", "result", "GF", "GA", "gameDate", 
"season", "points", "GD", "cumpts"), row.names = c(NA, -32L), class = "data.frame")

I then have this code on the spurs dataframe to calculate points scored in specific game lengths(here 5)

gameLength <- 5
seasonLength <- nrow(spurs)
cumPoints <- c()
cumPoints[1] <- spurs[gameLength,]$cumpts
for (i in gameLength+1:seasonLength) {
cumPoints[i-(gameLength-1)] <- ((spurs[i,]$cumpts)- 
 (spurs[i-gameLength,]$cumpts))
}
cumPoints <- cumPoints[!is.na(cumPoints)] # not sure why throws up NAs

This produces the correct output

 [1]  6  5  2  4  4  7  7  8  8 10  8 11 11 11  8 10 10 12 12 15 15
[22] 13 13 13 13 13 12  9

but I need to be able to transform the allData with a column containing this data for every season and team in the dataframe.

I am assuming I should use ddply somehow, unless there is a better alternative

Upvotes: 1

Views: 287

Answers (1)

Justin
Justin

Reputation: 43265

To replicate your output:

library(zoo)
rollapply(spurs$GD, gamelength, sum)

And if you have allData that looks like the spurs data.frame...

rollsum <- function(df, gamelen=gamelength) {
  require(zoo)
  out <- rollapply(df$points, gamelen, sum)

  return(out)
}

ddply(allData, .(team), rollsum)

Upvotes: 1

Related Questions