Reputation: 51
Date Return
2000 0.1
2001 0.2
2002 0.3
2003 0.4
2004 0.5
2005 0.6
2006 0.7
2007 0.8
2008 0.9
Using the example data, I would like to generate a column such as the '3-Year Sum' below. In which the 3rd entry on the '3-Year Sum' column is the value of the previous 3 years.
Date Value 3-Year Sum
2000 0.1 -
2001 0.2 -
2002 0.3 0.6
2003 0.4 -
2004 0.5 -
2005 0.6 1.5
2006 0.7 -
2007 0.8 -
2008 0.9 2.4
Many Thanks
Upvotes: 1
Views: 52
Reputation: 39603
Try this using a loop and an index:
#Data
df <- structure(list(Date = 2000:2008, Return = c(0.1, 0.2, 0.3, 0.4,
0.5, 0.6, 0.7, 0.8, 0.9)), class = "data.frame", row.names = c(NA,
-9L))
#Code
#Create index
seqi <- seq(3,dim(df)[1],by = 3)
#Allocate in var
df$var<- NA
#Sum
for(i in seqi)
{
df[i,'var']<-sum(df$Return[(i-2):i],na.rm=T)
}
Output:
Date Return var
1 2000 0.1 NA
2 2001 0.2 NA
3 2002 0.3 0.6
4 2003 0.4 NA
5 2004 0.5 NA
6 2005 0.6 1.5
7 2006 0.7 NA
8 2007 0.8 NA
9 2008 0.9 2.4
For multiple columns here an approach but have to choose the columns and allocate the results in other dataframe. After that you can combine them. All the code to do that is next:
#Data
df <- structure(list(Date = 2000:2008, Return = c(0.1, 0.2, 0.3, 0.4,
0.5, 0.6, 0.7, 0.8, 0.9), Return1 = c(0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9), Return2 = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
0.7, 0.8, 0.9)), row.names = c(NA, -9L), class = "data.frame")
#Function
compute <- function(x)
{
#Code
#Create index
seqi <- seq(3,length(x),by = 3)
#Create empty vector
empty <- rep(NA,length(x))
#Sum
for(i in seqi)
{
empty[i]<-sum(x[(i-2):i],na.rm=T)
}
return(empty)
}
Now apply:
#Apply
add <- as.data.frame(apply(df[,-c(1)],2,function(x) compute(x)))
names(add) <- paste0('Dif.',names(add))
#Final df
ndf <- cbind(df,add)
Date Return Return1 Return2 Dif.Return Dif.Return1 Dif.Return2
1 2000 0.1 0.1 0.1 NA NA NA
2 2001 0.2 0.2 0.2 NA NA NA
3 2002 0.3 0.3 0.3 0.6 0.6 0.6
4 2003 0.4 0.4 0.4 NA NA NA
5 2004 0.5 0.5 0.5 NA NA NA
6 2005 0.6 0.6 0.6 1.5 1.5 1.5
7 2006 0.7 0.7 0.7 NA NA NA
8 2007 0.8 0.8 0.8 NA NA NA
9 2008 0.9 0.9 0.9 2.4 2.4 2.4
Upvotes: 1
Reputation: 1224
Using data.table:
> library(data.table)
> DT <- data.table(Date = 2000:2008, Return = (1:9)/10)
> DT[, index := ceiling(seq_along(1:nrow(DT)) / 3)]
> DT[, `3-Year Sum` := .(ifelse(.I %% 3 == 0, cumsum(Return), NA)), by = index]
> DT[, index := NULL]
> DT
Date Return 3-Year Sum
1: 2000 0.1 NA
2: 2001 0.2 NA
3: 2002 0.3 0.6
4: 2003 0.4 NA
5: 2004 0.5 NA
6: 2005 0.6 1.5
7: 2006 0.7 NA
8: 2007 0.8 NA
9: 2008 0.9 2.4
Or with the data.table::frollsum function.
> DT <- data.table(Date = 2000:2008, Return = (1:9)/10)
> DT[, `3-Year Sum` := ifelse(.I %% 3 == 0, frollsum(Return, n = 3), NA)]
> DT
Date Return 3-Year Sum
1: 2000 0.1 NA
2: 2001 0.2 NA
3: 2002 0.3 0.6
4: 2003 0.4 NA
5: 2004 0.5 NA
6: 2005 0.6 1.5
7: 2006 0.7 NA
8: 2007 0.8 NA
9: 2008 0.9 2.4
For many columns at once.
> DT <- data.table(Date = 2000:2008, Return = (1:9)/10)
> DT[, Return2 := Return*10]
> for (i in c('Return', 'Return2')) {
new_col_name <- paste0(i, '_3_year_sum')
DT[, c(new_col_name) := ifelse(.I %% 3 == 0, frollsum(get(i), n = 3), NA)]
}
> DT
Date Return Return2 Return_3_year_sum Return2_3_year_sum
1: 2000 0.1 1 NA NA
2: 2001 0.2 2 NA NA
3: 2002 0.3 3 0.6 6
4: 2003 0.4 4 NA NA
5: 2004 0.5 5 NA NA
6: 2005 0.6 6 1.5 15
7: 2006 0.7 7 NA NA
8: 2007 0.8 8 NA NA
9: 2008 0.9 9 2.4 24
Upvotes: 0
Reputation: 35584
A base
R solution with ave()
:
df$Sum <- ave(df$Return, (1:nrow(df)-1) %/% 3, FUN = sum)
df$Sum[-seq(3, nrow(df), by = 3)] <- NA
# Date Return Sum
# 1 2000 0.1 NA
# 2 2001 0.2 NA
# 3 2002 0.3 0.6
# 4 2003 0.4 NA
# 5 2004 0.5 NA
# 6 2005 0.6 1.5
# 7 2006 0.7 NA
# 8 2007 0.8 NA
# 9 2008 0.9 2.4
# 10 2009 1.0 NA
# 11 2010 1.1 NA
Data
df <- data.frame(Date = 2000 + 0:10, Return = 1:11/10)
Upvotes: 0
Reputation: 4187
You could also approach this as follows in base R without a for-loop:
grp <- rep(1:(ceiling(nrow(df)/3)), each = 3, length.out = nrow(df))
df$three.yr.sum <- ave(df$Return, grp, FUN = function(x) zoo::rollsumr(x, k = 3, na.pad = TRUE))
The result:
> df
Date Return three.yr.sum
1 2000 0.1 NA
2 2001 0.2 NA
3 2002 0.3 0.6
4 2003 0.4 NA
5 2004 0.5 NA
6 2005 0.6 1.5
7 2006 0.7 NA
8 2007 0.8 NA
9 2008 0.9 2.4
Upvotes: 0