Reputation: 33
I am trying to perform a rolling regression for time t over the last 36 months for companies with observations for 18 of these months, but I am not able to make the function work. I only want the coefficient for var1. X, y, z are control variables.
Here is a sample of the data and the code I am trying to run.
structure(list(Year = c(2018, 2014, 2008, 2004, 2005, 2002, 2010,
2008, 2013, 1998), Month = c(6, 12, 4, 6, 4, 8, 12, 11, 3, 3),
ISIN = c("NO0004895103", "NO0010571680", "NO0010010473",
"NO0003079709", "NO0003117202", "NO0003073801", "NO0010379266",
"NO0004913609", "NO0003072407", "NO0003679102"), SIZE = c(3637822300,
1.155e+10, 1925631048.5, 519688494.5, 790931587.08, 127597142.4,
2892152759.3, 554108244.7, 73484767, 375946560), BM = c(7.47698486077218e-07,
7.92202995594714e-07, 1.9900582030005e-07, 3.59385930497676e-07,
6.54209144403066e-07, 6.72227955087816e-07, 1.43273711611857e-06,
5.15824596832591e-07, 1.23252873661613e-06, 2.79726876987137e-07
), OP = c(0.145781283498513, 0.0433749257278669, 0.130477272126991,
0.211294037715838, 0.127523990874984, -0.0185266025249259,
-0.0822835066759631, 0.128844560922287, -0.0155446815227611,
0.416024745410667), INVEST = c(1.58608106515088, 0.00307252384303782,
0.33925195328069, -0.0984424241606425, 0.333479950150282,
0.13302885162465, -0.035773912311751, 0.408569401011161,
-0.206778240645154, -0.177187857233583), MonthlyReturn = c(-0.019509251810137,
0.095308641975309, 0.14864864864865, -0.034364261168385,
-0.064512147964095, -0.14080459770115, 0.13580244980708,
-0.1890214797136, -0.045226130653266, 0.39682527166336),
RiskFreeRate = c(0.00064, 0.00117, 0.00532, 0.00163, 0.00166,
0.00594, 0.00206, 0.00499, 0.00153, 0.00332), ShareTurnover = c(31649L,
907793L, 5318465L, 1831390L, 8956640L, 302000L, 7333090L,
516000L, 75553L, 2836550L), MarketExcessReturn = c(1.0813,
2.7638, 11.5701, 6.4672, -3.3108, -1.8598, 10.5445, -9.0969,
-0.0793, 10.8305), ExcessReturn = c(-2.0149251810137, 9.4138641975309,
14.332864864865, -3.5994261168385, -6.6172147964095, -14.674459770115,
13.374244980708, -19.40114797136, -4.6756130653266, 39.350527166336
), TradeDate = structure(c(17711, 16434, 13999, 12599, 12902,
11929, 14973, 14211, 15791, 10316), class = "Date"), GR_SIZE = structure(c(2L,
3L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L), .Label = c("1", "2",
"3"), class = "factor"), GR_OP = structure(c(2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 3L), .Label = c("1", "2", "3"), class = "factor"),
GR_BM = structure(c(2L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L
), .Label = c("1", "2", "3"), class = "factor"), GR_INVEST = structure(c(3L,
1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L), .Label = c("1", "2",
"3"), class = "factor"), SIZE_BM = structure(c(5L, 8L, 4L,
4L, 5L, 2L, 9L, 2L, 2L, 1L), .Label = c("11", "12", "13",
"21", "22", "23", "31", "32", "33"), class = "factor"), SIZE_OP = structure(c(5L,
7L, 5L, 5L, 4L, 1L, 7L, 2L, 1L, 3L), .Label = c("11", "12",
"13", "21", "22", "23", "31", "32", "33"), class = "factor"),
SIZE_INVEST = structure(c(6L, 7L, 6L, 4L, 6L, 3L, 8L, 3L,
1L, 1L), .Label = c("11", "12", "13", "21", "22", "23", "31",
"32", "33"), class = "factor"), Date = structure(c(17683,
16405, 13970, 12570, 12874, 11900, 14944, 14184, 15765, 10286
), class = "Date"), EPU_Paper = c(197.436482473082, 181.040599101032,
58.1799902251583, 62.5412044042803, 57.4799138334861, 52.420100605017,
61.1549363311955, 92.1818760618723, 76.2564063202547, 118.390708950295
)), row.names = c(NA, -10L), class = "data.frame")
#merge dataframes
df_Final <- merge(df_Final, EPU_Paper, by = c("Year", "Month"))
# Create empty dataframe
date <- seq(as.Date("1991-01-31"),as.Date("2019-12-30"), by = "month")
Beta <- data.frame(ISIN = character(), Beta = numeric(), date =
as.Date(character()), stringsAsFactors = FALSE)
## Model
Returns_length <- 36 # No. of observations in rolling regression
Returns_req <- 18 # observations of stocks requred in period
for (i in 1:(length(date)-Returns_length)) {
beta.tab <- df_Final %>% filter(TradeDate >= date[i] & TradeDate < date[i+Returns_length]) %>%
group_by(ISIN) %>% filter(n() >= Returns_req) %>%
do(ols.model = lm(data = df_Final, formula = ExcessReturn ~ var1 + x + y + z)) %>%
mutate(Beta = coef(ols.model)[2]) %>% select("ISIN", "Beta") %>%
mutate(TradeDate = date[Returns_length+i])
Beta <- rbind(Beta, beta.tab)}
Upvotes: 1
Views: 242
Reputation: 269491
There seems to be multiple undefined inputs, df_Final
and EU_paper
, in the question but assuming that the data is in time order within group and that we want to apply lm
to w rows from the current row to the row w-1 prior then using some generated data in the Note at the end we define a coef.x
function to calculate the desired coefficient and a rolling function roll
which uses it (here with width 2) and then apply it by group g
.
library(zoo)
w <- 2
nr <- nrow(DF)
coef.x <- function(ix) coef(lm(y ~ x + x1, DF, subset = ix))[["x"]]
roll <- function(ix) rollapplyr(ix, w, coef.x, fill = NA)
transform(DF, coef.x = ave(1:nr, g, FUN = roll))
giving:
y x x1 g coef.x
1 1 -0.56047564655 0.4609162060 1 NA
2 2 -0.23017748948 -1.2650612346 1 3.0275676040
3 3 1.55870831415 -0.6868528519 1 0.5590071753
4 4 0.07050839142 -0.4456619701 2 NA
5 5 0.12928773516 1.2240817974 2 17.0127792594
6 6 1.71506498688 0.3598138271 2 0.6306055904
This could also be written using dplyr where w
, roll
and coef.x
are from above:
library(dplyr, exclude = c("filter", "lag"))
library(zoo)
DF %>%
group_by(g) %>%
mutate(coef.x = roll(cur_group_rows())) %>%
ungroup
# test data
set.seed(123)
DF <- data.frame(y = 1:6, x = rnorm(6), x1 = rnorm(6),
g = c(1, 1, 1, 2, 2, 2))
Upvotes: 1