Reputation: 51
I want to determine which variables are best suited for the model. To do this, I use the method of all possible regressions, i.e. I build models with every possible combination of predictors. For example:
library(fpp2)
# uschange is a quarterly dataset from the "fpp2" package
train <- ts(uschange[1:180, ], start = 1970, frequency = 4)
fit1 <- tslm(Consumption ~ Income, data = train)
fit2 <- tslm(Consumption ~ Production, data = train)
fit3 <- tslm(Consumption ~ Savings, data = train)
fit4 <- tslm(Consumption ~ Unemployment, data = train)
fit5 <- tslm(Consumption ~ Income + Production, data = train)
fit6 <- tslm(Consumption ~ Income + Savings, data = train)
# and so on...
After that, I need to evaluate the models in two ways:
test <- data.frame(
  Income       = uschange[181:187, 2],
  Production   = uschange[181:187, 3],
  Savings      = uschange[181:187, 4],
  Unemployment = uschange[181:187, 5]
)
y <- uschange[181:187, 1]
CV(fit1)
accuracy(forecast(fit1, test), y)
CV(fit2)
accuracy(forecast(fit2, test), y)
CV(fit3)
accuracy(forecast(fit3, test), y)
# and so on...
As a result, I want to get the model with the smallest AICc value from CV() and with the smallest error value (for example, MAE from accuracy()). How can I do this automatically?
EDIT:
> dput(head(uschange, 20))
structure(c(0.615986218, 0.46037569, 0.876791423, -0.274245141,
1.897370758, 0.911992909, 0.794538845, 1.648587467, 1.313722178,
1.891474954, 1.530714, 2.318294715, 1.81073916, -0.041739961,
0.354235565, -0.291632155, -0.877027936, 0.351135548, 0.409597702,
-1.475808634, 0.972261043, 1.169084717, 1.55327055, -0.255272381,
1.987153628, 1.447334175, 0.531811929, 1.160125137, 0.457011505,
1.016624409, 1.904101264, 3.890258661, 0.708252663, 0.79430954,
0.433818275, 1.093809792, -1.661684821, -0.938353209, 0.094487794,
-0.122595985, -2.452700312, -0.551525087, -0.358707862, -2.185454855,
1.90973412, 0.901535843, 0.308019416, 2.291304415, 4.149573867,
1.89062398, 1.273352897, 3.436892066, 2.799076357, 0.817688618,
0.868996932, 1.472961869, -0.882483578, 0.074279194, -0.41314971,
-4.064118932, 4.810311502, 7.287992337, 7.289013063, 0.985229644,
3.657770614, 6.051341804, -0.445832214, -1.53087186, -4.35859438,
-5.054525795, 5.809959038, 16.04471706, -5.348868495, 8.426034362,
2.758795652, 11.14642986, -2.533514487, -6.592644641, 0.51717884,
11.3433954, 0.9, 0.5, 0.5, 0.7, -0.1, -0.1, 0.1, 0, -0.2, -0.1,
-0.2, -0.3, -0.3, 0, -0.1, 0.1, 0.2, 0.3, 0.5, 1.3), .Dim = c(20L,
5L), .Dimnames = list(NULL, c("Consumption", "Income", "Production",
"Savings", "Unemployment")), .Tsp = c(1970, 1974.75, 4), class = c("mts",
"ts", "matrix"))
Upvotes: 0
Views: 349
Reputation: 11514
Try this:
# get all names of predictors
cols <- colnames(uschange)[-1]
# create all combinations
out <- unlist(lapply(seq_along(cols), function(n) {
  combn(cols, n, FUN = function(row) paste0("Consumption ~ ", paste0(row, collapse = "+")))
}))
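# with the 4 predictors above this produces choose(4,1)+choose(4,2)+choose(4,3)+choose(4,4) = 15
# formula strings, e.g. "Consumption ~ Income", "Consumption ~ Income+Production", ...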
# fit models:
mods <- lapply(out, function(frml) tslm(as.formula(frml), data = train))
# define helper function:
cv_this <- function(x) {
  list(cv = CV(x), acc = accuracy(forecast(x, test), y))
}
# run helper function over all models to get evaluations out:
lapply(mods, cv_this)
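If you also want the winning model picked out automatically, here is a minimal sketch that builds on the objects above (out, mods, cv_this). The names res and scores are placeholders I introduce here, and it assumes you want to minimise the AICc element returned by CV() and the "Test set" MAE returned by accuracy():
res <- lapply(mods, cv_this)   # the same evaluations as above, kept in an object
# collect AICc and test-set MAE for every candidate model in one table
scores <- data.frame(
  formula = out,
  AICc    = sapply(res, function(r) unname(r$cv["AICc"])),
  MAE     = sapply(res, function(r) r$acc["Test set", "MAE"])
)
scores[which.min(scores$AICc), ]   # candidate with the smallest AICc
scores[which.min(scores$MAE), ]    # candidate with the smallest test-set MAE
If the two criteria disagree, you still have to decide which one takes priority (or rank the models by both and pick the best average rank).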
Upvotes: 1