Reputation: 143
I have a model below
A = matrix(1, nrow = 400, ncol = 400)
A = 0.5**abs(row(A) - col(A))
X=mvtnorm::rmvnorm(300,mean=rep(0,400),sigma=A)))
Y=X[,6]+X[,12]+X[,15]+X[,20]+0.7*pnorm(X[,1])*rnorm(300)
df <- data.frame(Y, X)
and i want to fit least squares lasso to it. I need to summarise the following:
How can i calculate 1. and 2.? The results should be 100% and 6%.
av_model_size <- c(NULL)
a1 <- c(NULL)
a2 <- c(NULL)
for (i in 1:100) {
lassocv <- glmnet::cv.glmnet(X, Y, alpha = 1)
modelcv <- glmnet::glmnet(X, Y, alpha = 1, lambda = lassocv$lambda.min, standardize = TRUE)
lasso.coef <- modelcv$beta
av_model_size[[i]] <- sum(lasso.coef!=0)
a1[[i]] <- sum(lasso.coef[6] != 0 && lasso.coef[12] != 0 && lasso.coef[15] != 0 && lasso.coef[20] != 0)
a2[[i]] <- sum(lasso.coef[1] != 0)
}
mean(ams)
Upvotes: 2
Views: 76
Reputation: 11514
You will need to include the data generation in your simulation process. As you have outlined your example, you are executing the functions on the same set of data. Consider the following:
set.seed(42)
get_simulated_data <- function(){
A = matrix(1, nrow = 400, ncol = 400)
A = 0.5**abs(row(A) - col(A))
X = mvtnorm::rmvnorm(300,mean=rep(0,400),sigma=A)
Y = X[,6]+X[,12]+X[,15]+X[,20]+0.7*pnorm(X[,1])*rnorm(300)
dat <- list(Y=Y, X=X)
return(dat)
}
get_coefficients <- function(){
dat <- get_simulated_data()
lassocv <- glmnet::cv.glmnet(dat$X, dat$Y, alpha = 1)
modelcv <- glmnet::glmnet(dat$X, dat$Y, alpha = 1, lambda = lassocv$lambda.min, standardize = TRUE)
return(modelcv$beta)
}
out <- do.call(cbind, lapply(1:100, function(i) get_coefficients()))
rowMeans(out[c("V1", "V6", "V12", "V15", "V20"),] == 0)
V1 V6 V12 V15 V20
0.93 0.00 0.00 0.00 0.00
Upvotes: 1