Reputation: 1040
I have data such as this. I would like to run a linear regression on multiple variables separately without having to copy paste the regression.
dat <- read_table2("condition school Q2_1 Q2_2 Q2_4 Q2_8 Q2_10 Q2_11 Q2_14 Q2_15
1 A 3 4 3 2 2 4 4 2
0 B 3 3 3 2 1 2 2 1
1 C 4 4 4 3 3 4 3 3
0 D 3 4 3 3 2 2 4 2
1 A 2 4 2 3 3 3 3 3
0 B 2 4 4 2 3 2 2 3
1 C 3 3 3 2 3 3 2 3
0 D 4 4 3 3 3 2 2 3
1 A 3 3 3 3 2 3 3 2
0 B 3 3 3 2 3 3 4 1
1 C 4 4 4 4 3 3 3 3
0 D 3 3 4 3 3 4 4 2
1 A 2 2 2 2 2 2 2 3
0 B 3 3 3 2 2 2 2 3
1 C 3 4 3 1 2 3 3 3
0 D 3 4 2 2 3 4 3 3
1 A 4 4 4 3 3 4 3 2
0 B 4 2 3 2 3 2 2 1
1 C 4 3 3 4 3 4 3 3
0 D 3 3 3 2 2 2 3 2
1 A 4 2 3 3 2 2 2 3
")
I could run this separately and then pull out the coefficients.
model1 <- lmer(Q2_1~ (1|school) + condition ,data=regression_data , REML = F)
model2 <- lmer(Q2_2~ (1|school) + condition ,data=regression_data , REML = F)
model3 <- lmer(Q2_4~ (1|school) + condition ,data=regression_data , REML = F)
summary(model1)$coefficient
summary(model2)$coefficient
summary(model3)$coefficient
But I rather do this in one chunk of code. I adjusted some code online to come up with this:
# outcome
out_start=3
out_end= 10
out_nvar=out_end-out_start+1
out_variable=rep(NA, out_nvar)
out_beta=rep(NA, out_nvar)
out_se = rep(NA, out_nvar)
out_pvalue=rep(NA, out_nvar)
# exposure
exp_start=1
exp_end=2
exp_nvar=exp_end-exp_start+1
exp_variable=rep(NA, exp_nvar)
exp_beta=rep(NA, exp_nvar)
exp_se = rep(NA, out_nvar)
exp_pvalue=rep(NA, exp_nvar)
number=1
library(lme4)
for (i in out_start:out_end){
outcome = colnames(dat)[i]
for (j in exp_start:exp_end){
exposure = colnames(dat)[j]
model <- lmer(get(outcome) ~ get(exposure) + (1|school) + condition,
data=dat , REML = F, na.action = na.exclude)
Vcov <- vcov(model, useScale = FALSE)
beta <- fixef(model)
se <- sqrt(diag(Vcov))
zval <- beta / se
pval <- 2 * pnorm(abs(zval), lower.tail = FALSE)
out_beta[number] = as.numeric(beta[2])
out_se[number] = as.numeric(se[2])
out_pvalue[number] = as.numeric(pval[2])
out_variable[number] = outcome
number = number + 1
exp_beta[number] = as.numeric(beta[2])
exp_se[number] = as.numeric(se[2])
exp_pvalue[number] = as.numeric(pval[2])
exp_variable[number] = exposure
number = number + 1
}
}
outcome = data.frame(out_variable, out_beta, out_se, out_pvalue)
exposure = data.frame(exp_variable, exp_beta, exp_se, exp_pvalue)
library(tidyverse)
outcome = outcome %>%
dplyr::rename(
variable = out_variable,
beta = out_beta,
se = out_se,
pvalue = out_pvalue,
)
exposure = exposure %>%
dplyr::rename(
variable = exp_variable,
beta = exp_beta,
se = exp_se,
pvalue = exp_pvalue,
)
all = rbind(outcome, exposure)
all = na.omit(all)
This does not give me quite what I want -- it gives me the impact of being in the condition group for each dependent variable (question) but doe not show me the value of the intercept.
For example, for Q2_1 I expect to see:
model_lmer <- lmer(Q2_1~(1|school) + condition ,data=dat , REML = F)
summary(model_lmer)$coefficient
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.1000000 0.2079585 21 14.9068177 1.213221e-12
condition 0.1727273 0.2873360 21 0.6011334 5.541851e-01
Instead I see:
var beta se p value
1 Q2_1 1.727273e-01 0.2873360 0.54775114
3 Q2_1 -2.559690e-15 0.3737413 1.00000000
Any suggestions are appreciated!
Upvotes: 0
Views: 856
Reputation: 39585
You can try this base R
solution. I hope this helps:
library(lme4)
#Apply
models <- function(x)
{
model1 <- lmer(x~ (1|school) + condition ,data=dat , REML = F)
return(model1)
}
y <- apply(dat[,-c(1,2)],2,models)
#Extract results
extract <- function(x)
{
z <- as.data.frame(summary(x)$coefficient)
z$id <- rownames(z)
z <- z[,c(dim(z)[2],1:(dim(z)[2]-1))]
rownames(z)<-NULL
return(z)
}
#Extract summary with function
DF <- as.data.frame(do.call(rbind,lapply(y,extract)))
#Format variables
DF$var <- gsub("\\..*","",rownames(DF))
#Arrange columns
DF <- DF[,c(dim(DF)[2],1:(dim(DF)[2]-1))]
rownames(DF)<-NULL
var id Estimate Std. Error t value
1 Q2_1 (Intercept) 3.100000000 0.2079585 14.90681768
2 Q2_1 condition 0.172727273 0.2873360 0.60113340
3 Q2_2 (Intercept) 3.300000000 0.2251503 14.65687832
4 Q2_2 condition 0.063636364 0.3110898 0.20455947
5 Q2_4 (Intercept) 3.100000000 0.1928371 16.07574475
6 Q2_4 condition -0.009090909 0.2664427 -0.03411956
7 Q2_8 (Intercept) 2.300000000 0.2212714 10.39447415
8 Q2_8 condition 0.427272727 0.3057304 1.39754743
9 Q2_10 (Intercept) 2.500000000 0.1855144 13.47604443
10 Q2_10 condition 0.045454545 0.2563249 0.17733173
11 Q2_11 (Intercept) 2.500000000 0.2404001 10.39933014
12 Q2_11 condition 0.681818182 0.3321605 2.05267707
13 Q2_14 (Intercept) 2.800000000 0.2313147 12.10472106
14 Q2_14 condition 0.018181818 0.3196072 0.05688801
15 Q2_15 (Intercept) 2.100000000 0.2079585 10.09816681
16 Q2_15 condition 0.627272727 0.2873360 2.18306339
Upvotes: 1