Reputation: 1711
I want to make a loop for the linear model but get a problem.
First, I write a loop(can not run) that extracts the beta value I wanted.
y <- c('y1', 'y2')
x1 <- c('a1', 'a2', 'a3')
x2 <- c('A', 'B')
for (y in y) {
for (x1 in x1) {
for (x2 in x2) {
m <- lm(as.name(y) ~ as.name(x1) + as.name(x2), data = dat) %>% summary() %>% .$coefficients %>% .[2,1]
}
}
}
The combination of y
, x1
, and x2
in the for loop is not my expectation. The formula in lm
model like this: expand.grid(y, x1, x2)
. And what I expected is all the combination of the same letter on the independent variable position. This is my original code:
m1 <- lm(y1 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m2 <- lm(y1 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m3 <- lm(y1 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m4 <- lm(y2 ~ a1 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m5 <- lm(y2 ~ a2 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m6 <- lm(y2 ~ a3 + A, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m7 <- lm(y1 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m8 <- lm(y1 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m9 <- lm(y1 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m10 <- lm(y2 ~ b1 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m11 <- lm(y2 ~ b2 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]
m12 <- lm(y2 ~ b3 + B, dat) %>% summary() %>% .$coefficients %>% .[2,1]```
And this is my data.
Any help will be highly appreciated!
dat <- structure(list(y1 = c(0.838141931, 0.174850172, 0.116144283,
0.113778511, 0.494270733, 0.874482265, 0.325621743, 0.661045636,
0.452396096), y2 = c(0.487877797, 0.360726955, 0.380614137, 0.169760207,
0.359371965, 0.743837108, 0.156535906, 0.995989192, 0.331058618
), a1 = c(0.336537159, 0.446060609, 0.57586382, 0.09629329, 0.491634112,
0.988226873, 0.929105257, 0.605957031, 0.470720774), a2 = c(0.128615421,
0.831986313, 0.267777151, 0.313178319, 0.7776461, 0.863337292,
0.042818986, 0.830029959, 0.901586271), a3 = c(0.291053766, 0.546719865,
0.918797744, 0.976353885, 0.193777436, 0.953859399, 0.963312236,
0.191449484, 0.825034161), b1 = c(0.31510338, 0.5441007, 0.515466925,
0.030702511, 0.020932599, 0.334734486, 0.586588252, 0.562970761,
0.848337089), b2 = c(0.426787995, 0.350719803, 0.706471337, 0.346462166,
0.099766511, 0.219781154, 0.565047862, 0.50282167, 0.727813725
), b3 = c(0.799666435, 0.07225825, 0.409411895, 0.701122141,
0.529991257, 0.478439097, 0.79467065, 0.442156618, 0.026693511
), A = c(0.43143391, 0.662313075, 0.584967093, 0.866110621, 0.598682492,
0.14665666, 0.454315631, 0.448968611, 0.238969939), B = c(0.060625179,
0.410312393, 0.614411256, 0.127343899, 0.90370096, 0.882024428,
0.681389602, 0.56535592, 0.850829599)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
cols = list(y1 = structure(list(), class = c("collector_double",
"collector")), y2 = structure(list(), class = c("collector_double",
"collector")), a1 = structure(list(), class = c("collector_double",
"collector")), a2 = structure(list(), class = c("collector_double",
"collector")), a3 = structure(list(), class = c("collector_double",
"collector")), b1 = structure(list(), class = c("collector_double",
"collector")), b2 = structure(list(), class = c("collector_double",
"collector")), b3 = structure(list(), class = c("collector_double",
"collector")), A = structure(list(), class = c("collector_double",
"collector")), B = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
Upvotes: 0
Views: 96
Reputation: 46888
If you need: y ~ b[1-3] + B and y ~ a[1-3] + A and don't need y ~ a[1-3] + B and y ~ b[1-3] + A, then you can set up the data.frame like this:
library(dplyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
From parts of your code, it seems like you only need the 2nd coefficient, so a simple base R method will be to use reformulate
to construct the formula for every row:
res$coef = sapply(1:nrow(res),function(i){
predictor = as.character(res[i,c("pre1","pre2")])
response = res$response[i]
f = reformulate(predictor,response=response)
coefficients(lm(f,data=dat))[2]
})
head(res)
response pre1 pre2 coef
1 y1 a1 A -0.254942513
2 y2 a1 A -0.005955600
3 y1 a2 A -0.006177156
4 y2 a2 A 0.286669786
5 y1 a3 A -0.393284033
6 y2 a3 A -0.387500900
A tidy solution will be something like this, where we nest the models first
library(purrr)
library(broom)
library(tidyr)
res = rbind(
expand.grid(response=c('y1', 'y2'),pre1=c('a1', 'a2', 'a3'),pre2='A',stringsAsFactors = FALSE),
expand.grid(response=c('y1', 'y2'),pre1=c('b1', 'b2', 'b3'),pre2='B',stringsAsFactors = FALSE)
)
res = res %>%
mutate(model=1:n()) %>%
nest(param=c(c(response, pre1, pre2))) %>%
mutate(
fit = map(param,~
lm(reformulate(c(.$pre1,.$pre2),response=.$response),data=dat)),
tidied=map(fit,tidy)
)
The model and results are nested in a tibble:
res
# A tibble: 12 x 4
model param fit tidied
<int> <list> <list> <list>
1 1 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
2 2 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
3 3 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
4 4 <tibble [1 × 3]> <lm> <tibble [3 × 5]>
And if you need the 2nd coefficient:
res %>% unnest(c(param,tidied)) %>% filter(term==pre1)
# A tibble: 12 x 10
model response pre1 pre2 fit term estimate std.error statistic p.value
<int> <chr> <chr> <chr> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 y1 a1 A <lm> a1 -0.255 0.390 -0.653 0.538
2 2 y2 a1 A <lm> a1 -0.00596 0.479 -0.0124 0.990
3 3 y1 a2 A <lm> a2 -0.00618 0.246 -0.0251 0.981
4 4 y2 a2 A <lm> a2 0.287 0.268 1.07 0.325
5 5 y1 a3 A <lm> a3 -0.393 0.176 -2.24 0.0664
6 6 y2 a3 A <lm> a3 -0.388 0.233 -1.66 0.148
7 7 y1 a1 B <lm> a1 0.507 0.541 0.937 0.385
8 8 y2 a1 B <lm> a1 0.372 0.510 0.729 0.493
9 9 y1 a2 B <lm> a2 0.182 0.391 0.466 0.657
10 10 y2 a2 B <lm> a2 0.436 0.319 1.37 0.221
11 11 y1 a3 B <lm> a3 -0.371 0.310 -1.19 0.277
12 12 y2 a3 B <lm> a3 -0.381 0.276 -1.38 0.217
Upvotes: 1
Reputation: 388797
Using expand.grid
you can create all combinations of y
, x1
and x2
. Create formula as strings using sprintf
.
df <- expand.grid(y, x1, x2)
formula_strings <- sprintf('%s ~ %s + %s', df$Var1, df$Var2, df$Var3)
formula_strings
# [1] "y1 ~ a1 + A" "y2 ~ a1 + A" "y1 ~ a2 + A" "y2 ~ a2 + A"
# [5] "y1 ~ a3 + A" "y2 ~ a3 + A" "y1 ~ a1 + B" "y2 ~ a1 + B"
# [9] "y1 ~ a2 + B" "y2 ~ a2 + B" "y1 ~ a3 + B" "y2 ~ a3 + B"
Using sapply
apply the model and extract coefficients from each.
values <- sapply(formula_strings, function(x)
lm(x, dat) %>% summary() %>% .$coefficients %>% .[2,1])
values
#y1 ~ a1 + A y2 ~ a1 + A y1 ~ a2 + A y2 ~ a2 + A y1 ~ a3 + A y2 ~ a3 + A
# -0.254943 -0.005956 -0.006177 0.286670 -0.393284 -0.387501
#y1 ~ a1 + B y2 ~ a1 + B y1 ~ a2 + B y2 ~ a2 + B y1 ~ a3 + B y2 ~ a3 + B
# 0.506848 0.371615 0.182370 0.435684 -0.370723 -0.380668
Upvotes: 1
Reputation: 1261
I'm not sure if I got you right, but it seems you've organized your data in the wrong way. Maybe this is what you might want to:
new_dat = data.frame(
y = c(rep(dat$y1, 3 + 3), rep(dat$y2, 3 + 3)),
x = c(rep(c(dat$a1, dat$a2, dat$a3), 2),
rep(c(dat$b1, dat$b2, dat$b3), 2)),
z = c(rep(dat$A, 3 + 3), rep(dat$B, 3 + 3))
)
models = with(new_dat, mapply(function(y, x, z) model = lm(y ~ x + z),
y, x, z, SIMPLIFY = FALSE))
Let us know if that's exactly what you're looking for. If not, elaborate on it...
Upvotes: 2