How to loop regressions and get the original names of the variables?

Question

Let's assume I have the following dataframe:

# Example data: 20 observations of eight columns, X1 through X8. Every column
# holds the same 20 values, so the vector is written once and reused.
df <- local({
  obs <- c(
    -1.18944760538218, -1.29854014882472, 0.583311783714593,
    0.650871260739756, -0.000391966126701203, 1.23053193924217,
    1.70030189473426, -0.576054488536938, 0.0291802126175275,
    1.43019460222912, -1.0301194391522, 0.672348388347025,
    -0.173407147880241, -1.5368071194984, 0.676906374446062,
    0.245381059292165, 0.192944401742425, -1.22846757821128,
    0.761438857461385, -1.49010138930763
  )
  data.frame(
    X1 = obs, X2 = obs, X3 = obs, X4 = obs,
    X5 = obs, X6 = obs, X7 = obs, X8 = obs
  )
})

              X1            X2            X3            X4            X5
1  -1.1894476054 -1.1894476054 -1.1894476054 -1.1894476054 -1.1894476054
2  -1.2985401488 -1.2985401488 -1.2985401488 -1.2985401488 -1.2985401488
3   0.5833117837  0.5833117837  0.5833117837  0.5833117837  0.5833117837
4   0.6508712607  0.6508712607  0.6508712607  0.6508712607  0.6508712607
5  -0.0003919661 -0.0003919661 -0.0003919661 -0.0003919661 -0.0003919661
6   1.2305319392  1.2305319392  1.2305319392  1.2305319392  1.2305319392
7   1.7003018947  1.7003018947  1.7003018947  1.7003018947  1.7003018947
8  -0.5760544885 -0.5760544885 -0.5760544885 -0.5760544885 -0.5760544885
9   0.0291802126  0.0291802126  0.0291802126  0.0291802126  0.0291802126
10  1.4301946022  1.4301946022  1.4301946022  1.4301946022  1.4301946022
11 -1.0301194392 -1.0301194392 -1.0301194392 -1.0301194392 -1.0301194392
12  0.6723483883  0.6723483883  0.6723483883  0.6723483883  0.6723483883
13 -0.1734071479 -0.1734071479 -0.1734071479 -0.1734071479 -0.1734071479
14 -1.5368071195 -1.5368071195 -1.5368071195 -1.5368071195 -1.5368071195
15  0.6769063744  0.6769063744  0.6769063744  0.6769063744  0.6769063744
16  0.2453810593  0.2453810593  0.2453810593  0.2453810593  0.2453810593
17  0.1929444017  0.1929444017  0.1929444017  0.1929444017  0.1929444017
18 -1.2284675782 -1.2284675782 -1.2284675782 -1.2284675782 -1.2284675782
19  0.7614388575  0.7614388575  0.7614388575  0.7614388575  0.7614388575
20 -1.4901013893 -1.4901013893 -1.4901013893 -1.4901013893 -1.4901013893
              X6            X7            X8
1  -1.1894476054 -1.1894476054 -1.1894476054
2  -1.2985401488 -1.2985401488 -1.2985401488
3   0.5833117837  0.5833117837  0.5833117837
4   0.6508712607  0.6508712607  0.6508712607
5  -0.0003919661 -0.0003919661 -0.0003919661
6   1.2305319392  1.2305319392  1.2305319392
7   1.7003018947  1.7003018947  1.7003018947
8  -0.5760544885 -0.5760544885 -0.5760544885
9   0.0291802126  0.0291802126  0.0291802126
10  1.4301946022  1.4301946022  1.4301946022
11 -1.0301194392 -1.0301194392 -1.0301194392
12  0.6723483883  0.6723483883  0.6723483883
13 -0.1734071479 -0.1734071479 -0.1734071479
14 -1.5368071195 -1.5368071195 -1.5368071195
15  0.6769063744  0.6769063744  0.6769063744
16  0.2453810593  0.2453810593  0.2453810593
17  0.1929444017  0.1929444017  0.1929444017
18 -1.2284675782 -1.2284675782 -1.2284675782
19  0.7614388575  0.7614388575  0.7614388575
20 -1.4901013893 -1.4901013893 -1.4901013893

I want to set up a series of regressions, say the first column on all the other ones, one at a time. To do so I write the following loop:

# Regress X1 on each of the other columns, one model per column.
# Looping over df[,-c(1)] hands the loop the column vectors themselves, so the
# column names are not available inside the loop body.
for (i in df[,-c(1)]) {

# The formula refers to the loop variable, which is why every summary labels
# the regressor "i" rather than the column's own name.
model <- lm(df$X1 ~ i)
print(summary(model))

}

The problem is that the output of this loop (in the summary table) shows "i" as the name of every regressor. Instead, I would like to have the original name of the variable in the regression table.

Call:
lm(formula = df$X1 ~ i) # instead of i, the original name of each variable

Residuals:
       Min         1Q     Median         3Q        Max 
-2.734e-16 -4.960e-17 -4.252e-17  5.340e-18  5.756e-16 

Coefficients:
              Estimate Std. Error    t value Pr(>|t|)    
(Intercept) -3.103e-18  3.693e-17 -8.400e-02    0.934    
i            1.000e+00  3.748e-17  2.668e+16   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.651e-16 on 18 degrees of freedom
Multiple R-squared:      1, Adjusted R-squared:      1 
F-statistic: 7.118e+32 on 1 and 18 DF,  p-value: < 2.2e-16

Can anyone help me fix this?

Thanks!

MrFlick · Accepted Answer

If you really need to get it into the Call part of the output, you need to build the expression with all the symbols you want and then evaluate it. The code looks a bit messier, but something like this should work:

# Regress X1 on each remaining column of df, one predictor per model.
# The call is assembled with do.call() so that the Call recorded by lm()
# contains the real column name instead of a loop variable.
for (predictor in names(df)[-1]) {
  # reformulate() builds the formula X1 ~ <predictor> from character names.
  model_formula <- reformulate(predictor, response = "X1")
  # quote(df) passes the symbol `df` rather than the data frame's contents,
  # so the recorded Call reads `data = df`.
  lm_args <- list(model_formula, quote(df))
  model <- do.call("lm", lm_args)
  print(summary(model))
}

How to loop regressions and get the original names of the variables?

Answers (1)

Related Questions