How to get R^2 list when doing regression for each group separately in R

Question

I try perform regression analysis by group separately.

df=structure(list(shop = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("a", "c"), class = "factor"), art = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("b", "d"), class = "factor"), 
    Y = c(177L, 122L, 175L, 140L, 201L, 202L, 279L, 253L, 236L, 
    137L, 166L, 241L, 195L, 221L, 238L, 203L, 254L, 219L, 101L, 
    157L, 188L, 219L, 267L, 126L, 291L, 239L, 230L), x1 = c(1L, 
    0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
    0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L), x2 = c(0L, 1L, 
    1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 
    1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L), x3 = c(0L, 0L, 0L, 
    1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 
    1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L), x4 = c(0L, 0L, 1L, 1L, 
    0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 
    0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L), x5 = c(0L, 0L, 1L, 1L, 0L, 
    0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
    1L, 0L, 0L, 1L, 1L, 1L, 0L), x6 = c(0L, 1L, 0L, 0L, 1L, 1L, 
    0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 
    1L, 1L, 1L, 1L, 0L, 1L), x7 = c(1L, 1L, 0L, 0L, 1L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 
    0L, 1L, 1L, 1L, 0L), x8 = c(0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 
    1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 
    0L, 1L, 0L, 1L), x9 = c(1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 
    0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 
    1L, 1L, 0L)), .Names = c("shop", "art", "Y", "x1", "x2", 
"x3", "x4", "x5", "x6", "x7", "x8", "x9"), class = "data.frame", row.names = c(NA, 
-27L))

Y-is metric depended var and X's are metric independed vars. art+shop are group variable , for each category separately regression

I do so

library(tidyverse)
library(broom)
my_lm <- function(df) {
  lm(Y ~ ., data = df)
}
df %>% 
  group_by(art, shop) %>% 
  nest() %>%
  mutate(fit = map(data, my_lm),
         tidy = map(fit, tidy)) %>%
  select(-fit, - data) %>%
  unnest()

and i get result by group but without R^2

#output
  art    shop   term        estimate std.error statistic p.value
                            
 1 b      a      (Intercept)    31.0      269      0.115   0.927 
 2 b      a      x1            109        153      0.714   0.605 
 3 b      a      x2           - 23.0      223     -0.103   0.934 
 4 b      a      x3           - 15.0      185     -0.0810  0.949 
 5 b      a      x4             31.0      333      0.0931  0.941 
 6 b      a      x5             81.0      457      0.177   0.888 
 7 b      a      x6             77.0      162      0.475   0.718 
 8 b      a      x7           - 17.0      310     -0.0548  0.965 
 9 b      a      x8           - 15.0      214     -0.0700  0.956 
10 b      a      x9             54.0      349      0.155   0.902 
11 d      c      (Intercept)   199         98.8    2.01    0.0907
12 d      c      x1           - 15.7       60.8   -0.259   0.804 
13 d      c      x2              5.98      48.8    0.123   0.906 
14 d      c      x3              7.34      57.8    0.127   0.903 
15 d      c      x4           - 20.1       53.8   -0.373   0.722 
16 d      c      x5           - 43.2       41.8   -1.03    0.342 
17 d      c      x6              1.93      34.5    0.0560  0.957 
18 d      c      x7             31.9       40.5    0.787   0.461 
19 d      c      x8             36.0       45.9    0.786   0.462 
20 d      c      x9             10.7       49.7    0.215   0.837

How to this output add the R^2 that desired output would be

    shop art        term     estimate   std.error    statistic
 1:    a   b (Intercept)           31  268,646608  0,115393231
 2:    a   b          x1          109 152,7350647  0,713654066
 3:    a   b          x2          -23 222,6477038 -0,103302211
 4:    a   b          x3          -15 185,1026742 -0,081036106
 5:    a   b          x4           31 332,8783562  0,093127112
 6:    a   b          x5           81 457,4090073  0,177084401
 7:    a   b          x6           77         162  0,475308642
 8:    a   b          x7          -17 310,2063829 -0,054802225
 9:    a   b          x8          -15 214,3058562 -0,069993421
10:    a   b          x9           54 348,9168955  0,154764647
11:    c   d (Intercept)  198,9136739 98,78333406  2,013635962
12:    c   d          x1  -15,7423706 60,77075951 -0,259045151
13:    c   d          x2  5,983181588 48,82713728  0,122538038
14:    c   d          x3  7,343913337 57,76809925  0,127127488
15:    c   d          x4 -20,06162536  53,8052343 -0,372856389
16:    c   d          x5 -43,15292531 41,84669556 -1,031214645
17:    c   d          x6  1,929902209 34,46111208  0,056002319
18:    c   d          x7  31,86102681 40,48793664  0,786926414
19:    c   d          x8  36,04569213 45,87094642  0,785806593
20:    c   d          x9  10,66460125 49,66736055  0,214720515
        p.value R-square
 1: 0,926861869        1
 2: 0,605403261        1
 3: 0,934468215        1
 4: 0,948523296        1
 5: 0,940883943        1
 6: 0,888421301        1
 7: 0,717531724        1
 8: 0,965146684        1
 9: 0,955513357        1
10: 0,902249287        1
11: 0,090696495      0,9
12: 0,804258667      0,9
13: 0,906474295      0,9
14: 0,902992982      0,9
15: 0,722072243      0,9
16:  0,34220381      0,9
17: 0,957158278      0,9
18: 0,461267807      0,9
19: 0,461875906      0,9
20: 0,837097183      0,9

So each category art+shop will get R^2. Any of your help is very valuable to me

Mossa · Accepted Answer

You want broom::glance for this:

df %>% 
  group_by(art, shop) %>% 
  group_modify(~ {
    fit <- my_lm(.x)
    # tidy(fit)
    # glance(fit)
    full_join(tidy(fit), glance(fit), by = character())
  }) %>% 
  ungroup()

Here I use a different approach, as what you're asking can be accomplished in a much more concise way. Not that the nest-strategy is bad. Note that the by = character() in full_join indicates that it should be a Cartesian product.

How to get R^2 list when doing regression for each group separately in R

Answers (1)

Related Questions