Stefano Potter
Stefano Potter

Reputation: 3577

Extract slope and r squared from grouped linear models using broom

I have a dataframe that I want to run linear models on by group, then use the broom package to extract the slope and r squared for each model. So far I am trying this:

library(tidyverse)
library(broom)

#read in the dataset
data(mtcars) 

#add a group variable
mtcars <- mtcars %>% as_tibble() %>% mutate(LC = 1)

#create a second group
mtcars2 <- mtcars 
mtcars2 <- mtcars2 %>% mutate(LC = 2)

#bind together
mtcars <- rbind(mtcars, mtcars2)

#groupby and run regressions
all_regress <-  mtcars %>% group_by(LC) %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .))

#use broom the extract the slope and rsq per group
glance <-all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                   glance = map(mod1, broom::glance),
                                   augment = map(mod1, broom::augment),
                                   rsq = glance %>% map_dbl('r.squared'),
                                   slope = tidy %>% map_dbl(function(x) x$estimate[2])) 

but this fails with:

Error: Problem with `mutate()` input `tidy`.
x No tidy method for objects of class qr
ℹ Input `tidy` is `map(mod1, broom::tidy)`.
ℹ The error occurred in row 1.

If I do this without groups such as:

    #read in the dataset
    data(mtcars) 
    
    mtcars <- mtcars %>% as_tibble()
   
    #run regressions
    all_regress <-  mtcars %>%
      do(mod1 = lm(mpg ~ disp, data = .),
         mod2 = lm(mpg ~ wt, data = .))
    
    #use broom the extract the slope and rsq per group
    glance <- all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                       glance = map(mod1, broom::glance),
                                       augment = map(mod1, broom::augment),
                                       rsq = glance %>% map_dbl('r.squared'),
                                       slope = tidy %>% map_dbl(function(x) x$estimate[2])) 

there is no error.

Upvotes: 2

Views: 1550

Answers (2)

jpdugo17
jpdugo17

Reputation: 7116

I used this approach, its longer but i think theres more control in the individual steps. Finally i created a tibble with lists columns containing each model.

library(tidyverse)
library(broom)

#read in the dataset
data(mtcars) 

#add a group variable
mtcars <- mtcars %>% as_tibble() %>% dplyr::select(-c(vs, am, gear, carb, cyl)) %>% mutate(LC = 1)

#create a second group
mtcars2 <- mtcars 
mtcars2 <- mtcars2 %>% mutate(LC = 2)

#bind together
mtcars <- bind_rows(mtcars2, mtcars)

#group_split and run regressions
all_regress <-  mtcars %>% group_split(LC) %>% 

    
       map(~ list(mod1 = lm(mpg ~ disp, data = .),
       mod2 = lm(mpg ~ wt, data = .)))


# example <- all_regress[[2]][[1]] %>% glance()
#the list has 2 levels with 2 models each
data <- all_regress %>% 
    map(~
            map(.x, function(model){
                #column lists are needed because each function output different objects
                tibble(mod = list(model),
                       tidy = list(broom::tidy(model)),
                       glance = list(broom::glance(model)),
                       augment = list(broom::augment(model))) %>%
                    mutate(
                        rsq = list(glance[[1]]$r.squared),
                        slope = list(tidy[[1]]$estimate[2]))

                       
            } ))

data_final <- 
data %>% map2(unique(mtcars$LC), ~
                 map2(.x, .y, function(each_model, lc){
                     mutate(each_model, LC = lc)
                 }))

final_format <- #because of the list structure i need to bind the two datasets in each level and then bind them again.
map(data_final, ~reduce(.x, rbind)) %>% reduce(rbind)


#acces the data
final_format[1, 1][[1]]

Upvotes: 1

VitaminB16
VitaminB16

Reputation: 1234

I think simply adding ungroup() achieves what you need:

all_regress <-  mtcars %>% group_by(LC) %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .)) %>% ungroup()

#use broom the extract the slope and rsq per group
glance <-all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                   glance = map(mod1, broom::glance),
                                   augment = map(mod1, broom::augment),
                                   rsq = glance %>% map_dbl('r.squared'),
                                   slope = tidy %>% map_dbl(function(x) x$estimate[2])) 

Upvotes: 2

Related Questions