user3022875
user3022875

Reputation: 9018

create summary table after dplyr action

I'd like to add a couple columns to the d2 dataframe that are sourced from the "result" vector in the "d" data frame. Is there an easy way to do this?

Here is the "d" data frame. Notice the result column is the mean of each GROUP & FLAG combination. i.e. 3 is the mean of group A when flag = 0

    d = data.frame(x=c(seq(1,5,1),seq(11,15,1),100,1000),group= c(rep("A",5),rep("B",5),"A","B")) 
    d = d%>%
      group_by(group)  %>%  
      mutate(    
                   U=quantile(x, 0.75) + 1.5*IQR(x),
                   L=quantile(x, 0.25) - 1.5*IQR(x),
                   flag = ifelse(x>U | x<L,1,0),
                    mu = mean(x)
                   ) %>%  
  group_by(group, flag) %>%
  mutate(result = mean(x))

    as.data.frame(d)
      x group    U    L flag        mu result
1     1     A  8.5 -1.5    0  19.16667      3
2     2     A  8.5 -1.5    0  19.16667      3
3     3     A  8.5 -1.5    0  19.16667      3
4     4     A  8.5 -1.5    0  19.16667      3
5     5     A  8.5 -1.5    0  19.16667      3
6    11     B 18.5  8.5    0 177.50000     13
7    12     B 18.5  8.5    0 177.50000     13
8    13     B 18.5  8.5    0 177.50000     13
9    14     B 18.5  8.5    0 177.50000     13
10   15     B 18.5  8.5    0 177.50000     13
11  100     A  8.5 -1.5    1  19.16667    100
12 1000     B 18.5  8.5    1 177.50000   1000

Now I'd like to have a summary table where I show the grand group mean which is already present in the "mu" column but I'd like to add 2 more columns "mu_1" and "mu_0" which I manually added below. Is there an efficient way to do this?

Thank you.

    d2 = d %>% group_by(group) %>% 
         summarise(U = mean(U),
                   L = mean (L),
                   mu= mean(mu)
                   )
         as.data.frame(d2)



 group    U    L        mu   mu_1    mu_0
1     A  8.5 -1.5  19.16667   100     3
2     B 18.5  8.5 177.50000   1000    13

Upvotes: 1

Views: 474

Answers (1)

Steven Beaupr&#233;
Steven Beaupr&#233;

Reputation: 21621

You could do:

d %>%
  group_by(group) %>%
  summarise(U = mean(U), L = mean(L), mu = mean(mu), 
            mu_1 = mean(result[flag == 1]), 
            mu_0 = mean(result[flag == 0]))

Which gives:

## A tibble: 2 x 6
#   group     U     L        mu  mu_1  mu_0
#  <fctr> <dbl> <dbl>     <dbl> <dbl> <dbl>
#1      A   8.5  -1.5  19.16667   100     3
#2      B  18.5   8.5 177.50000  1000    13

Or you could perform two different summaries (one grouped by group and flag, the other by group only) and left_join() them together:

library(dplyr)
library(tidyr)
d %>%
  group_by(group, flag) %>%
  summarise(mean = mean(result)) %>%
  spread(flag, mean, sep = "-mu") %>%
  left_join(d %>% 
              group_by(group) %>%
              summarise_each(funs(mean), U, L, mu), .)

Which gives:

## A tibble: 2 x 6
#   group     U     L        mu flag-mu0 flag-mu1
#  <fctr> <dbl> <dbl>     <dbl>    <dbl>    <dbl>
#1      A   8.5  -1.5  19.16667        3      100
#2      B  18.5   8.5 177.50000       13     1000

Upvotes: 3

Related Questions