MYaseen208
MYaseen208

Reputation: 23898

Appeding summary row of total for each factor level

Required Output

This is the required output (numbers may be different).

 City      Res       Pop  Pop1

  Total          4503739  4455
  State    Urban 3003948  2966
  State    Rural 1499791  1489

  Total          1000915   986
  A        Urban  500414   493
  A        Rural  500501   494

  Total           999938   1009
  B        Urban  499922   497
  B        Rural  500016   512

  Total          1000912  976
  C       Urban  501638   493
  C       Rural  499274   483

R code is

City <- rep(LETTERS[1:3], each = 2) 
Res  <- factor(rep(c("Urban", "Rural"), times = length(City)/2))
set.seed(12345)
Pop  <- rpois(n = length(City), lambda = 500000)
Pop1 <- rpois(n = length(City), lambda = 500)
df   <- data.frame(City, Res, Pop, Pop1)
df

library(tidyverse)

df %>% 
  group_by(Res) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  mutate(City = "State") %>% 
  bind_rows(df) %>% 
  select(City, everything()) %>% 
  ungroup(Res) %>% 
  group_by(., City) %>%
  bind_rows(
        group_by(., Res) %>%
        summarise(Pop = sum(Pop), Pop1 = sum(Pop1)),
        . ) %>% 
  select(City, everything())


# A tibble: 10 x 4
   City  Res       Pop  Pop1
   <chr> <fct>   <int> <int>
 1 NA    Rural 2999582  2978
 2 NA    Urban 3003948  2966
 3 State Rural 1499791  1489
 4 State Urban 1501974  1483
 5 A     Urban  500414   493
 6 A     Rural  500501   494
 7 B     Urban  499922   497
 8 B     Rural  500016   512
 9 C     Urban  501638   493
10 C     Rural  499274   483

I wonder how to get the required output more efficiently. Thanks

sessionInfo

sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.4.0        stringr_1.4.0        dplyr_0.8.1         
[4] purrr_0.3.2          readr_1.3.1          tidyr_0.8.3.9000    
[7] tibble_2.1.1         ggplot2_3.1.1        tidyverse_1.2.1.9000

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       cellranger_1.1.0 pillar_1.4.0     compiler_3.6.0  
 [5] dbplyr_1.4.0     plyr_1.8.4       tools_3.6.0      zeallot_0.1.0   
 [9] lubridate_1.7.4  jsonlite_1.6     nlme_3.1-140     gtable_0.3.0    
[13] lattice_0.20-38  pkgconfig_2.0.2  rlang_0.3.4.9003 reprex_0.3.0    
[17] cli_1.1.0        DBI_1.0.0        rstudioapi_0.10  haven_2.1.0     
[21] withr_2.1.2      xml2_1.2.0.9000  httr_1.4.0       fs_1.3.1        
[25] generics_0.0.2   vctrs_0.1.0.9003 hms_0.4.2        grid_3.6.0      
[29] tidyselect_0.2.5 glue_1.3.1       R6_2.4.0         fansi_0.4.0     
[33] readxl_1.3.1     modelr_0.1.4     magrittr_1.5     backports_1.1.4 
[37] scales_1.0.0     rvest_0.3.4      assertthat_0.2.1 colorspace_1.4-1
[41] utf8_1.1.4       stringi_1.4.3    lazyeval_0.2.2   munsell_0.5.0   
[45] broom_0.5.2      crayon_1.3.4 

Upvotes: 2

Views: 65

Answers (3)

A. Suliman
A. Suliman

Reputation: 13125

Here is an option based on purrr::map_df and split. We can split df using City then loop through each City: bind at the top of each City a row Total which sum Pop and Pop1

library(dplyr)
library(purrr)
df %>%
  group_by(Res) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>% 
  arrange(Res= factor(Res, levels=c('Urban','Rural'))) %>%
  mutate(City = "State") %>% 
  bind_rows(df) %>% 
  mutate(City=factor(City, levels = c('State','A','B','C'))) %>% 
  split(.$City) %>% 
  map_df(., ~bind_rows(summarise_if(.x,is.numeric, sum) %>% mutate(City='Total', Res=''), 
                       .x %>% mutate_if(is.factor, as.character)) %>% 
  select(City, Res, Pop, Pop1))


# A tibble: 12 x 4
   City  Res       Pop  Pop1
  <chr> <chr>   <int> <int>
1 Total ""    3001765  2972
2 State Urban 1501974  1483
3 State Rural 1499791  1489
4 Total ""    1000915   987
5 A     Urban  500414   493
6 A     Rural  500501   494
7 Total ""     999938  1009
8 B     Urban  499922   497
9 B     Rural  500016   512
10 Total ""    1000912   976
11 C     Urban  501638   493
12 C     Rural  499274   483

Upvotes: 1

akrun
akrun

Reputation: 887118

Here is an option with add_row

library(tidyverse)
df %>% 
  group_split(Res) %>%
   map_df(~ add_row(., City = "State", Res = first(.$Res), 
          Pop = sum(.$Pop), Pop1 = sum(.$Pop1)) %>% 
           add_row(., City = 'Total', Res = first(.$Res), 
          Pop = sum(.$Pop), Pop1 = sum(.$Pop1)))
# A tibble: 10 x 4
#   City  Res       Pop  Pop1
#   <fct> <fct>   <int> <int>
# 1 A     Rural  500501   494
# 2 B     Rural  500016   512
# 3 C     Rural  499274   483
# 4 State Rural 1499791  1489
# 5 Total Rural 2999582  2978
# 6 A     Urban  500414   493
# 7 B     Urban  499922   497
# 8 C     Urban  501638   493
# 9 State Urban 1501974  1483
#10 Total Urban 3003948  2966

Or another option is rollup from data.table

library(data.table)
f1 <- function(dat), rollup(dat, lapply(.SD, sum), by = "Res",
       .SDcols = Pop:Pop1)
setDT(df)
out1 <- rbind(f1(df)[-.N][, City := "State"], df)
rbind(f1(out1)[-.N][, City := "Total"], out1)

Upvotes: 1

Jon Spring
Jon Spring

Reputation: 66490

library(tidyverse)
df %>%
  select(Res, Pop, Pop1) %>%
  group_by(Res) %>%
  summarise_all(sum) %>%
  bind_rows(df) %>%
  mutate(City = fct_explicit_na(City, "State")) %>%  # from forcats: renames NA as factor
  select(City, Res, Pop, Pop1)

# A tibble: 8 x 4
  City  Res       Pop  Pop1
  <fct> <fct>   <int> <int>
1 State Rural 1499791  1489
2 State Urban 1501974  1483
3 A     Urban  500414   493
4 A     Rural  500501   494
5 B     Urban  499922   497
6 B     Rural  500016   512
7 C     Urban  501638   493
8 C     Rural  499274   483

Upvotes: 1

Related Questions