Luis
Luis

Reputation: 1584

Dplyr computation when cross tables have more than two groups

Dplyr is really great to work with datasets and I think I can replace all data manipulation that I always did in Excel by using dplyr commands. However, it turns difficult for computing subtotals and percentage when dealing with several subgroups and I didn't find any related code in my recent search on this subject here.

Please check the following table. Everything works fine, but after spending more than a day, I'm still confused about how to perform the last calculation: totals for each country, cH, EC, GT, MX and the USA.

Subtotals

I'll be glad if some of the experts we just find here could help me with this process.

That's the code and the data structure if you want to reproduce the entire environment.

Thank you.

library(tidyverse)
data %>% group_by(Q7_MoEd, add=TRUE) %>% 
  mutate(educ_levels_count = n()) %>% 

  #how many countries we are dealing with
  group_by(Country, add=TRUE) %>% 
  mutate(country_count = n()) %>% 

  #lets add how many education levels we have
  group_by(Q7_MoEd, Country, educ_levels_count, country_count, add=TRUE) %>% 
  summarise(
    educ_count = n(), 
    educ_percentage = round(educ_count/first(educ_levels_count),2), 
    #country_percentage = educ_count/first(country_count), 
  ) %>% 
  mutate(n_percentage = paste0(educ_count," (",educ_percentage,"%)")) %>% #merge variables
  select(Q7_MoEd, Country, n_percentage, educ_levels_count) %>% 
  spread(Country, n_percentage, fill="-") %>% 
  janitor::adorn_totals("row")



data <- structure(list(Q7_MoEd = structure(c(6L, 3L, 3L, 5L, 5L, 5L, 
                                             5L, 5L, 5L, 5L, 5L, 3L, 5L, 6L, 5L, 5L, 5L, 6L, 5L, 5L, 5L, 2L, 
                                             6L, 6L, 6L, 5L, 5L, 5L, 5L, 6L, 5L, 6L, 2L, 6L, 6L, 6L, 6L, 5L, 
                                             4L, 5L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 1L, 5L, 5L, 6L, 5L, 6L, 
                                             5L, 6L, 6L, 6L, 5L, 6L, 6L, 5L, 3L, 5L, 5L, 5L, 6L, 5L, 5L, 6L, 
                                             5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 5L, 5L, 4L, 5L, 3L, 5L, 2L, 6L, 
                                             5L, 5L, 3L, 3L, 6L, 5L, 2L, 5L, 5L, 6L, 5L, 6L, 4L, 6L, 5L, 3L, 
                                             5L, 6L, 5L, 6L, 6L, 5L, 3L, 5L, 5L, 4L, 5L, 6L, 5L, 2L, 5L, 6L, 
                                             5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 3L, 5L, 6L, 5L, 5L, 1L, 5L, 6L, 
                                             5L, 5L, 5L, 6L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 4L, 6L, 5L, 6L, 5L, 
                                             5L, 5L, 6L, 2L, 5L, 3L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
                                             5L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 5L, 
                                             5L, 3L, 5L, 5L, 6L, 5L, 5L, 5L, 6L, 5L, 6L, 5L, 3L, 5L, 4L, 5L, 
                                             5L, 3L, 6L, 6L, 3L, 4L, 4L, 2L, 3L, 5L, 5L, 3L, 5L, 3L, 5L, 6L, 
                                             5L, 5L, 5L, 5L, 6L, 6L, 4L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 3L, 5L, 
                                             3L, 6L, 5L, 3L, 5L, 3L, 5L, 5L, 5L, 5L, 3L, 5L, 2L, 5L, 5L, 3L, 
                                             3L, 2L, 3L, 5L, 5L, 3L, 3L, 5L, 1L, 5L, 4L, 5L, 3L, 3L, 5L, 5L, 
                                             3L, 5L, 5L, 3L, 5L, 5L, 6L, 3L, 5L, 3L, 5L, 5L, 3L, 3L, 5L, 4L, 
                                             3L, 5L, 6L, 3L, 5L, 6L, 3L, 5L, 1L, 5L, 5L, 5L, 6L, 5L, 3L, 2L, 
                                             2L, 2L, 2L, 4L, 2L, 2L, 2L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 
                                             2L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 
                                             5L, 2L, 4L, 2L, 3L, 4L, 3L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 3L, 3L, 
                                             3L, 3L, 2L, 2L, 3L, 2L, 3L, 5L, 3L, 5L, 4L, 1L, 5L, 2L, 2L, 1L, 
                                             1L, 1L, 5L, 5L, 3L, 2L, 1L, 5L, 3L, 5L, 6L, 5L, 4L, 5L, 6L, 5L, 
                                             3L, 5L, 3L, 5L, 3L, 1L, 3L, 4L, 2L, 6L, 5L, 5L, 6L, 6L, 5L, 2L, 
                                             4L, 4L, 6L, 5L, 6L, 4L, 6L, 3L, 3L, 4L, 1L, 6L, 6L, 1L, 2L, 2L, 
                                             1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 2L, 1L, 6L, 
                                             1L, 1L, 5L, 1L, 3L, 3L, 1L, 3L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
                                             2L, 1L, 1L, 2L, 6L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 6L, 5L, 
                                             2L, 5L, 4L, 6L, 4L, 6L, 4L, 4L, 4L, 4L, 3L, 2L, 5L, 4L, 2L, 6L, 
                                             6L, 2L, 4L, 2L, 6L, 2L, 5L, 4L, 3L, 2L, 4L, 4L, 4L, 5L, 4L, 4L, 
                                             4L, 4L, 6L, 5L, 2L, 5L, 6L, 2L, 5L, 5L, 5L, 2L, 2L, 5L, 4L, 6L, 
                                             4L, 2L, 2L, 4L, 2L, 4L, 2L, 2L, 1L, 4L, 4L, 4L, 5L, 5L, 2L, 4L, 
                                             2L, 4L, 2L, 1L, 4L, 5L, 5L, 2L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 
                                             2L, 6L, 1L, 2L, 2L, 4L, 4L, 4L, 2L, 4L, 5L, 2L, 4L, 4L, 4L, 2L, 
                                             5L, 6L, 4L, 5L, 5L, 4L, 5L, 2L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 
                                             4L, 2L, 5L, 5L, 4L, 5L, 4L, 4L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                             1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 2L, 3L, 5L, 
                                             4L, 2L, 2L, 2L, 2L, 3L, 1L, 4L, 2L, 3L, 4L, 2L, 3L, 2L, 5L, 3L, 
                                             3L, 5L, 2L, 1L), .Label = c("Primaria", "Secundaria", "Bachillerato", 
                                                                         "Tecnico", "Universitario", "Otro", "No sé", "No aplica", "T?o", 
                                                                         "No se"), class = "factor"), Country = c("MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "CH", "CH", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", "EC", 
                                                                                                                  "EC", "EC", "EC", "EC", "EC", "EC", "EC", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", 
                                                                                                                  "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "MX", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", 
                                                                                                                  "GT", "GT", "GT", "GT", "GT", "GT", "GT", "GT", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", "CH", 
                                                                                                                  "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", 
                                                                                                                  "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", 
                                                                                                                  "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", "US", 
                                                                                                                  "US", "US", "US", "US", "US", "US", "US", "US", "US")), class = c("tbl_df", 
                                                                                                                                                                                    "tbl", "data.frame"), row.names = c(NA, -618L))

Upvotes: 0

Views: 157

Answers (1)

Mark Peterson
Mark Peterson

Reputation: 9570

The trouble appears to be that you are trying to sum after you have converted the variables to characters (and added the percentages). You could convert back, but it is probably easier to just step back and calculate the totals first.

Here, I am just separating out the first part of what you did and calling it step_one. Importantly, this stops just before converting the column to a character.

step_one <-
  data %>%
  group_by(Q7_MoEd, add=TRUE) %>% 
  mutate(educ_levels_count = n()) %>% 

  #how many countries we are dealing with
  group_by(Country, add=TRUE) %>% 
  mutate(country_count = n()) %>% 

  #lets add how many education levels we have
  group_by(Q7_MoEd, Country, educ_levels_count, country_count, add=TRUE) %>% 
  summarise(
    educ_count = n(), 
    educ_percentage = round(educ_count/first(educ_levels_count),2)
    #country_percentage = educ_count/first(country_count), 
  ) %>%
  ungroup()

Then, I am using that to calculate the totals myself:

totals <-
  step_one %>%
  mutate(Q7_MoEd = "Total") %>%
  group_by(Country, Q7_MoEd) %>%
  summarise(
    educ_count = sum(educ_count)
  ) %>%
  ungroup() %>%
  mutate(
    educ_percentage = round(educ_count/sum(educ_count),2)
    , educ_levels_count = sum(educ_count)
  )

I am planning to manually set the sort order for the result (to keep "Total" at the bottom, and better sort the other entries), using a variable:

sort_order <-
  c("Primaria"
    , "Secundaria"
    , "Tecnico"
    , "Bachillerato"
    , "Universitario"
    , "Otro"
    , "Total")

Finally, I am binding the step_one and totals data together and modifying it using the same steps you used before -- though I no longer need adorn_totals because the totals are already calculated:

bind_rows(
  step_one
  , totals
) %>% 
  mutate(
    n_percentage = paste0(educ_count," (",educ_percentage,"%)")
    , Q7_MoEd = factor(Q7_MoEd, levels = sort_order)
         ) %>%
  select(Q7_MoEd, Country, n_percentage, educ_levels_count) %>% 
  spread(Country, n_percentage, fill="0 (0.00%)")

returns:

# A tibble: 7 x 7
  Q7_MoEd       educ_levels_count CH          EC          GT          MX         US        
  <fct>                     <int> <chr>       <chr>       <chr>       <chr>      <chr>     
1 Primaria                     50 3 (0.06%)   2 (0.04%)   26 (0.52%)  10 (0.2%)  9 (0.18%) 
2 Secundaria                   97 36 (0.37%)  5 (0.05%)   11 (0.11%)  25 (0.26%) 20 (0.21%)
3 Tecnico                      83 56 (0.67%)  5 (0.06%)   8 (0.1%)    11 (0.13%) 3 (0.04%) 
4 Bachillerato                 79 3 (0.04%)   9 (0.11%)   35 (0.44%)  25 (0.32%) 7 (0.09%) 
5 Universitario               228 27 (0.12%)  82 (0.36%)  72 (0.32%)  44 (0.19%) 3 (0.01%) 
6 Otro                         81 12 (0.15%)  40 (0.49%)  18 (0.22%)  11 (0.14%) 0 (0.00%) 
7 Total                       618 137 (0.22%) 143 (0.23%) 170 (0.28%) 126 (0.2%) 42 (0.07%)

Upvotes: 1

Related Questions