Reputation: 81
This is a follow-up question to this one.
Reworded:
Building on the previous dataset, name
is now important for my summary statistics because there are multiples of some animals with different names. Instead, we can still carry a descriptive statistic in the key_quality
field.
You will see that the resulting table contains all the individual statistics laid out nicely, but I would also like to see a mean of all baby dogs' heights for example. Then I would also like to see a mean of all baby mammals' heights as well. How do we add those stats?
animals_3 <- data.table(
family = rep(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 , 2, 2, 2, 2,3 ,3 ,3), 2),
animal = rep(c(1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4), 2),
name = rep(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6),2),
key_quality = rep(c(rep("bold", 3), rep("smiles", 3), rep("sings", 3), rep("fast", 3), rep("sleepy", 3), rep("ticklish", 3)), 2),
age = rep(c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), 2),
field = c(rep(1, 18), rep(2, 18)),
value = c(
c(25, 45, 75, 40, 65, 80, 10, 25, 50, 10, 15, 25, 18, 24, 40, 5, 15, 20),
c(5, 15, 30, 7, 19, 40, 3, 9, 13, 2, 5, 9, 4, 7, 14, 1, 2, 3.5))
)
animals_3 <- expss::apply_labels(
animals_3,
family = "|",
family = c("mammal" = 1, "reptilia" = 2, "amphibia" = 3),
animal = "|",
animal = c("dog" = 1, "cat" = 2, "turtle" = 3, "frog" = 4),
name = "|",
name = c("fred" = 1, "billiy" = 2, "tod" = 3, "timmy" = 4, "ricardo" = 5, "jonno" = 6),
key_quality = "|",
age = "|",
age = c("baby" = 1, "young" = 2, "mature" = 3),
field = "|",
field = c("height" = 1, "weight" = 2),
value = "|"
)
expss::expss_output_viewer()
animals_3 %>%
expss::tab_rows(family %nest% animal %nest% name) %>%
# here we create separate column for name
expss::tab_cols(total(label = "quality")) %>%
expss::tab_cells(key_quality) %>%
expss::tab_stat_fun(unique) %>%
# end of creation
expss::tab_cells(value) %>%
expss::tab_cols(age %nest%field) %>%
expss::tab_stat_sum(label = "") %>%
expss::tab_pivot(stat_position = "outside_column") %>%
expss::drop_empty_rows()
Thanks so much!
Upvotes: 1
Views: 120
Reputation: 4846
You need different statistics for the same nested variables. For this case it is simpler to calculate all statistics for each group and then remove unnecessary rows and labels. It is very easy because resulting table is just a usual data.frame.
library(expss)
animals_3 <- data.table(
family = rep(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 , 2, 2, 2, 2,3 ,3 ,3), 2),
animal = rep(c(1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4), 2),
name = rep(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6),2),
key_quality = rep(c(rep("bold", 3), rep("smiles", 3), rep("sings", 3), rep("fast", 3), rep("sleepy", 3), rep("ticklish", 3)), 2),
age = rep(c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3), 2),
field = c(rep(1, 18), rep(2, 18)),
value = c(
c(25, 45, 75, 40, 65, 80, 10, 25, 50, 10, 15, 25, 18, 24, 40, 5, 15, 20),
c(5, 15, 30, 7, 19, 40, 3, 9, 13, 2, 5, 9, 4, 7, 14, 1, 2, 3.5))
)
animals_3 <- expss::apply_labels(
animals_3,
family = "|",
family = c("mammal" = 1, "reptilia" = 2, "amphibia" = 3),
animal = "|",
animal = c("dog" = 1, "cat" = 2, "turtle" = 3, "frog" = 4),
name = "|",
name = c("fred" = 1, "billiy" = 2, "tod" = 3, "timmy" = 4, "ricardo" = 5, "jonno" = 6),
key_quality = "|",
age = "|",
age = c("baby" = 1, "young" = 2, "mature" = 3),
field = "|",
field = c("height" = 1, "weight" = 2),
value = "|"
)
expss::expss_output_viewer()
animals_3 %>%
expss::tab_rows(family %nest% animal %nest% name) %>%
# here we create separate column for name
expss::tab_cols(total(label = "quality")) %>%
expss::tab_cells(key_quality) %>%
# we name 'unique' as 'sum' to merge different statistics in one row
expss::tab_stat_fun(sum = unique) %>%
# end of creation
expss::tab_rows(family %nest% list(total(), animal %nest% list(total(label = "|"), name))) %>%
expss::tab_cells(value) %>%
expss::tab_cols(age %nest%field) %>%
expss::tab_stat_fun(mean, sum) %>%
expss::tab_pivot(stat_position = "outside_column") %>%
expss::drop_empty_rows() %>%
where(
(grepl("Total", row_labels) & grepl("mean", row_labels)) # to keep total and mean
| (!grepl("Total", row_labels) & grepl("sum", row_labels)) # to drop mean with all other rows
) %>%
compute(row_labels = gsub("\\|(mean|sum|#Total)", "", row_labels)) %>% # remove labels 'sum', 'mean', '#Total'
make_subheadings(1) # optional part
# | | | quality | baby | | young | | mature | |
# | | | | height | weight | height | weight | height | weight |
# | -------- | ------- | -------- | ------ | ------ | ------ | ------ | ------ | ------ |
# | mammal | | | 25 | 5 | 45.0 | 14.3 | 68.3 | 27.7 |
# | dog | | | 65 | 12 | 110.0 | 34.0 | 155.0 | 70.0 |
# | | fred | bold | 25 | 5 | 45.0 | 15.0 | 75.0 | 30.0 |
# | | billiy | smiles | 40 | 7 | 65.0 | 19.0 | 80.0 | 40.0 |
# | cat | | | 10 | 3 | 25.0 | 9.0 | 50.0 | 13.0 |
# | | tod | sings | 10 | 3 | 25.0 | 9.0 | 50.0 | 13.0 |
# | reptilia | | | 14 | 3 | 19.5 | 6.0 | 32.5 | 11.5 |
# | turtle | | | 28 | 6 | 39.0 | 12.0 | 65.0 | 23.0 |
# | | timmy | fast | 10 | 2 | 15.0 | 5.0 | 25.0 | 9.0 |
# | | ricardo | sleepy | 18 | 4 | 24.0 | 7.0 | 40.0 | 14.0 |
# | amphibia | | | 5 | 1 | 15.0 | 2.0 | 20.0 | 3.5 |
# | frog | | | 5 | 1 | 15.0 | 2.0 | 20.0 | 3.5 |
# | | jonno | ticklish | 5 | 1 | 15.0 | 2.0 | 20.0 | 3.5 |
Upvotes: 1