Reputation: 1274
I have a dataset called `ballons`
with two clusters, given by the logical column `T`: `TRUE` and `FALSE`.
I want to count the frequency of each modality (level) per cluster and per column, so I tried:
library(ggplot2)
library(tidyverse)
# Example data: 19 balloons described by four two-level factors plus the
# logical cluster indicator T.
ballons <- structure(
  list(
    YELLOW = structure(
      c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
      .Label = c("PURPLE", "YELLOW"),
      class = "factor"
    ),
    SMALL = structure(
      c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
        2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L),
      .Label = c("LARGE", "SMALL"),
      class = "factor"
    ),
    STRETCH = structure(
      c(2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
        2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L),
      .Label = c("DIP", "STRETCH"),
      class = "factor"
    ),
    ADULT = structure(
      c(1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
        1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L),
      .Label = c("ADULT", "CHILD"),
      class = "factor"
    ),
    T = c(
      TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE,
      TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE
    )
  ),
  class = "data.frame",
  row.names = c(NA, -19L)
)
# Split the data into one data frame per value of T.
r1 <- ballons %>%
  group_by(T) %>%
  group_split()
print(r1)
# Tabulate every column within each group, then stack the groups row-wise.
# The per-column tables do not all have the same length (T has a single value
# inside each group), so sapply() returns a list and every cell of the result
# holds a whole table, printed as e.g. "6, 6".
data.frame(
  do.call(
    rbind,
    lapply(r1, function(grp) sapply(grp, table))
  )
)
YELLOW SMALL STRETCH ADULT T
1 6, 6 6, 6 8, 4 4, 8 12
2 4, 3 4, 3 0, 7 7, 0 7
However, I'm looking for this representation (expected output):
PURPLE YELLOW LARGE SMALL ...
FALSE 6 6 6 6 ...
TRUE 4 3 4 3 etc
I also tried without success:
# Attempt 1: a list with, for each group, the per-column frequency tables.
lapply(r1, function(grp) sapply(grp, table))
# Attempt 2: the same list bound row-wise; each cell still holds a whole table.
data.frame(
  do.call(
    rbind,
    lapply(r1, function(grp) sapply(grp, table))
  )
)
# Attempt 3: cross-tabulate every column against T; sapply() then flattens
# each resulting table into a single column of counts.
sapply(ballons, table, ballons[["T"]])
Upvotes: 2
Views: 104
Reputation: 4708
Another approach is to use `janitor::tabyl`,
which is useful for building frequency tables:
library(janitor)
# Two-way frequency table: one row per value of T, one column per YELLOW level.
ballons %>%
  tabyl(T, YELLOW)
# T PURPLE YELLOW
# FALSE 6 6
# TRUE 4 3
Now loop over all the other columns, build one such table per column, and join the results on `T`:
# Build one T-by-column table for every column except the fifth (T itself),
# then merge all of the tables on their shared T column.
ballons[-5] %>%
  imap(function(column_values, column_name) {
    # Only the name is needed: it is turned into a symbol and unquoted so that
    # tabyl() looks the column up in ballons.
    tabyl(ballons, T, !!sym(column_name))
  }) %>%
  reduce(full_join, by = "T")
# T PURPLE YELLOW LARGE SMALL DIP STRETCH ADULT CHILD
# FALSE 6 6 6 6 8 4 4 8
# TRUE 4 3 4 3 0 7 7 0
`!!sym(.y)`
turns each column name (a string) into a symbol and unquotes it, so that `tabyl` treats it as a column of `ballons`; see @Lionel Henry's explanation here and @Moody_Mudskipper's example with `tabyl`
here.
Upvotes: 2
Reputation: 8880
library(tidyverse)
# Count every modality per value of T: stack all columns except T into
# name/value pairs, then spread the values into one count column per modality.
ballons %>%
  pivot_longer(cols = -T) %>%
  pivot_wider(
    # Named explicitly: recent tidyr releases place `...` right after `data`
    # and deprecate passing `id_cols` by position.
    id_cols = T,
    names_from = value,
    values_from = value,
    # Each cell is the number of rows carrying that modality in that cluster.
    values_fn = length,
    # Modalities that never occur in a cluster get a count of 0 instead of NA.
    values_fill = 0
  )
#> # A tibble: 2 x 9
#> T YELLOW SMALL STRETCH ADULT CHILD DIP LARGE PURPLE
#> <lgl> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 TRUE 3 3 7 7 0 0 4 4
#> 2 FALSE 6 6 4 4 8 8 6 6
Upvotes: 1
Reputation: 1466
You are already using the tidyverse, so you can use a tidy solution:
# Stack every column except T into name/value pairs, keep only the cluster
# flag and the modality, and cross-tabulate the two.
ballons %>%
  pivot_longer(cols = -T) %>%
  select(T, value) %>%
  table()
Upvotes: 3