Tou Mou
Tou Mou

Reputation: 1274

How to count frequencies by columns for each group

I have a dataset called ballons with two clusters, TRUE and FALSE (stored in the logical column T). I want to count the frequency of each modality (factor level) per cluster and per column, so I tried:

library(ggplot2)
library(tidyverse)

# Example data: 19 rows described by four two-level factors, plus the
# logical column `T` that marks which cluster each row belongs to.
ballons <- data.frame(
  YELLOW = factor(
    rep(c("YELLOW", "PURPLE"), times = c(9L, 10L)),
    levels = c("PURPLE", "YELLOW")
  ),
  SMALL = factor(
    rep(c("SMALL", "LARGE", "SMALL", "LARGE"), times = c(4L, 5L, 5L, 5L)),
    levels = c("LARGE", "SMALL")
  ),
  STRETCH = factor(
    c(
      "STRETCH", "STRETCH", "DIP", "DIP",
      rep(c("STRETCH", "STRETCH", "STRETCH", "DIP", "DIP"), times = 3L)
    ),
    levels = c("DIP", "STRETCH")
  ),
  ADULT = factor(
    c(
      "ADULT", "CHILD", "ADULT", "CHILD",
      rep(c("ADULT", "ADULT", "CHILD", "ADULT", "CHILD"), times = 3L)
    ),
    levels = c("ADULT", "CHILD")
  ),
  T = c(
    TRUE, FALSE, FALSE, FALSE,
    rep(c(TRUE, TRUE, FALSE, FALSE, FALSE), times = 3L)
  )
)

# Split the data into one tibble per value of the cluster column `T`.
r1 <- ballons %>%
  group_by(T) %>%
  group_split()

print(r1)


# Tabulate every column within each cluster and stack the results row-wise.
# Each cell ends up holding a whole table, which is why the printed cells
# look like "6, 6" instead of one count per level.
data.frame(
  do.call(
    rbind,
    lapply(r1, function(grp) sapply(grp, table))
  )
)


  YELLOW SMALL STRETCH ADULT  T
1   6, 6  6, 6    8, 4  4, 8 12
2   4, 3  4, 3    0, 7  7, 0  7    

However, I'm looking for this representation (expected output):

      YELLOW PURPLE SMALL LARGE ...
FALSE   6    6      6      6    ...
TRUE    4    3      4      3    etc

I also tried without success:

# Attempt 1: per-cluster list of tables, one table per column.
r1 %>% lapply(function(grp) sapply(grp, table))
# Attempt 2: the same tables bound row-wise into a data frame.
data.frame(do.call(rbind, lapply(r1, function(grp) sapply(grp, table))))
# Attempt 3: cross-tabulate every column against the cluster column.
sapply(ballons, table, ballons$T)

Upvotes: 2

Views: 104

Answers (3)

user63230
user63230

Reputation: 4708

Another approach is to use janitor::tabyl(), which is convenient for building frequency tables:

library(janitor)
# Two-way frequency table: one row per value of `T`, one column per level
# of `YELLOW`.
ballons %>%
  tabyl(T, YELLOW)
#      T PURPLE YELLOW
#  FALSE      6      6
#   TRUE      4      3

Now loop over all columns and then join output:

# Build one T-by-column frequency table per factor column, then join the
# tables on `T`. The grouping column is dropped by name rather than by
# position, so the code keeps working if the column order changes.
ballons %>%
  select(-T) %>%
  imap(~ tabyl(dat = ballons, T, !!sym(.y))) %>%
  reduce(full_join, by = "T")
#      T PURPLE YELLOW LARGE SMALL DIP STRETCH ADULT CHILD
#  FALSE      6      6     6     6   8       4     4     8
#   TRUE      4      3     4     3   0       7     7     0

!!sym() turns each column name (a string) into a symbol and unquotes it, so that tabyl() treats it as a column of the data; see @Lionel Henry's explanation here and @Moody_Mudskipper's example with tabyl here.

Upvotes: 2

Yuriy Saraykin
Yuriy Saraykin

Reputation: 8880

library(tidyverse)

# Reshape to long form (one row per cell), then spread the levels back out
# as columns, counting how often each level occurs for each value of `T`.
# `id_cols` is passed by name: supplying it by position is deprecated in
# tidyr 1.3.0.
ballons %>%
  pivot_longer(-T) %>%
  pivot_wider(
    id_cols = T,
    names_from = value,
    values_fn = length,
    values_fill = 0
  )
#> # A tibble: 2 x 9
#>   T     YELLOW SMALL STRETCH ADULT CHILD   DIP LARGE PURPLE
#>   <lgl>  <int> <int>   <int> <int> <int> <int> <int>  <int>
#> 1 TRUE       3     3       7     7     0     0     4      4
#> 2 FALSE      6     6       4     4     8     8     6      6

Upvotes: 1

Sandwichnick
Sandwichnick

Reputation: 1466

You are already using the tidyverse, so you can use a tidy solution:

# Stack all non-`T` columns into a single `value` column, then cross-tabulate
# the cluster flag against the stacked values.
ballons %>%
  pivot_longer(cols = !c(T)) %>%
  select(T, value) %>%
  table()

Upvotes: 3

Related Questions