GOGA GOGA
GOGA GOGA

Reputation: 407

calculate the number of each element in the column by group

my data.frame:

data<-structure(list(GROUP = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), 
                     col1 = c(23L, 432L, 234L, 234L, 3123L, 657L,
                              8768L, 123L,42323L), 
                     col2 = c(567L, 765L, 8678L, 46L, 35L, 24L, 76L, 
                              789L, 45L), 
                     col3 = c(1L, 3L, 5L, 7L, 8L, 0L, 8L, 7L, 3L), 
                     col4 = c("S", "S", "S", "S", "F", "F", "F", 
                              "F", "F"), 
                     col5 = c("a","a", "s", "s", "s", "s", "s", 
                              "s", "s")
               ), class = "data.frame", row.names = c(NA,-9L))

Function:

combination = list(c(2,3),c(3,4),c(5,6))
    wilcox.fun <- function(df, id_group){
      df = df[df$GROUP%in%id_group,]
      x <- function(dat) { 
        do.call(rbind, lapply(combination, function(x) {
          col1 <- dat[[x[1]]]
          col2 <- dat[[x[2]]]
          if(is.numeric(col1) && is.numeric(col2)) test <- wilcox.test(col1, col2)
          else  test <- fisher.test(col1, col2)
          data.frame(Test = sprintf('Group %s by Group %s Group',x[1],x[2]), 
                     p = test$p.value,
                     col <- paste(x[1],unique(dat[[x[1]]]),unique(table(dat[[x[1]]])),":",x[2],unique(dat[[x[2]]]),unique(table(dat[[x[2]]])))
          )
        }))
      }
      return (purrr::map_df(split(df, df$GROUP),x,.id="GROUP" ))
    }
    
    wilcox.fun(data, c(1, 2))

I want to calculate for each value from the columns their number by groups.

For example, for the column col4, the number of "S" in group 1 = 2. In group 2, the number of " S " = 2. In group 1, the number of " F " = 3. In group 2, the number of " F " = 2 and so on

I tried using the table function, but it outputs too many repeating combinations.

Upvotes: 0

Views: 588

Answers (3)

ThomasIsCoding
ThomasIsCoding

Reputation: 102880

You can try the code below

lapply(
  data[-1],
  aggregate,
  data[1],
  length
)

which gives

$col1
  GROUP x
1     1 5
2     2 4

$col2
  GROUP x
1     1 5
2     2 4

$col3
  GROUP x
1     1 5
2     2 4

$col4
  GROUP x
1     1 5
2     2 4

$col5
  GROUP x
1     1 5
2     2 4

Or using table

lapply(
  data[-1],
  table,
  data$GROUP
)

gives

$col1

        1 2
  23    1 0
  123   0 1
  234   1 1
  432   0 1
  657   0 1
  3123  1 0
  8768  1 0
  42323 1 0

$col2

       1 2
  24   0 1
  35   1 0
  45   1 0
  46   0 1
  76   1 0
  567  1 0
  765  0 1
  789  0 1
  8678 1 0

$col3

    1 2
  0 0 1
  1 1 0
  3 1 1
  5 1 0
  7 0 2
  8 2 0

$col4

    1 2
  F 3 2
  S 2 2

$col5

    1 2
  a 1 1
  s 4 3

Upvotes: 1

jamoreiras
jamoreiras

Reputation: 343

You can do this with dplyr. In order to store the output in a list:

library(dplyr)
lapply(names(data), function(i) {
   data %>% select([email protected], GROUP) %>% group_by([email protected],GROUP) %>% count()
 })

will retrieve:

[[1]]
# A tibble: 2 x 3
# Groups:   [email protected], GROUP [2]
  `[email protected]` GROUP     n
  <chr>     <int> <int>
1 GROUP         1     5
2 GROUP         2     4

[[2]]
# A tibble: 2 x 3
# Groups:   [email protected], GROUP [2]
  `[email protected]` GROUP     n
  <chr>     <int> <int>
1 col1          1     5
2 col1          2     4
...

Upvotes: 1

Martin Gal
Martin Gal

Reputation: 16998

Not sure what you are asking for:

library(dplyr)
data %>% 
  group_by(GROUP, col4) %>% 
  count()

returns

# A tibble: 4 x 3
# Groups:   GROUP, col4 [4]
  GROUP col4      n
  <int> <chr> <int>
1     1 F         3
2     1 S         2
3     2 F         2
4     2 S         2

Upvotes: 2

Related Questions