Reputation: 407
my data.frame:
data<-structure(list(GROUP = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L),
col1 = c(23L, 432L, 234L, 234L, 3123L, 657L,
8768L, 123L,42323L),
col2 = c(567L, 765L, 8678L, 46L, 35L, 24L, 76L,
789L, 45L),
col3 = c(1L, 3L, 5L, 7L, 8L, 0L, 8L, 7L, 3L),
col4 = c("S", "S", "S", "S", "F", "F", "F",
"F", "F"),
col5 = c("a","a", "s", "s", "s", "s", "s",
"s", "s")
), class = "data.frame", row.names = c(NA,-9L))
Function:
combination = list(c(2,3),c(3,4),c(5,6))
wilcox.fun <- function(df, id_group){
df = df[df$GROUP%in%id_group,]
x <- function(dat) {
do.call(rbind, lapply(combination, function(x) {
col1 <- dat[[x[1]]]
col2 <- dat[[x[2]]]
if(is.numeric(col1) && is.numeric(col2)) test <- wilcox.test(col1, col2)
else test <- fisher.test(col1, col2)
data.frame(Test = sprintf('Group %s by Group %s Group',x[1],x[2]),
p = test$p.value,
col <- paste(x[1],unique(dat[[x[1]]]),unique(table(dat[[x[1]]])),":",x[2],unique(dat[[x[2]]]),unique(table(dat[[x[2]]])))
)
}))
}
return (purrr::map_df(split(df, df$GROUP),x,.id="GROUP" ))
}
wilcox.fun(data, c(1, 2))
I want to calculate for each value from the columns their number by groups.
For example, for the column col4
, the number of "S" in group 1 = 2. In group 2, the number of " S " = 2. In group 1, the number of " F " = 3. In group 2, the number of " F " = 2 and so on
I tried using the table
function, but it outputs too many repeating combinations.
Upvotes: 0
Views: 588
Reputation: 102880
You can try the code below
lapply(
data[-1],
aggregate,
data[1],
length
)
which gives
$col1
GROUP x
1 1 5
2 2 4
$col2
GROUP x
1 1 5
2 2 4
$col3
GROUP x
1 1 5
2 2 4
$col4
GROUP x
1 1 5
2 2 4
$col5
GROUP x
1 1 5
2 2 4
Or using table
lapply(
data[-1],
table,
data$GROUP
)
gives
$col1
1 2
23 1 0
123 0 1
234 1 1
432 0 1
657 0 1
3123 1 0
8768 1 0
42323 1 0
$col2
1 2
24 0 1
35 1 0
45 1 0
46 0 1
76 1 0
567 1 0
765 0 1
789 0 1
8678 1 0
$col3
1 2
0 0 1
1 1 0
3 1 1
5 1 0
7 0 2
8 2 0
$col4
1 2
F 3 2
S 2 2
$col5
1 2
a 1 1
s 4 3
Upvotes: 1
Reputation: 343
You can do this with dplyr
. In order to store the output in a list:
library(dplyr)
lapply(names(data), function(i) {
data %>% select([email protected], GROUP) %>% group_by([email protected],GROUP) %>% count()
})
will retrieve:
[[1]]
# A tibble: 2 x 3
# Groups: [email protected], GROUP [2]
`[email protected]` GROUP n
<chr> <int> <int>
1 GROUP 1 5
2 GROUP 2 4
[[2]]
# A tibble: 2 x 3
# Groups: [email protected], GROUP [2]
`[email protected]` GROUP n
<chr> <int> <int>
1 col1 1 5
2 col1 2 4
...
Upvotes: 1
Reputation: 16998
Not sure what you are asking for:
library(dplyr)
data %>%
group_by(GROUP, col4) %>%
count()
returns
# A tibble: 4 x 3
# Groups: GROUP, col4 [4]
GROUP col4 n
<int> <chr> <int>
1 1 F 3
2 1 S 2
3 2 F 2
4 2 S 2
Upvotes: 2