Reputation: 21
I have a big data.frame
composed of 239
objects and 546639
variables. The elements of the data.frame
include A
, B
, or 0
. Now I want to know the number of every element in every row. The following is part of the data.frame
.
1 rs22233… B B B B B B B B B B B
2 rs38622… B B B B B B B B A B A
3 rs13933… B B A B B B B B B B B
4 rs38637… B B A A A B B B A B A
5 rs12554… B B B B A B A B B B B
6 rs41105… A A A A B A B A A A B
Upvotes: 2
Views: 55
Reputation: 50678
Using table
(thanks to @thelatemail):
table(factor(unlist(df[-1]), levels = c("A", "B", "0")), row(df[-1]))
# 1 2 3 4 5 6
# A 0 2 1 5 2 8
# B 11 9 10 6 9 3
# 0 0 0 0 0 0 0
Or (slower):
sapply(split(df, 1:nrow(df)), function(x)
table(factor(unlist(x[, -1]), levels = c("A", "B", "0"))))
# 1 2 3 4 5 6
#A 0 2 1 5 2 8
#B 11 9 10 6 9 3
#0 0 0 0 0 0 0
Explanation: factor(..., levels = c("A", "B", "0"))
ensures that table
will always report counts for the same three factor
levels, which you can then store in a matrix
.
Using rle
:
lapply(split(df, 1:nrow(df)), function(x)
as.data.frame(unclass(rle(as.character(sort(unlist(x[, -1])))))))
#$`1`
# lengths values
#1 11 B
#
#$`2`
# lengths values
#1 2 A
#2 9 B
#
#$`3`
# lengths values
#1 1 A
#2 10 B
#
#$`4`
# lengths values
#1 5 A
#2 6 B
#
#$`5`
# lengths values
#1 2 A
#2 9 B
#
#$`6`
# lengths values
#1 8 A
#2 3 B
Using tidyr::gather
and dplyr::count
:
library(tidyverse);
df %>%
gather(key, val, -V2) %>%
count(V2, val)
## A tibble: 11 x 3
#V2 val n
#<fct> <chr> <int>
#1 rs12554… A 2
#2 rs12554… B 9
#3 rs13933… A 1
#4 rs13933… B 10
#5 rs22233… B 11
#6 rs38622… A 2
#7 rs38622… B 9
#8 rs38637… A 5
#9 rs38637… B 6
#10 rs41105… A 8
#11 rs41105… B 3
df <- read.table(text =
"1 rs22233… B B B B B B B B B B B
2 rs38622… B B B B B B B B A B A
3 rs13933… B B A B B B B B B B B
4 rs38637… B B A A A B B B A B A
5 rs12554… B B B B A B A B B B B
6 rs41105… A A A A B A B A A A B ", row.names = 1)
Upvotes: 2
Reputation: 10422
Using dplyr
and tidyr
:
library(dplyr)
library(tidyr)
df %>%
gather(key, value, V3:V13) %>%
group_by(V2) %>%
count(value) %>%
spread(value, n)
# A tibble: 6 x 3
# Groups: V2 [6]
V2 A B
<fct> <int> <int>
1 rs12554… 2 9
2 rs13933… 1 10
3 rs22233… NA 11
4 rs38622… 2 9
5 rs38637… 5 6
6 rs41105… 8 3
Upvotes: 1
Reputation: 47320
We can use apply
to count by rows using table
:
apply(df[-c(1,2)],1,table)
# [[1]]
#
# B
# 11
#
# [[2]]
#
# A B
# 2 9
#
# [[3]]
#
# A B
# 1 10
#
# [[4]]
#
# A B
# 5 6
#
# [[5]]
#
# A B
# 2 9
#
# [[6]]
#
# A B
# 8 3
Upvotes: 2