Reputation: 31
I have a dataframe as follows:
LA LE LI LO LU
A 1 0 0 0 0
B 0 0 1 1 1
C 0 0 0 0 0
D 1 0 0 0 0
E 1 0 1 1 0
and I want to count how many values match between two rows, counting only positions where both rows have a 1 (matching 0s should not count). For example, AxE = 1 and BxE = 2. I would like to get these counts for every combination of two rows. I'm using
# Load the tab-separated table; the first column supplies the row names.
df <- read.table(file = "rmatr.txt", sep = "\t", row.names = 1, header = TRUE)
to read my file
Upvotes: 1
Views: 276
Reputation: 886938
We could also use tcrossprod
from base R
# tcrossprod() multiplies the matrix by its own transpose, so each cell holds
# the sum of element-wise products of two rows; for 0/1 data that is the
# number of columns where both rows are 1. Reshape to long form, one row per
# ordered pair.
out <- as.data.frame(as.table(tcrossprod(as.matrix(df))))

# Sort the two labels within each pair so (B, A) and (A, B) become the same key.
id_cols <- c("Var1", "Var2")
out[id_cols] <- t(apply(out[id_cols], 1, sort))

# Drop self-pairs and keep only the first occurrence of each unordered pair.
out[which(out$Var1 != out$Var2 & !duplicated(out[id_cols])), ]
# Var1 Var2 Freq
#2 A B 0
#3 A C 0
#4 A D 1
#5 A E 1
#8 B C 0
#9 B D 0
#10 B E 2
#14 C D 0
#15 C E 0
#20 D E 1
# Example data: five rows (A-E) of 0/1 integer flags across five columns.
df <- data.frame(
  LA = c(1L, 0L, 0L, 1L, 1L),
  LE = c(0L, 0L, 0L, 0L, 0L),
  LI = c(0L, 1L, 0L, 0L, 1L),
  LO = c(0L, 1L, 0L, 0L, 1L),
  LU = c(0L, 1L, 0L, 0L, 0L),
  row.names = c("A", "B", "C", "D", "E")
)
Upvotes: 0
Reputation: 101064
Maybe you can try combn
like below
# For every unordered pair of rows, sum the element-wise product of the two
# rows; with 0/1 data this counts the columns where both rows are 1.
# local() keeps the helper variables out of the calling environment.
local({
  row_mat <- as.matrix(df)
  # Each column of pair_idx holds the two row positions of one pair.
  pair_idx <- combn(nrow(row_mat), 2)
  data.frame(
    X1 = row.names(df)[pair_idx[1, ]],
    X2 = row.names(df)[pair_idx[2, ]],
    cnt = apply(
      pair_idx, 2,
      function(p) sum(row_mat[p[1], ] * row_mat[p[2], ])
    )
  )
})
which gives
X1 X2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
Data
> dput(df)
# dput() output: evaluating this expression rebuilds the example data frame
# (rows A-E, integer 0/1 columns LA, LE, LI, LO, LU). Assign the result to
# `df` to reuse it with the code above.
structure(list(LA = c(1L, 0L, 0L, 1L, 1L), LE = c(0L, 0L, 0L,
0L, 0L), LI = c(0L, 1L, 0L, 0L, 1L), LO = c(0L, 1L, 0L, 0L, 1L
), LU = c(0L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c("A",
"B", "C", "D", "E"))
Upvotes: 2
Reputation: 10375
# Count, for every unordered pair of rows in `df`, the columns in which both
# rows equal 1 (shared 0s are not counted).
#
# Objects produced, as in the original snippet:
#   tmp - 3 x n_pairs integer matrix: first row index, second row index, count
#   a data frame with columns Row1, Row2, cnt (the value of the last expression)
n_rows <- nrow(df)

# combn() stops with "n < m" when there are fewer than two rows, so fall back
# to an empty 2 x 0 index matrix; everything below then yields empty results
# instead of iterating over a backwards sequence such as 1:0.
pairs <- if (n_rows >= 2L) {
  combn(n_rows, 2L)
} else {
  matrix(integer(0), nrow = 2L)
}

# Do the comparison once for the whole table instead of once per pair.
is_one <- df == 1

# vapply() always returns an integer vector, whatever the number of pairs;
# the nested sapply() it replaces changed shape with the input size and broke
# do.call(cbind, ...) for a two-row table.
cnt <- vapply(
  seq_len(ncol(pairs)),
  function(k) sum(is_one[pairs[1L, k], ] & is_one[pairs[2L, k], ]),
  integer(1)
)

# Wrapping cnt in a one-row matrix keeps tmp at three rows even with no pairs.
tmp <- rbind(pairs, matrix(cnt, nrow = 1L))

data.frame(
  "Row1" = rownames(df)[tmp[1, ]],
  "Row2" = rownames(df)[tmp[2, ]],
  "cnt" = tmp[3, ]
)
Row1 Row2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
Upvotes: 1