MSM
MSM

Reputation: 85

Creating a co-occurrence matrix using r

I would like to build a co-occurrence matrix for the following data (randomly generated):

         OTU1 OTU2 OTU3 OTU4 .... OTU598
patient1  0    1    1    0        1    
patient2  1    0    1    0        1 
.
.
.
patient143 1   0    1    1         1

I want to create a co-occurrence matrix with three columns as follows (shown for one patient): take the first OTU and compare it with every other OTU. If both OTUs in a pair (e.g. OTU1 and OTU2) have the value 1, the `counts` column for that pair should be 1; otherwise it should be 0.

otu1    otu2    counts
OTU1    OTU2      0
OTU1    OTU3      0
OTU1    OTU4      0
OTU2    OTU1      0
OTU2    OTU3      1

So I tried the following code:

# For each patient (row of bin_OTU_tab), compare every OTU i with every OTU
# j >= i and record 1 when both values are 1, otherwise 0.
# The result is a nested list: a[[patient]][[i]][[k]], where k runs over
# j = i, i + 1, ..., ncol(bin_OTU_tab).
n_otu <- ncol(bin_OTU_tab)
a <- apply(bin_OTU_tab, 1, function(row_val) {
  lapply(seq_len(n_otu), function(i) {
    lapply(i:n_otu, function(j) {
      # row_val already holds this row's values, so index it directly;
      # using it as a row index into bin_OTU_tab selects the wrong cells and
      # can hand `&&` a condition whose length is not 1.
      if (row_val[i] == 1 && row_val[j] == 1) 1 else 0
    })
  })
})

It's in matrix format. I want to convert this matrix into a data frame arranged like the desired output shown above. I assume `rbind` would work, but I couldn't figure out how to continue from here. Could anyone please guide me?

Upvotes: 0

Views: 197

Answers (2)

ThomasIsCoding
ThomasIsCoding

Reputation: 101343

If you are using base R, then the following code may be something you want

# Column-index pairs (i < j) for every unordered pair of OTUs; computed once
# because it does not depend on the patient.
pair_idx <- combn(seq_len(ncol(m)), 2)

# One data frame per row of m: each OTU pair plus the product of the two
# values, which is 1 only when both are 1.
l <- apply(m, 1, function(v) {
  # Build the columns separately: cbind()-ing the character names with the
  # numeric products would coerce `counts` to character.
  data.frame(
    otu1 = colnames(m)[pair_idx[1, ]],
    otu2 = colnames(m)[pair_idx[2, ]],
    counts = unname(v[pair_idx[1, ]] * v[pair_idx[2, ]]),
    stringsAsFactors = FALSE
  )
})

yielding

> l
$patient1
   otu1 otu2 counts
1  OTU1 OTU2      0
2  OTU1 OTU3      0
3  OTU1 OTU4      0
4  OTU1 OTU5      0
5  OTU2 OTU3      0
6  OTU2 OTU4      0
7  OTU2 OTU5      0
8  OTU3 OTU4      0
9  OTU3 OTU5      0
10 OTU4 OTU5      1

$patient2
   otu1 otu2 counts
1  OTU1 OTU2      0
2  OTU1 OTU3      0
3  OTU1 OTU4      0
4  OTU1 OTU5      0
5  OTU2 OTU3      0
6  OTU2 OTU4      0
7  OTU2 OTU5      0
8  OTU3 OTU4      0
9  OTU3 OTU5      0
10 OTU4 OTU5      0

$patient3
   otu1 otu2 counts
1  OTU1 OTU2      0
2  OTU1 OTU3      0
3  OTU1 OTU4      0
4  OTU1 OTU5      0
5  OTU2 OTU3      1
6  OTU2 OTU4      1
7  OTU2 OTU5      0
8  OTU3 OTU4      1
9  OTU3 OTU5      0
10 OTU4 OTU5      0

$patient4
   otu1 otu2 counts
1  OTU1 OTU2      1
2  OTU1 OTU3      1
3  OTU1 OTU4      0
4  OTU1 OTU5      0
5  OTU2 OTU3      1
6  OTU2 OTU4      0
7  OTU2 OTU5      0
8  OTU3 OTU4      0
9  OTU3 OTU5      0
10 OTU4 OTU5      0

DATA

# Example input: a 4 x 5 binary (0/1) matrix, rows = patients, columns = OTUs.
# Values are written row by row so each line reads as one patient.
m <- matrix(
  c(
    0, 0, 0, 1, 1,  # patient1
    0, 1, 0, 0, 0,  # patient2
    0, 1, 1, 1, 0,  # patient3
    1, 1, 1, 0, 0   # patient4
  ),
  nrow = 4, ncol = 5, byrow = TRUE,
  dimnames = list(
    c("patient1", "patient2", "patient3", "patient4"),
    c("OTU1", "OTU2", "OTU3", "OTU4", "OTU5")
  )
)

Upvotes: 0

Gregor Thomas
Gregor Thomas

Reputation: 145775

This gives a list of data frames, one for each patient:

# fake data: a 3 x 5 matrix of random 0/1 values (rows = patients)
set.seed(47)
m <- matrix(round(runif(15)), nrow = 3)
colnames(m) <- paste0("OTU", seq_len(ncol(m)))
m
#      OTU1 OTU2 OTU3 OTU4 OTU5
# [1,]    1    1    0    1    0
# [2,]    0    1    0    0    1
# [3,]    1    1    1    1    1

# Template shared by every patient: one row per unordered pair of OTUs,
# with counts initialised to 0.
template <- as.data.frame(t(combn(colnames(m), 2)))
names(template) <- c("otu1", "otu2")
template$counts <- 0

# For each row of m, copy the template and set counts to 1 for the pairs
# whose two OTUs both equal 1. apply() passes each row as a vector named by
# colnames(m), which is what names(x) relies on.
result <- apply(m, 1, function(x) {
  ones <- names(x)[x == 1]
  pairs <- template
  both_present <- pairs$otu1 %in% ones & pairs$otu2 %in% ones
  pairs[both_present, "counts"] <- 1
  pairs
})

result
# [[1]]
#    otu1 otu2 counts
# 1  OTU1 OTU2      1
# 2  OTU1 OTU3      0
# 3  OTU1 OTU4      1
# 4  OTU1 OTU5      0
# 5  OTU2 OTU3      0
# 6  OTU2 OTU4      1
# 7  OTU2 OTU5      0
# 8  OTU3 OTU4      0
# 9  OTU3 OTU5      0
# 10 OTU4 OTU5      0
# 
# [[2]]
#    otu1 otu2 counts
# 1  OTU1 OTU2      0
# 2  OTU1 OTU3      0
# 3  OTU1 OTU4      0
# 4  OTU1 OTU5      0
# 5  OTU2 OTU3      0
# 6  OTU2 OTU4      0
# 7  OTU2 OTU5      1
# 8  OTU3 OTU4      0
# 9  OTU3 OTU5      0
# 10 OTU4 OTU5      0
# 
# [[3]]
#    otu1 otu2 counts
# 1  OTU1 OTU2      1
# 2  OTU1 OTU3      1
# 3  OTU1 OTU4      1
# 4  OTU1 OTU5      1
# 5  OTU2 OTU3      1
# 6  OTU2 OTU4      1
# 7  OTU2 OTU5      1
# 8  OTU3 OTU4      1
# 9  OTU3 OTU5      1
# 10 OTU4 OTU5      1

Upvotes: 1

Related Questions