Ahdee
Ahdee

Reputation: 4949

Visualize frequency of categorical variables between different samples?

Hi, I would like to generate a plot that looks like this: [image: enter image description here]

At first glance this looks like an UpSet plot; however, it is not, because I don't want to tabulate the intersections. The code below generates an UpSet plot that tabulates the intersections, but what I want is just to count the totals and show the intersections as in the image above. Is this possible?

# Build and draw an UpSet plot of the drug/sample sets with ComplexHeatmap.
library(ComplexHeatmap)

# Each drug maps to the samples it covers.
lt <- list(
  drug1 = c("s1", "s2", "s3"),
  drug2 = c("s1", "s2"),
  drug3 = "s2"
)

# Combination matrix; the default mode is `distinct`.
m1 <- make_comb_mat(lt)

# Number of distinct samples across all drugs (three samples).
all_samples <- unique(unlist(lt))
length(all_samples)

UpSet(m1)

Upvotes: 0

Views: 148

Answers (1)

d.b
d.b

Reputation: 32548

It's doable in base R graphics, but maybe not worth the trouble.

# Draw an UpSet-style chart with base graphics: one vertical bar per sample
# whose height is the number of sets (drugs) containing that sample, and,
# stacked above the bars, a dot matrix showing which sets contain it.
lt <- list(
  drug1 = c("s1", "s2", "s3"),
  drug2 = c("s1", "s2"),
  drug3 = c("s2")
)

# Samples in sorted order; they define the x positions of bars and columns.
samples <- sort(unique(unlist(lt)))

# Bar heights: how many sets each sample occurs in, ordered like `samples`.
bc <- table(unlist(lt))
bc <- bc[order(names(bc))]

# Membership matrix: rows are sets, columns are samples, TRUE where the sample
# belongs to the set. Built with matrix() rather than nested sapply() so it
# stays a matrix even when `lt` holds a single set or a single sample.
m <- matrix(
  unlist(lapply(lt, function(set) samples %in% set), use.names = FALSE),
  nrow = length(lt),
  byrow = TRUE,
  dimnames = list(names(lt), samples)
)

# Vertical gap between the top of the tallest bar and the dot matrix.
margin <- 0.25

# Height at which the dot matrix starts (row i is drawn at y = i + offset).
offset <- max(bc) + margin

# Close any open devices so the plot starts from default graphical parameters.
graphics.off()
plot(1, 1,
     xlim = c(1, length(bc)),
     ylim = c(0, nrow(m) + offset),
     type = "n",
     ann = FALSE,
     axes = FALSE)
# box()

# Bars: one thick vertical segment per sample, from 0 up to its count.
segments(x0 = seq_along(bc), y0 = 0, y1 = as.vector(bc),
         lwd = 8, lend = "butt")

# Dots: one open circle for every (set, sample) membership.
member <- which(m, arr.ind = TRUE)
points(member[, "col"], member[, "row"] + offset, cex = 2, lwd = 2)

# Connectors: join dots that sit in vertically adjacent rows of one column.
# Row r of `linked` is TRUE where both set r and set r + 1 contain the sample.
linked <- m[-1, , drop = FALSE] & m[-nrow(m), , drop = FALSE]
joins <- which(linked, arr.ind = TRUE)
if (nrow(joins) > 0) {
  segments(x0 = joins[, "col"],
           y0 = joins[, "row"] + offset,
           y1 = joins[, "row"] + 1 + offset)
}

# Sample names along the bottom, set names next to the dot-matrix rows.
axis(1, at = seq_len(ncol(m)), labels = colnames(m), las = 1)
# axis(2, at = seq_along(bc), labels = seq_along(bc), las = 1)
axis(2, at = seq_len(nrow(m)) + offset, labels = rownames(m), las = 1)

enter image description here

Upvotes: 1

Related Questions