neversaint
neversaint

Reputation: 63984

How to create ecdf plot using ggplot with one value versus grouped values

I have two tibbles: ref.tbl and s1.tbl


# Reference scores: ten measurements for each of the groups K1, K2 and K3,
# interleaved as K1, K2, K3, K1, ... Note that some_score holds character
# strings, not numbers.
ref.tbl <- structure(
  list(
    K = rep(c("K1", "K2", "K3"), times = 10L),
    some_score = c(
      # K1        K2         K3
      "0.09651", "0.09787", "0.09526",
      "0.09543", "0.09479", "0.09435",
      "0.09122", "0.09123", "0.09024",
      "0.09458", "0.09478", "0.09299",
      "0.09382", "0.09450", "0.09285",
      "0.10158", "0.10127", "0.09889",
      "0.10339", "0.10476", "0.10370",
      "0.13594", "0.13257", "0.14651",
      "0.09458", "0.09473", "0.09325",
      "0.09296", "0.09350", "0.09196"
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -30L)
)
ref.tbl
#>     K some_score
#> 1  K1    0.09651
#> 2  K2    0.09787
#> 3  K3    0.09526
#> 4  K1    0.09543
#> 5  K2    0.09479
#> 6  K3    0.09435
#> 7  K1    0.09122
#> 8  K2    0.09123
#> 9  K3    0.09024
#> 10 K1    0.09458
#> 11 K2    0.09478
#> 12 K3    0.09299
#> 13 K1    0.09382
#> 14 K2    0.09450
#> 15 K3    0.09285
#> 16 K1    0.10158
#> 17 K2    0.10127
#> 18 K3    0.09889
#> 19 K1    0.10339
#> 20 K2    0.10476
#> 21 K3    0.10370
#> 22 K1    0.13594
#> 23 K2    0.13257
#> 24 K3    0.14651
#> 25 K1    0.09458
#> 26 K2    0.09473
#> 27 K3    0.09325
#> 28 K1    0.09296
#> 29 K2    0.09350
#> 30 K3    0.09196

# Scores for the single sample "s1" (ten measurements). As in ref.tbl,
# some_score holds character strings, not numbers.
s1.tbl <- structure(
  list(
    Sample_name = rep("s1", times = 10L),
    some_score = c(
      "0.04741", "0.06210", "0.09606", "0.08851", "0.05063",
      "0.39775", "0.05509", "0.10784", "0.04680", "0.04782"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
s1.tbl
#>    Sample_name some_score
#> 1           s1    0.04741
#> 2           s1    0.06210
#> 3           s1    0.09606
#> 4           s1    0.08851
#> 5           s1    0.05063
#> 6           s1    0.39775
#> 7           s1    0.05509
#> 8           s1    0.10784
#> 9           s1    0.04680
#> 10          s1    0.04782

What I want to do is to create an ECDF plot with one s1 line and three K lines.

With this code:

library(dplyr)
library(ggplot2)
# Keep only the K1 reference rows and relabel the group column so that it
# lines up with s1.tbl.
ref.k1 <- ref.tbl %>%
  filter(K == "K1") %>%
  select(Sample_name = K, some_score)

# Stack the K1 rows on top of the s1 rows and show the combined table.
dat <- bind_rows(ref.k1, s1.tbl)
dat

# One ECDF line per Sample_name; the two colours are assigned in the order
# of the groups.
ggplot(dat, aes(x = some_score)) +
  stat_ecdf(aes(group = Sample_name, colour = Sample_name)) +
  scale_color_manual(values = c("black", "red")) +
  theme_minimal(base_size = 10) +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size = 5),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(hjust = 1, size = 5)
  )

I can only plot s1 versus K1:

enter image description here

What I want is three black K lines (K1, K2, K3) versus one red s1 line. How can I do that given ref.tbl and s1.tbl?

Upvotes: 0

Views: 113

Answers (1)

Adam Quek
Adam Quek

Reputation: 7153

# Plot the ECDF of sample s1 (red) against every reference group (black).

# some_score is stored as character in both tibbles; convert it to numeric
# before plotting so that x is a number rather than a string.
s1.tbl <- s1.tbl %>%
  mutate(some_score = as.numeric(some_score))

# Give the reference table the same column names as s1.tbl so the two can be
# stacked. setNames() renames by position, so this relies on the columns being
# in the order (K, some_score).
ref.tbl <- ref.tbl %>%
  mutate(some_score = as.numeric(some_score)) %>%
  setNames(c("Sample_name", "some_score"))

dat <- bind_rows(s1.tbl, ref.tbl)

# Assign colours by group name instead of by position: every group that comes
# from s1.tbl is red, every other (reference) group is black. A named vector
# keeps the mapping correct regardless of how many reference groups there are
# or how their names sort relative to the sample name.
sample_names <- unique(dat$Sample_name)
line_colours <- setNames(
  ifelse(sample_names %in% s1.tbl$Sample_name, "red", "black"),
  sample_names
)

ggplot(dat, aes(x = some_score)) +
  stat_ecdf(aes(group = Sample_name, colour = Sample_name)) +
  scale_color_manual(values = line_colours) +
  theme_minimal(base_size = 10) +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size = 5),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(hjust = 1, size = 5)
  )

enter image description here

Upvotes: 2

Related Questions