Reputation: 79
Imagine I have 4 samples of different sizes, and within each sample I have sequential positions from m to n.
I'd like to visualise the length of each sample with a coloured bar, and place lines indicating the positions within a sample, like this:
This is what I have so far; however, I don’t want the positions to be stored in separate data frames.
library("ggplot2")
# One row per sample: the coloured bar runs from x1 to x2 and occupies the
# vertical band y1..y2.
dat <- data.frame(
  x1 = rep(1, 4),
  x2 = c(200, 100, 270, 500),
  y1 = c(1.1, 1.3, 1.5, 1.7),
  y2 = c(1.2, 1.4, 1.6, 1.8),
  samp = c("1", "2", "3", "4")
)

# One data frame per position index; each row is a one-unit-wide marker
# (x1..x2) drawn inside the y-band of the corresponding sample.
pos1 <- data.frame(
  x1 = c(100, 50, 24, 100),
  x2 = c(101, 51, 25, 101),
  y1 = c(1.1, 1.3, 1.5, 1.7),
  y2 = c(1.2, 1.4, 1.6, 1.8),
  samp = c("1", "2", "3", "4")
)
pos2 <- data.frame(
  x1 = c(110, 65, 120, 405),
  x2 = c(111, 66, 121, 406),
  y1 = c(1.1, 1.3, 1.5, 1.7),
  y2 = c(1.2, 1.4, 1.6, 1.8),
  samp = c("1", "2", "3", "4")
)
pos3 <- data.frame(
  x1 = c(190, 70, 240, 442),
  x2 = c(191, 71, 241, 443),
  y1 = c(1.1, 1.3, 1.5, 1.7),
  y2 = c(1.2, 1.4, 1.6, 1.8),
  samp = c("1", "2", "3", "4")
)
# Draw every sample as a translucent, outlined bar stacked above the previous
# one, label it with its sample id, then overlay the position markers.
ggplot() +
  scale_x_continuous(name = "x") +
  scale_y_continuous(name = "y") +
  geom_rect(
    data = dat,
    mapping = aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2, fill = samp),
    color = "black",
    alpha = 0.5
  ) +
  geom_text(data = dat, mapping = aes(x = x1, y = y1, label = samp), size = 4) +
  # The three position layers are identical apart from their data, so they are
  # generated from a list; ggplot2 adds the list elements in order.
  lapply(list(pos1, pos2, pos3), function(marker_df) {
    geom_rect(
      data = marker_df,
      mapping = aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2),
      color = "black",
      alpha = 0.5
    )
  })
Can anyone help me to plot this with pos1, pos2, and pos3 combined into a single data frame? I'm also open to suggestions for better ways of doing this!
This is a very simplified version of the data. I have around 20 samples in total, and hundreds of positions per sample.
Any help would be much appreciated!
Cheers : )
EDIT #1:
My raw data looks something like this:
# Long format: one row per (sample, position) pair.
positions <- data.frame(
  sample = rep(c(1, 2, 3, 4), each = 3),
  position = c(
    100, 110, 190,
    50, 65, 70,
    24, 120, 240,
    100, 405, 442
  )
)

# Total length of each sample.
samples <- data.frame(
  sample = c(1, 2, 3, 4),
  length = c(200, 100, 270, 500)
)
Upvotes: 0
Views: 374
Reputation: 675
Update:
I looked at the data again and I think this is more in line with what you are asking for:
# dplyr provides %>%, mutate(), bind_rows(), group_by(), lag() and ungroup().
library(dplyr)

# Stack the position rows and then the full-length sample rows, so that within
# each sample x2 runs through the successive segment end points.
# NOTE: this relies on pos1 < pos2 < pos3 < sample length within every sample,
# which holds for the example data.
dat2 <- bind_rows(
  pos1 %>% mutate(obs = 1),
  pos2 %>% mutate(obs = 2),
  pos3 %>% mutate(obs = 3),
  dat %>% mutate(obs = 4)
) %>%
  group_by(samp) %>%
  # Segment length = distance from the previous end point. `default = 0` makes
  # the first segment start at the origin instead of producing an NA that
  # ggplot2 would drop with a warning.
  mutate(x = x2 - lag(x2, default = 0)) %>%
  ungroup()

# One stacked bar per sample; the black outlines between the stacked segments
# mark the positions. coord_flip() lays the bars out horizontally.
ggplot(dat2, aes(x = samp)) +
  geom_col(aes(y = x, fill = obs), color = "black", alpha = 0.5) +
  coord_flip() +
  # Identical low/high colours paint every segment the same steelblue,
  # whatever its obs value.
  scale_fill_gradient(low = "steelblue", high = "steelblue") +
  theme(legend.position = "none")
Original Answer:
I'm not sure I'm reading your data correctly, but here is an attempt:
# dplyr provides %>%, mutate() and bind_rows().
library(dplyr)

# Combine the sample bars (obs = 0) and the three position data frames
# (obs = 1..3) into one long data frame, tagged by origin.
dat2 <- dat %>%
  mutate(obs = 0) %>%
  bind_rows(
    pos1 %>% mutate(obs = 1),
    pos2 %>% mutate(obs = 2),
    pos3 %>% mutate(obs = 3)
  )

# One horizontal bar per sample, with one segment per row of dat2.
# NOTE(review): geom_col() stacks by default, so each bar's total length is
# the sum of the x2 values for that sample rather than the sample length.
ggplot(dat2, aes(x = samp)) +
  geom_col(aes(y = x2, fill = obs), color = "black", alpha = 0.5) +
  coord_flip() +
  # Identical low/high colours paint every segment the same steelblue.
  scale_fill_gradient(low = "steelblue", high = "steelblue") +
  theme(legend.position = "none")
Upvotes: 2