Reputation: 2584
I have a data way much bigger than this representative one
df<- structure(list(Pama1 = structure(c(2L, 5L, 3L, 5L, 3L, 3L, 3L,
3L, 3L, 4L, 1L), .Label = c("", "DD1", "n/a", "PAMANA", "zf"), class = "factor"),
X = structure(c(11L, 3L, 10L, 2L, 4L, 5L, 6L, 7L, 8L, 9L,
1L), .Label = c("", "116", "12", "138", "197", "219", "224",
"230", "280", "85", "Start1"), class = "factor"), X.1 = structure(c(11L,
10L, 2L, 4L, 3L, 5L, 8L, 6L, 7L, 9L, 1L), .Label = c("",
"101", "145", "199", "222", "227", "233", "238", "331", "89",
"End1"), class = "factor"), Pama2 = structure(c(2L, 4L, 4L,
4L, 3L, 4L, 4L, 6L, 5L, 1L, 1L), .Label = c("", "DD2", "GGTR",
"n/a", "PAMANA", "T_reg"), class = "factor"), X.2 = structure(c(9L,
2L, 2L, 8L, 3L, 4L, 5L, 6L, 7L, 1L, 1L), .Label = c("", "1",
"115", "208", "214", "232", "376", "85", "Start2"), class = "factor"),
X.3 = structure(c(10L, 8L, 2L, 9L, 3L, 4L, 5L, 6L, 7L, 1L,
1L), .Label = c("", "15", "195", "229", "231", "362", "577",
"76", "86", "End2"), class = "factor"), Pama3 = structure(c(1L,
3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("DD3",
"GGTR", "n/a"), class = "factor"), X.4 = structure(c(10L,
1L, 7L, 8L, 9L, 9L, 2L, 3L, 4L, 5L, 6L), .Label = c("1",
"129", "136", "153", "166", "178", "48", "65", "66", "Start1"
), class = "factor"), X.5 = structure(c(10L, 6L, 7L, 8L,
3L, 9L, 1L, 2L, 4L, 4L, 5L), .Label = c("131", "138", "144",
"168", "180", "34", "51", "70", "79", "End2"), class = "factor")), .Names = c("Pama1",
"X", "X.1", "Pama2", "X.2", "X.3", "Pama3", "X.4", "X.5"), class = "data.frame", row.names = c(NA,
-11L))
At first I put all starts and ends together and find the range
In this example it is 1 to 577
Then I want to plot or highlight the regions that there is a string
for example, something like this figure that I made
or even like this
Upvotes: 2
Views: 167
Reputation: 93851
The code below tidies up the data into a form suitable for plotting and then uses geom_segment
to lay out the sequences. To tidy the data, we want each column to be a variable and each row to be an observation.
library(tidyverse)
## Clean up data frame and convert to long form
df = map_df(seq(1,ncol(df),3), # Turn each group of three columns into separate data frames that we'll stack into long format
~ setNames(df[-1,.x:(.x+2)], c("DD","Start","End")), # Column names appear to be in the first data row, so we'll remove this row and provide new column names
.id="Pama") %>% # This line and next add a "Pama" column
mutate(Pama = paste0("Pama", Pama)) %>%
filter(!DD %in% c("n/a","")) %>% # Remove empty rows
mutate_at(vars(matches("^[SE]")), funs(as.numeric(as.character(.)))) # All columns are in character format. Convert the numbers to numeric format.
The data frame now looks like this:
Pama DD Start End <chr> <chr> <dbl> <dbl> 1 Pama1 zf 12 89 2 Pama1 zf 116 199 3 Pama1 PAMANA 280 331 4 Pama2 GGTR 115 195 5 Pama2 T_reg 232 362 6 Pama2 PAMANA 376 577 7 Pama3 GGTR 66 144
ggplot(df, aes(y=Pama, yend=Pama)) +
geom_segment(data=data.frame(Pama=unique(df$Pama), x=min(df$Start), xend=max(df$End)),
aes(x=x, xend=xend), colour="grey80", size=10) +
geom_segment(aes(x=Start, xend=End, colour=DD), size=20) +
geom_text(aes(x=(Start+End)/2, label=DD), colour="white", size=3, fontface="bold") +
geom_text(data=gather(df, key, value, Start:End),
aes(x=value, label=value, colour=DD), size=2.5,
fontface="bold", position=position_nudge(0,-0.3)) +
guides(colour=FALSE) +
scale_x_continuous(breaks=seq(0,1000,100)) +
labs(x="", y="") +
theme_classic(base_size=15) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank())
UPDATE: To address your comment, here's another way of positioning the numbers to avoid overlap.
ggplot(df, aes(y=Pama, yend=Pama)) +
geom_segment(data=data.frame(Pama=unique(df$Pama), x=min(df$Start), xend=max(df$End)),
aes(x=x, xend=xend), colour="grey80", size=10) +
geom_segment(aes(x=Start, xend=End, colour=DD), size=20) +
geom_text(aes(x=(Start+End)/2, label=DD), colour="white", size=3, fontface="bold") +
geom_text(data=gather(df, key, value, Start:End),
aes(x=ifelse(key=="Start", value + 10, value - 10), label=value),
colour="white", size=2.8, fontface="bold", position=position_nudge(0,-0.2)) +
guides(colour=FALSE) +
scale_x_continuous(breaks=seq(0,1000,100)) +
labs(x="", y="") +
theme_classic(base_size=15) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank())
UPDATE 2: To address your second comment, we'll add a grouping column that we'll use to alternate high and low number labels:
# Add grouping variable to alternate high and low labels
df = df %>% group_by(Pama) %>% arrange(Start) %>%
mutate(hilow = rep(c("high","low"),nrow(df))[1:n()])
ggplot(df, aes(y=Pama, yend=Pama)) +
geom_segment(data=data.frame(Pama=unique(df$Pama), x=min(df$Start), xend=max(df$End)),
aes(x=x, xend=xend), colour="grey80", size=10) +
geom_segment(aes(x=Start, xend=End, colour=DD), size=20) +
geom_text(aes(x=(Start+End)/2, label=DD), colour="white", size=3, fontface="bold") +
geom_text(data=gather(df, key, value, Start:End) %>% filter(hilow=="high"),
aes(x=value, label=value, colour=DD), hjust=0.5,
size=3, fontface="bold", position=position_nudge(0,0.3)) +
geom_text(data=gather(df, key, value, Start:End) %>% filter(hilow=="low"),
aes(x=value, label=value, colour=DD), hjust=0.5,
size=3, fontface="bold", position=position_nudge(0,-0.3)) +
guides(colour=FALSE) +
scale_x_continuous(breaks=seq(0,1000,100)) +
labs(x="", y="") +
theme_classic(base_size=15) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank())
Upvotes: 2
Reputation: 13680
That is one weird data structure you have. This will probably work out better down the line if you can modify the source data in some way to obtain a tidy dataframe
from the start, where each column is a single variable and each row is an observation.
We can wrangle the data to obtain such dataframe
(this is in base
R, you can arguably achieve the same in other ways, also using dplyr
or data.table
):
df2 <- rbind(setNames(cbind(rep('DD1', nrow(df) - 1), df[2:nrow(df), 1:3]), c('DD', 'Pama', 'Start', 'End')),
setNames(cbind(rep('DD2', nrow(df) - 1), df[2:nrow(df), 4:6]), c('DD', 'Pama', 'Start', 'End')),
setNames(cbind(rep('DD3', nrow(df) - 1), df[2:nrow(df), 7:9]), c('DD', 'Pama', 'Start', 'End'))
)
df2$Start <- as.numeric(as.character(df2$Start))
df2$End <- as.numeric(as.character(df2$End))
df2 <- df2[!df2$Pama %in% c('','n/a'), ]
df2
#> DD Pama Start End
#> 2 DD1 zf 12 89
#> 4 DD1 zf 116 199
#> 10 DD1 PAMANA 280 331
#> 51 DD2 GGTR 115 195
#> 81 DD2 T_reg 232 362
#> 91 DD2 PAMANA 376 577
#> 52 DD3 GGTR 66 144
This gives us a nice dataset, where we can map any ggplot2
's aestethic
to a simple column:
library(ggplot2)
ggplot(df2, aes(y = DD, color = Pama)) +
geom_segment(aes(x = Start, xend = End, yend = DD), size = 10) +
geom_text(aes(label = Start, x = Start), size = 2.5, nudge_y = -.15) +
geom_text(aes(label = End, x = End), size = 2.5, nudge_y = -.15) +
scale_y_discrete(position = 'right') +
theme(panel.background = element_rect(fill = 'white'),
axis.text.x = element_blank(),
axis.text.y.right = element_text(size = 14),
axis.ticks.y = element_blank(),
axis.title = element_blank())
The text positioning can indeed cause problems, it does so in this example, where we had to reduce the text size to get it somewhat right.
Here is a solution, based on the ggrepel
package:
library(ggplot2)
library(ggrepel)
ggplot(df2, aes(y = DD, color = Pama)) +
geom_segment(aes(x = Start, xend = End, yend = DD), size = 10) +
geom_text_repel(data = function(d) tidyr::gather(d, k, p, -DD, -Pama), aes(label = p, x = p), size = 5, nudge_y = -.15, segment.size = 0) +
# geom_label_repel(aes(label = End, x = End), size = 5, nudge_y = -.15) +
geom_text(aes(x = (Start + End) / 2, label = Pama), colour = "white", size = 2.5) +
scale_y_discrete(position = 'right') +
guides(color = FALSE) +
theme(panel.background = element_rect(fill = 'white'),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y.right = element_text(size = 14),
axis.ticks.y = element_blank(),
axis.title = element_blank())
(The text size is forcibly increased to show it does not overlap)
PS: Yea, this update makes it even more similar to @eipi10 answer.. it's a great answer, of course I'd steal from him :P
Upvotes: 1