Reputation: 6155
> dput(my.precious)
structure(list(Vehicle.ID2 = c("2351.2360", "503.496", "2508.2498",
"2256.2243", "952.946", "2327.2315", "683.682", "880.866", "347.342",
"115.116", "2239.2229", "1680.1675", "1044.1029", "323.321",
"2354.2337", "1628.1621", "1603.1598", "417.404", "1291.1285",
"84.78", "2861.2855", "2804.2802", "1084.1080", "1885.1876",
"1778.1775", "1509.1505", "379.372", "2620.2616", "1146.1133",
"2476.2472", "750.737", "2119.2112", "411.397", "1515.1512",
"2204.2194", "879.872", "986.981", "1129.1124", "2954.2948",
"2928.2924", "462.438", "2629.2620", "2962.2950", "615.610",
"1405.1400", "806.800", "1767.1765", "199.192", "1888.1878",
"2525.2517", "142.141", "687.682", "1446.1445", "39.27", "2556.2550",
"292.281", "2034.2017", "2464.2447", "2046.2037", "2567.2552",
"705.697", "180.175", "1701.1699", "2086.2071", "2427.2402",
"965.961", "1561.1558", "2185.2180", "2148.2138", "2589.2582",
"1770.1761", "1027.1032", "2995.2982", "973.967", "405.399",
"2115.2106", "2754.2742", "2586.2576", "1733.1729", "943.928",
"1245.1239", "31.18", "146.141", "1865.1861", "588.579", "2216.2212",
"513.501", "1470.1467", "518.515", "2348.2339", "2212.2208",
"1504.1489", "2814.2812", "2618.2615", "2597.2593", "3018.3009",
"1641.1638", "929.917", "2052.2045", "1702.1694"), Vehicle.class = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Car following", "Heavy-vehicle following"
), class = "factor"), PrecVehClass = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Motorcycle", "Car", "Heavy-vehicle"), class = "factor"),
CC2 = c(32.5766501673563, 33.1462524122711, 114.985655309494,
0, 19.6198370044607, 6.33947396494466, 4.41629586850399,
45.7201738350116, 77.2852308366414, 23.4653247796564, 113.858471174095,
18.2949618097755, 15.1430447619764, 18.7949281381009, 56.150849563362,
0.871136231063019, 10.1789190682619, 21.8538402563161, 24.4424229038064,
21.8644774356173, 78.8898916107299, 59.0436899337149, 34.952193382661,
30.0676154315454, 12.1631954913147, 22.0999532188296, 34.4320551117948,
51.6072494224724, 49.8285734316947, 83.7391153614881, 68.7393621760813,
23.3109392847383, 0, 63.8918058981795, 0.117898698373665,
35.9301550863017, 41.408066837246, 67.9609018034737, 77.6228604725088,
50.3819848446467, 158.427611013205, 61.7191536455709, 63.4184192224484,
52.3067956266756, 56.239305476488, 23.4972280626377, 0, 5.44649970936757,
45.325372359443, 44.140432941474, 26.4621220704583, 21.9722600148252,
0, 47.5859211404629, 65.4619356384739, 50.3173084316458,
7.14323295461026, 49.9184456786638, 57.632603327405, 70.4138804098259,
27.3086664432516, 39.2627818278854, 13.8954239118315, 16.5224386897373,
0.336396348580877, 34.6684621497679, 0.80866365546683, 63.8680515267192,
14.7996906960015, 61.5616857306764, 65.3043233970858, 21.5517378489972,
26.6451085013455, 16.4717475328769, 34.5554653009784, 36.647363180998,
86.7844694571702, 157.154018248369, 47.5411300112071, 2.64972923204488,
15.45052725276, 10.0503437206614, 0, 7.95701592069599, 65.2275028899913,
16.6622992517697, 0.084677923994235, 23.5450734083073, 20.7709172539573,
29.1191855784058, 82.1117069705742, 53.0859602212412, 37.6419285717603,
82.0220785025156, 42.6655290135778, 68.302184817338, 62.2055693283554,
22.0752327366978, 16.2898985629383, 48.0306011348524)), .Names = c("Vehicle.ID2",
"Vehicle.class", "PrecVehClass", "CC2"), class = c("tbl_df",
"data.frame"), row.names = c(NA, -100L))
I want to plot the distribution of the variable 'CC2' in facet_wrap
s of 'Vehicle.class' and 'PrecVehClass'. Also, I want to display the mean value, standard deviation and number of pairs on the plots. I used following code:
my.theme<-function(base_size = 12, base_family = "Trebuchet MS")
{theme(plot.title = element_text(size = rel(1)), panel.grid.major=element_line(color='grey'), panel.grid.minor=element_line(color='grey', linetype='dashed'), legend.position='right', legend.title=element_blank(),legend.background = element_blank(), strip.text = element_text(size=13, face="bold",lineheight=4), strip.background = element_rect(colour="black", fill="white"),legend.title = element_text(colour="black", size=16, face="bold"), legend.text = element_text(colour="black", size = 16), axis.title.x = element_text(face="bold", size=14), axis.title.y = element_text(face="bold", size=14))
}
pairs.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) length(unique(x$Vehicle.ID2)))
means.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) mean(x$CC2, na.rm=T))
sd.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) sd(x$CC2, na.rm=T))
ggplot() +
geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
geom_text(data=pairs.CC2, aes(x=200, y=0.4, label=paste(V1, "pairs", sep=" ")), size=5, face="italic") +
geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1) + geom_text(data=means.CC2, aes(x=mean(V1, na.rm=T),y=0.4, label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5) + geom_text(data=sd.CC2, aes(x=mean(V1, na.rm=T),y=0.35, label=paste("SD=", round(V1,1), sep=" ")), size=5)
This plots following:
You can see the 'mean', 'SD' and 'pairs' texts are not at visually pleasing locations. For this sample data I can relatively easily adjust the positions by controlling x and y arguments in geom_text
but in the original data there are atleast 2 more facets for this data frame. And there are lots of other data frames having same kind of distributions which I want to plot. How can I ensure that these text annotations are placed on same locations e.g. top right or top left in every facet so that there is uniformity and plots look publication quality?
Upvotes: 0
Views: 334
Reputation: 59355
It turns out that ggplot
stores the axis limits in a "ggplot object" produced when the plot is rendered. You can create but not render with ggplot_build(...)
and then access these (albeit in a roundabout way). Calling you original data, df
, and using your pairs.CC2
, mean.CC2
, and sd.CC2
,
# build the plot absent the mean, sd, and pairs annotations
ggp <-ggplot() +
geom_histogram(data=df, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1)
# extract x- and y-range information for each panel (facet)
panels <- ggplot_build(ggp)[["panel"]]
limits <- do.call(rbind,lapply(panels$ranges,
function(range)c(range$x.range,range$y.range)))
colnames(limits) <- c("x.lo","x.hi","y.lo","y.hi")
# combine this with your mean, sd, and pairs data
labs <- cbind(means.CC2,sd=sd.CC2$V1,pairs=pairs.CC2$V1,limits)
# use labs to drive the placement of the annotations
ggp +
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.0*(y.hi-y.lo),label=paste(pairs,"pairs",sep=" ")), size=5,hjust=1)+
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.1*(y.hi-y.lo),label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5,hjust=1) +
geom_text(data=labs, aes(x=x.hi,y=y.hi-0.2*(y.hi-y.lo),label=paste("SD=", round(sd,1),sep=" ")), size=5,hjust=1)
Produces this:
Upvotes: 1
Reputation: 93811
You can gain more control over label placement by creating a data frame with the summary information that includes y-position values. The summary data frame just has to include the facetting variables so that geom_text
can automatically place labels at different y-positions for different facets. For example:
library(ggplot2)
library(dplyr)
# Pre-summarize the data into histogram bins. We need this to calculate appropriate
# values for the y-position of the labels
hist.bins = my.precious %>%
group_by(Vehicle.class, PrecVehClass,
breaks=cut(CC2, seq(0,max(CC2)+5,5),
seq(5,max(CC2)+5,5), include.lowest=TRUE)) %>%
summarise(count=n()) %>%
ungroup() %>%
mutate(percent=count/sum(count)*100)
# Data frame with y-position of labels. I've set the value to 90% of the maximum
# value of percent, but you can set it to whatever you like, or vary it by group.
pos = hist.bins %>% group_by(Vehicle.class, PrecVehClass) %>%
summarise(y.pos = 0.9 * max(percent))
# Data frame with summary stats
CC2stats = my.precious %>% group_by(Vehicle.class, PrecVehClass) %>%
summarise(mean=mean(CC2, na.rm=T),
sd = sd(CC2, na.rm=T),
pairs=length(unique(Vehicle.ID2)))
# Merge y-positions into CC2stats
CC2stats = merge(CC2stats, pos, by=c("Vehicle.class", "PrecVehClass"))
# Plot histogram
ggplot() +
geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),
color="black", fill="grey", alpha=0.5,
breaks=seq(0,max(my.precious$CC2)+5,5)) +
facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") +
labs(x = "Distance in addition to safety distance (ft)", y="percentage") +
theme_bw() + my.theme() +
# Add text labels using CC2stats data frame
geom_text(data=CC2stats, aes(x=140, y=y.pos,
label=paste(pairs, " pairs", sep=" ")),
size=5, face="italic") +
geom_vline(data=CC2stats, aes(xintercept=mean),
color="blue", linetype = "longdash", size=1) +
geom_text(data=CC2stats,
aes(x=140,y=0.95*y.pos, label=paste0("Mean = ", round(mean,1),
" ft",sep=" ")), size=5) +
geom_text(data=CC2stats,
aes(x=140,y=0.90*y.pos, label=paste0("SD = ", round(sd,1), sep=" ")),
size=5)
Note that I've included a breaks
argument in geom_histogram
. This is so that the breaks in the graph will correspond to the breaks in hist.bins
, which ensures that the maximum value of hist.bins$percent
will correspond to the y-range in the graph.
And here's the result:
Upvotes: 1