Reputation: 479
I have some data showing a response to a treatment (a categorical variable). Each replicate also has a known value assigned to it based on the magnitude of the treatment relative to the individual (a continuous variable). What I'd like to do is to show the response to both the treatment effect (a bar plot) and the continuous variable (a scatterplot / regression) in a single plot using ggplot2
, in a similar way that the facet_wrap
or facet_grid
commands can be used. Basically, I'd like to recreate the plot below using ggplot2
.
Here is the code which I used to generate the sample data and to create the plot
## GENERATE CONTINUOUS VARIABLES FOR EACH TREATMENT (A - D)
A <- abs(norm(10, 1, 1))
B <- abs(rnorm(10, 3, 1))
C <- abs(rnorm(10, 5, 1))
D <- abs(rnorm(10, 7, 1))
## GENERATE RESPONSE TO TREATMENTS
res_A<-rnorm(10, 28, 3)
res_B<-rnorm(10, 22, 3)
res_C<-rnorm(10, 18, 3)
res_D<-rnorm(10, 12, 3)
## ESTABLISH DATA FRAMES FOR TREATMENTS AND RESPONSE
treatments<-data.frame(A, B, C, D)
response<-data.frame(res_A, res_B, res_C, res_D)
## CONVERT EACH DATA FRAME TO LONG FORM
library(reshape2)
treatments <-treatments %>% gather(Treatment, cont_x, A:D)
response <-response %>% gather(Treatment, Response, res_A:res_D)
## CREATE FINAL DATA FRAME WITH REQUIRED DATA
data<-data.frame(treatments$Treatment, treatments$cont_x, response$Response)
colnames(data) <- c("Treatment", "X", "Response")
## ESTABLISH MEANS AND STANDARD ERROR FOR TREATMENT EFFECTS
means<-tapply(data$Response,list(data$Treatment),mean,na.rm=T)
ER<-tapply(data$Response,list(data$Treatment),sd,na.rm=T)/sqrt(tapply(data$Response,list(data$Treatment),length))
## SET AESTHETICS AND LABEL VALUES
cols<-c("darkcyan","olivedrab3", "palevioletred3","brown3")
labs<-c("A", "B", "C", "D")
## GENERATE PLOT CANVASS
par(mfrow=c(1,2))
par(mar=c(3.5,3,2,1))
## GENEATE BAR PLOT
graph<-tapply(data$Response,data$Treatment,mean,na.rm=T)
plot<-barplot(graph,col=cols,las=1,xaxt='n',yaxt='n',
xlab=NA,ylab =NA,font.lab=2,
cex=0.6,cex.lab=0.6,font.lab=2,font.axis=2,
cex.axis=0.6,ylim=c(0,40), main="TREATMENT EFFECTS")
box()
arrows(x0=plot,y0=means-ER,x1=plot,
y1=means+ER,code=3,angle=90,length=0.02,lwd=1)
axis(side=1,line=0,at=plot,labels=labs,
cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)
axis(side=2,line=0,at=seq(0,40,10),las=1,cex.axis=0.8,
labels=seq(0,40,10),cex=0.6,mgp=c(0,0.6,0))
xlab<-c("Treatment")
ylab<-c("Response")
mtext(xlab, side=1, cex=1.2, line=2)
mtext(ylab, side=2, cex=1.2, line=1.75)
mark<-(means+ER)+2
text(0.7,mark[1],"a",font=1,cex=1.2)
text(1.9,mark[2],"b",font=1,cex=1.2)
text(3.1,mark[3],"c",font=1,cex=1.2)
text(4.3,mark[4],"d",font=1,cex=1.2)
## GENERATE SCATTERPLOT
par(mar=c(3.5, 2, 2, 2))
plot(data$X, data$Response,type='n',ylim = c(0, 40), xlim=c(0,9),pch=21, col='black', cex=1.5, xaxt='n',
yaxt='n', xlab=NA, ylab=NA, main = "CONTINUOUS RESPONSE")
axis(side=1,line=0,tck=NA,at=seq(0,9,3),labels=T,
cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)
axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=0.01)
axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=-0.01)
xlab<-c("Continuous variable")
mtext(xlab, side=1, cex=1.2, line=2)
## PERFORM REGRESSION AND ADD IN REGRESSION LINE
model<-lm (Response ~ X, data = data)
abline(model, lwd=2)
## ADD IN CONFIDENCE INTERVAL
newx <- seq(0,9,length.out=1000)
preds <- predict(model, newdata = data.frame(X=newx),
interval = 'confidence')
lines(newx, preds[ ,3], lty = 'dashed', col = "grey36",lwd=1)
lines(newx, preds[ ,2], lty = 'dashed', col = 'grey36',lwd=1)
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
## ADD IN POINTS ONTOP OF CI POLYGON
points(data$X, data$Response, bg= ifelse(data$Treatment == "A", "darkcyan",
ifelse(data$Treatment == "B","olivedrab3", ifelse(data$Treatment == "C", "palevioletred3", "brown3"))),pch=21, col='black', cex=1.5)
## ADD THE REGRESSION EQ
eq<-expression(italic("y = 28.54 - 2.16x"))
rsq<-expression(italic("R"^{2}~"= 0.76 ***"))
text(5 ,35, eq, cex=1.2)
text(4.5, 33, rsq, cex=1.2
Is it even possible to do this using ggplot2
?
Upvotes: 3
Views: 2200
Reputation: 18493
Yes, both plots can be created using ggplot. Here's the barplot:
library(ggplot2)
library(dplyr)
data %>%
group_by(Treatment) %>%
summarise(Response=mean(Response)) %>%
ggplot(aes(x=Treatment, y=Response)) +
geom_col(aes(fill=cols)) +
geom_errorbar(ymin=means+ER, ymax=means-ER, width = 0.1) +
geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.1)) +
labs(title="TREATMENT EFFECTS") +
theme_bw() +
theme(legend.position="none")
And here's the scatterplot with equation and R2 added. The ggpubr package is useful here and can save on coding.
library(ggpubr)
data %>%
ggplot(aes(x=X, y=Response)) +
geom_point(aes(fill=Treatment), pch=21, col="black", size=4) +
scale_fill_manual(values=c("darkcyan", "olivedrab3", "palevioletred3", "brown3")) +
scale_x_continuous(breaks=seq(0,9,3)) +
scale_y_continuous(lim=c(0, 40), labels=NULL) +
geom_smooth(method="lm") +
stat_regline_equation(label.y=33, label.x=4,
aes(label = ..adj.rr.label..)) +
stat_regline_equation(label.y=35, label.x=4,
aes(label = ..eq.label..)) +
labs(title="CONTINUOUS RESPONSE", x="Continuous variable", y="") +
theme_classic() +
theme(legend.position="none", plot.title = element_text(hjust = 0.5))
Upvotes: 0
Reputation: 24770
Sure you can use ggplot2
with a little help from ggpubr
and ggpmisc
.
library(ggplot2)
library(ggpubr)
library(ggpmisc)
a <- data %>%
group_by(Treatment) %>%
summarise(Response=mean(Response)) %>%
mutate(se = sd(Response)/sqrt(length(Response))) %>% ungroup %>%
ggplot(aes(x=Treatment,y=Response,fill = Treatment)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.3)) +
geom_errorbar(aes(ymin = Response - se, ymax = Response + se), width = 0.1) + scale_fill_manual(values = cols) +
labs(title="TREATMENT EFFECTS")
b <- data %>%
ggplot(aes(x=X,y=Response)) +
geom_smooth(method="lm", show.legend = FALSE) +
geom_point(aes(fill = factor(Treatment)),shape=21,size=3, show.legend = FALSE) +
scale_fill_manual(values = cols) +
labs(title = "CONTINUOUS RESPONSE", xlab="Continuous Variable") +
xlab("Continuous Variable") +
stat_poly_eq(formula = y ~ x, label.x = 0.9, label.y = 0.95, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE)
ggarrange(a,b,nrow = 1)
data <- structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), X = c(0.0267649727104236,
1.39488715616046, 0.21355823940511, 0.49907410504292, 0.375279051787701,
0.681959942595334, 2.05952354292797, 2.26083984353423, 1.11354591314711,
0.34506137947979, 2.07866454079728, 3.20194490569932, 3.26342299162599,
2.06754732525705, 4.02484423090347, 3.21831253488128, 3.56925840330762,
3.92631915144912, 2.55130407898901, 4.88369094725247, 4.85805706436391,
6.06714331089959, 5.05696298595936, 4.28599088092722, 2.64907718621996,
5.50017966947343, 5.27853136585637, 5.8694723514342, 4.57774253201089,
4.28459862391154, 6.6919479712577, 6.89039252602714, 7.36883429701188,
5.01895090471179, 7.66623439220746, 7.27620218490248, 6.44135570941742,
7.66409390386461, 8.09858213415943, 4.89114777053612), Response = c(28.647362805403,
30.5878855986189, 36.3739824861786, 33.5874379487616, 23.8060926287858,
30.8520531077353, 26.5940268747477, 28.8356526462252, 30.4727218173035,
26.8151163416507, 17.6391456006427, 19.0921380684935, 21.6950437768534,
23.9017396212974, 27.1407090174467, 15.4322366130883, 26.9809942596379,
22.7341801522041, 23.6518581209459, 21.8377270248132, 13.2905142368901,
19.8951142352182, 17.1400860924093, 16.847732448511, 15.6213812276033,
18.3368951001566, 18.7411799795391, 17.5514579276854, 14.2841781950673,
21.6044042356051, 11.0037691942103, 13.0260853225773, 10.6862778263241,
9.4482751070798, 11.9896873712498, 10.0798146375625, 12.6332310111476,
14.4806588768585, 6.89810707498932, 7.55062781781536)), class = "data.frame", row.names = c(NA,
-40L))
cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")
Upvotes: 2
Reputation: 306
The answer is yes, but not elegantly (using ggplot2
alone). This is because the whole idea behind the "grammar of graphics" philosophy implemented in ggplot2
is that individual variables (i.e. columns in a tidy data.frame
) get mapped to individual plot aesthetics. Ergo, using one of the face_*
functions is only going to break plots up into panels based on a categorical value (2 categorical values in the case of a face_grid
). However, in this scenario, the x and y axes will still be mapped to the same variable. In your example, this would require mapping the x-axis to a variable containing categorical values on the left and a separate variable containing continuous ones on the right. More on this here.
If you want to use ggplot2
to produce these plots, and then combine them programmatically without having to do so manually, I would recommend looking into ggplot2
extensions such as cowplot
, gridExtra
, or egg
. I'm sure these packages have some nice vignettes, but this is a tutorial that I have found to be a helpful in the past ggplot2 - Easy Way to Mix Multiple Graphs on The Same Page.
Upvotes: -1