Reputation: 480
My aim is to draw a basic bar chart in which the data come from two data frames that have the same variables. The plot should look like the one below, but without its two shortcomings: there should be a legend identifying the data frames, and the error bars should be centred on the bars. Do you know how to do this? Below is the code that produced that plot; I don't think `bind_rows()` is the optimal solution here.
The code ↓
# Code that produced the plot in question: stack B and C, keep only Q1 and Q2,
# draw dodged bars of the means with +/- 1 sd error bars, then flip the axes.
# `.id = "id"` adds an `id` column recording which input each row came from.
bind_rows(B, C, .id = "id") %>%
filter(question %in% c("Q1", "Q2")) %>%
ggplot(aes(x = question, y = mean)) +
# The bars are filled by `id` and dodged with position = "dodge2".
geom_bar(aes(fill = id), stat = "identity", position = "dodge2", width = 0.5) +
# No `position` is given for this layer, so the error bars are not dodged the
# way the bars are.
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, width = 0.1, colour = id)) +
coord_flip()
The data frames ↓
# First of the two data frames, given as a structure() literal that is not
# assigned to a name here -- presumably `B` from the code above; TODO confirm.
# Columns: question, n (204 for every row), mean, sd. mean and sd are NA for Q10.
structure(list(question = c("Q1", "Q10", "Q11", "Q12", "Q2",
"Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"), n = c(204L, 204L,
204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L),
mean = c(5.22549019607843, NA, 4.95098039215686, 4.39705882352941,
5.47058823529412, 5.51470588235294, 4.50490196078431, 4.92647058823529,
4.40686274509804, 5.56862745098039, 5.56372549019608, 5.23529411764706
), sd = c(1.1524816893289, NA, 1.31214449357814, 1.5422430010719,
1.12039650223724, 1.15104553532809, 1.37714471881058, 1.34621721218454,
1.30030385262334, 0.871099231072865, 0.830963499839951, 1.36945187401243
)), row.names = c(NA, 12L), class = c("tbl_df", "tbl", "data.frame"
))
# Second of the two data frames, given as a structure() literal that is not
# assigned to a name here -- presumably `C` from the code above; TODO confirm.
# Columns: question, n (13 for every row), mean, sd.
structure(list(question = c("Q1", "Q10", "Q11", "Q12", "Q2",
"Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"), n = c(13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mean = c(5.38461538461539,
4.38461538461539, 4.69230769230769, 4.30769230769231, 5.15384615384615,
5.38461538461539, 4.76923076923077, 5.30769230769231, 4.53846153846154,
5.61538461538461, 5.69230769230769, 4.92307692307692), sd = c(1.26085034391223,
1.44559454541846, 1.03155347127648, 1.60128153805087, 0.898717034272917,
1.12089707663561, 1.01273936708367, 0.85485041426511, 0.967417922046845,
1.26085034391223, 0.85485041426511, 1.84668795692624)), row.names = c(NA,
12L), class = c("tbl_df", "tbl", "data.frame"))
Upvotes: 3
Views: 1485
Reputation: 458
This post is tagged with `ggplot2` and `tidyverse`. However, if you just want to use `base` R, here's one approach (a bit lengthy and tricky, but it does the job just fine):
# First data frame: one row per question with the sample size (204 for every
# question), the mean and the standard deviation. mean and sd are NA for Q10.
df1 <- structure(
  list(
    question = c(
      "Q1", "Q10", "Q11", "Q12", "Q2", "Q3",
      "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"
    ),
    n = rep(204L, 12L),
    mean = c(
      5.22549019607843, NA, 4.95098039215686, 4.39705882352941,
      5.47058823529412, 5.51470588235294, 4.50490196078431,
      4.92647058823529, 4.40686274509804, 5.56862745098039,
      5.56372549019608, 5.23529411764706
    ),
    sd = c(
      1.1524816893289, NA, 1.31214449357814, 1.5422430010719,
      1.12039650223724, 1.15104553532809, 1.37714471881058,
      1.34621721218454, 1.30030385262334, 0.871099231072865,
      0.830963499839951, 1.36945187401243
    )
  ),
  row.names = c(NA, 12L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Second data frame: same columns as df1, with a sample size of 13 for every
# question and no missing values.
df2 <- structure(
  list(
    question = c(
      "Q1", "Q10", "Q11", "Q12", "Q2", "Q3",
      "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"
    ),
    n = rep(13L, 12L),
    mean = c(
      5.38461538461539, 4.38461538461539, 4.69230769230769,
      4.30769230769231, 5.15384615384615, 5.38461538461539,
      4.76923076923077, 5.30769230769231, 4.53846153846154,
      5.61538461538461, 5.69230769230769, 4.92307692307692
    ),
    sd = c(
      1.26085034391223, 1.44559454541846, 1.03155347127648,
      1.60128153805087, 0.898717034272917, 1.12089707663561,
      1.01273936708367, 0.85485041426511, 0.967417922046845,
      1.26085034391223, 0.85485041426511, 1.84668795692624
    )
  ),
  row.names = c(NA, 12L),
  class = c("tbl_df", "tbl", "data.frame")
)
First, we merge the two dataframes:
# Join the two tables on `question`. The non-key columns share their names
# (n, mean, sd), so merge() suffixes them with ".x" (df1) and ".y" (df2).
df3 <- merge(x = df1, y = df2, by = "question")
#df3
#question n.x mean.x sd.x n.y mean.y sd.y
#1 Q1 204 5.225490 1.1524817 13 5.384615 1.2608503
#2 Q10 204 NA NA 13 4.384615 1.4455945
#3 Q11 204 4.950980 1.3121445 13 4.692308 1.0315535
#4 Q12 204 4.397059 1.5422430 13 4.307692 1.6012815
#5 Q2 204 5.470588 1.1203965 13 5.153846 0.8987170
#6 Q3 204 5.514706 1.1510455 13 5.384615 1.1208971
#7 Q4 204 4.504902 1.3771447 13 4.769231 1.0127394
#8 Q5 204 4.926471 1.3462172 13 5.307692 0.8548504
#9 Q6 204 4.406863 1.3003039 13 4.538462 0.9674179
#10 Q7 204 5.568627 0.8710992 13 5.615385 1.2608503
#11 Q8 204 5.563725 0.8309635 13 5.692308 0.8548504
#12 Q9 204 5.235294 1.3694519 13 4.923077 1.8466880
Then, we store our data in the form of a matrix:
# Two-column numeric matrix of the means: column 1 = df1, column 2 = df2,
# one row per question in df3's row order.
mat <- cbind(df3$mean.x, df3$mean.y)
#mat
# [,1] [,2]
# [1,] 5.225490 5.384615
# [2,] NA 4.384615
# [3,] 4.950980 4.692308
# [4,] 4.397059 4.307692
# [5,] 5.470588 5.153846
# [6,] 5.514706 5.384615
# [7,] 4.504902 4.769231
# [8,] 4.926471 5.307692
# [9,] 4.406863 4.538462
#[10,] 5.568627 5.615385
#[11,] 5.563725 5.692308
#[12,] 5.235294 4.923077
Finally, we plot the barplots:
#par(mar=c(5,4,4,5)+0.1)
# Grouped horizontal bar chart with one pair of bars per question. `mat` is
# transposed so that each column of t(mat) holds the two means of one question
# (row 1 = df1, row 2 = df2).
# barplot() returns the bar midpoints; they are kept in `mid` so that further
# elements (e.g. error bars) can be placed on the individual bars.
# TRUE is spelled out instead of T, which is an ordinary variable that can be
# reassigned.
mid <- barplot(
  t(mat),
  beside = TRUE,
  names.arg = df3$question,
  cex.names = 0.8,
  horiz = TRUE,
  xlim = c(0, 7),
  col = c("#44c1f2", "#f28744"),
  xlab = "mean",
  ylab = "question"
)
Now, to add error bars to the barplot using the `base` package, we can do the following (taking inspiration from this post by @Laryx Decidua):
# Error bars of +/- 1 sd around each mean, drawn as double-headed arrows
# (code = 3) with flat heads (angle = 90).
# The values are read from df3 -- the merged table the bars were built from --
# rather than from df1/df2, so every error bar is paired with its own bar even
# when df1 and df2 do not list the questions in the row order merge() returns.
# mid[1, ] and mid[2, ] are the bar midpoints of the first (df1) and second
# (df2) series.
arrows(
  x0 = df3$mean.x - df3$sd.x, y0 = mid[1, ],
  x1 = df3$mean.x + df3$sd.x, y1 = mid[1, ],
  code = 3, angle = 90, length = 0.04, col = "red"
)
arrows(
  x0 = df3$mean.y - df3$sd.y, y0 = mid[2, ],
  x1 = df3$mean.y + df3$sd.y, y1 = mid[2, ],
  code = 3, angle = 90, length = 0.04, col = "blue"
)
Now add the legend:
# Legend keyed to the bar fill colours: "1" is the first series (df1) and "2"
# the second (df2). It is anchored at user coordinates (7, 21); xpd = TRUE
# lets it be drawn outside the plot region. TRUE is spelled out instead of T,
# which is an ordinary variable that can be reassigned.
legend(
  x = 7, y = 21,
  legend = c("1", "2"),
  fill = c("#44c1f2", "#f28744"),
  xpd = TRUE,
  title = "ID"
)
Which brings us to a plot like this:
Upvotes: 1
Reputation: 7665
I think `dplyr::bind_rows()` works perfectly fine here. To align the grouped bars and the error bars, use `position_dodge2()` for both layers.
# Stack both data frames (`id` records the source of each row), keep Q1 and
# Q2, then draw horizontal bars of the means with +/- 1 sd error bars. Both
# layers use "dodge2" positioning so the error bars line up with their bars.
bind_rows(B, C, .id = "id") %>%
  filter(question %in% c("Q1", "Q2")) %>%
  ggplot(
    aes(
      x = mean, y = question, fill = id,
      xmin = mean - sd, xmax = mean + sd
    )
  ) +
  geom_col(position = "dodge2", width = 0.5) +
  geom_errorbar(position = position_dodge2(padding = 0.5), width = 0.5) +
  theme_minimal()
`position_dodge2()` is also needed if you want to add additional layers.
# Add a point layer to the most recent plot, dodged with position_dodge2()
# like the existing layers.
last_plot() +
  geom_point(position = position_dodge2(width = 0.5))
Upvotes: 1