Reputation: 3044
Data: Data
Code:
## Load the data
ifpricc = read.csv(file = "IFPRI_CCAgg2050.csv", heade=TRUE)
#-----------------------------------------------------------------------
# Plotting Kernel density distribution for the final yield impact data
#-----------------------------------------------------------------------
ifpricc.df = as.data.frame(ifpricc)
ifpricc_mlt.df = melt(ifpricc.df, id.vars=c("crop","codereg","reg","sres","gcm","scen"))
kernel = ggplot(data=subset(ifpricc_mlt.df, reg %in% c("Canada","United States","Oceania","OECD Europe","Eastern Europe","Former USSR") & gcm %in% c("CSIRO","MIROC","noCC")),
aes(x = value, y = ..density..))
kernel = kernel + geom_density(aes(fill = gcm), alpha=.4, subset = .(crop %in% c("WHET")),
position="identity", stat="density", size=0.75,
bw = "nrd0", adjust = 1.5,
kernel = c("gaussian"))
kernel = kernel + scale_fill_manual(name="GCM model",breaks=c("CSIRO","MIROC","noCC"), values=c("red","blue","gray80"))
kernel = kernel + facet_grid(sres ~ reg, scale="free") + scale_y_continuous(breaks=seq(0,2,.25))
kernel = kernel + labs(title="Kernel density distribution - with and without climate change", y="Density", x="Yield") + theme_bw()
kernel = kernel + theme(plot.title=element_text(face="bold", size=rel(2), hjust=0.5, vjust=1.5, family="serif"),
axis.text.x=element_text(color="black", size=rel(2), hjust=0.5, family="serif"),
axis.text.y=element_text(color="black", size=rel(2), hjust=1, family="serif"),
axis.title.x=element_text(face="bold", color="black", size=rel(1.6), hjust=0.5, vjust=0.2, family="serif"),
axis.title.y=element_text(face="bold", color="black", size=rel(1.6), hjust=0.5, vjust=0.2, family="serif"),
strip.text=element_text(face="bold", size=rel(1.5), family="serif"),
legend.text=element_text(face="bold", size=rel(1.25), family="serif"),
legend.title=element_text(face="bold", size=rel(1.45), family="serif"))
Results:
Question:
What I am trying to achieve here is to plot Kernel density curves. My problem is that I want to overlay the baseline kernel curves (in lower facet) over the colored ones (two upper facets) and which represent deviations from the baseline. Any help will be greatly appreciated.
Cheers :)
Alternative Question:
So I tinkered a bit after looking up potential solutions on the website, and I came up with this: instead of faceting using facet_grid(sres ~ reg)
by "sres" x "reg", I faceted by using facet_wrap(~ reg)
. It produces something closer to what I am intending .
The problem now is that I cannot identify the distribution by "sres", which is what I am looking for. To solve this, I thought to annotate the plot by adding vertical lines that plot the mean of the data by "sres". But I kind of am at loss of how to move from here.
Any suggestions?
Upvotes: 3
Views: 1527
Reputation: 32859
If I understand, I think this is what you want. You need to rearrange the data: repeat the lines in the data frame containing the noCC
factor, once with sres = A1B
, and once with sres = B1
. That way, the noCC density curve will appear in the A1B facets and in the B1 facets.
In addition, the melting of the data frame has no effect other than to create a column of 1s. Also, I do the subsetting outside the call to ggplot2
.
library(ggplot2)
ifpricc = read.csv(file = "IFPRI_CCAgg2050.csv", heade=TRUE)
# Subset the data frame
df = subset(ifpricc,
reg %in% c("Canada","United States","Oceania","OECD Europe","Eastern Europe","Former USSR") &
gcm %in% c("CSIRO","MIROC","noCC") &
crop %in% c("WHET"))
# Manipulate the data frame
x = df[df$sres == "PM", ]
x = rbind(x, x)
x$sres = rep(c("A1B", "B1"), each = dim(x)[1]/2)
df = df[df$sres != "PM",]
df = rbind(df, x)
# Draw the plot
ggplot(data=df, aes(x = yield, fill = gcm)) +
geom_density(alpha=.4, size=0.75, adjust = 1.5) +
scale_fill_manual(name="GCM model",breaks=c("CSIRO","MIROC","noCC"),
values=c("red","blue","gray80")) +
facet_grid(sres ~ reg, scale="free") + scale_y_continuous(breaks=seq(0,2,.25))
EDIT: The facet_wrap version:
The idea is to draw two charts: one for A1B, and the second for B1; then to put the two charts together using functions from the gridExtra
package. But that will give a legend for each chart. It would look better with only one legend. Therefore, draw one of the charts so that its legend can be extracted. Then draw the two charts without their legends, and put the two charts and the legend together.
library(ggplot)
library(gridExtra)
library(gtable)
ifpricc = read.csv(file = "IFPRI_CCAgg2050.csv", heade=TRUE)
# Subset the data frame
df = subset(ifpricc,
reg %in% c("Canada","United States","Oceania","OECD Europe","Eastern Europe","Former USSR") &
gcm %in% c("CSIRO","MIROC","noCC") &
crop %in% c("WHET"))
# Draw first chart
p1 = ggplot(data=df[df$sres != "B1", ], aes(x = yield, fill = gcm)) +
geom_density(alpha=.4, size=0.75, adjust = 1.5) +
ggtitle("sres = A1B") +
scale_fill_manual(name="GCM model",breaks=c("CSIRO","MIROC","noCC"),
values=c("red","blue","gray80")) +
facet_wrap( ~ reg, scales = "free_x") + scale_y_continuous(breaks=seq(0,2,.25))
# Extract its legend
legend = gtable_filter(ggplot_gtable(ggplot_build(p1)), "guide-box")
# Redraw the first chart without its legend
p1 = p1 + guides(fill = FALSE)
# Draw the second chart without its legend
p2 = ggplot(data=df[df$sres != "A1B", ], aes(x = yield, fill = gcm)) +
geom_density(alpha=.4, size=0.75, adjust = 1.5) +
ggtitle("sres = B1") +
scale_fill_manual(name="GCM model",breaks=c("CSIRO","MIROC","noCC"),
values=c("red","blue","gray80"), guide = "none") +
facet_wrap( ~ reg, scales = "free_x") + scale_y_continuous(breaks=seq(0,2,.25))
# Combine the two charts and the legend (and a main title)
grid.arrange(arrangeGrob(p1, p2, ncol = 1),
legend, widths = unit.c(unit(1, "npc") - legend$width, legend$width), nrow = 1,
main = textGrob("Kernel density distribution - with and without climate change",
vjust = 1, gp = gpar(fontface = "bold")))
Upvotes: 2