Reputation: 1117
I was looking for a solution to draw side by side boxplot
using facet_wrap
in R
. Although there are lots of good solutions, I didn't come across any that did what I wanted. I decided to draw a picture of the plot
that I would like to see for my two data.frame
. Data.frame
C has my calibration data for the four models across different metrics (i.e., KGE, NSE, PBIAS, and R-Sq), while Data.frame
V has my validation data. I want to see a separate plot
of each metrics using facet_wrap
of the ggplot2
functionality. Below is what I have done so far, but it is not getting me any closer.
# Close any open graphics devices so the plot is drawn on a fresh device.
# (The original `rm(list = ls())` was dropped: it silently deletes every
# object in the workspace of whoever runs this example.)
graphics.off()
library(tidyverse)

# Calibration table: 3 random draws per metric (KGE, NSE, R_Sq, PBIAS) for
# each of the four models (M1-M4), plus a constant label column `Cal`.
C <- data.frame(
  KGE_M1 = runif(3, 0, 0.5), NSE_M1 = runif(3, 0, 0.5),
  R_Sq_M1 = runif(3, -1, 0.3), PBIAS_M1 = runif(3, -0.25, 0.25),
  KGE_M2 = runif(3, 0.2, 0.7), NSE_M2 = runif(3, 0.2, 0.7),
  R_Sq_M2 = runif(3, -0.5, 0.7), PBIAS_M2 = runif(3, -0.15, 0.15),
  KGE_M3 = runif(3, 0.3, 0.8), NSE_M3 = runif(3, 0.3, 0.8),
  R_Sq_M3 = runif(3, 0.3, 0.8), PBIAS_M3 = runif(3, -0.10, 0.10),
  KGE_M4 = runif(3, 0.5, 1), NSE_M4 = runif(3, 0.5, 1),
  R_Sq_M4 = runif(3, 0.5, 1), PBIAS_M4 = runif(3, -0.05, 0.05),
  Cal = rep("Calibration", 3)
)

# Validation table: same columns and ranges, labelled through `Val`.
V <- data.frame(
  KGE_M1 = runif(3, 0, 0.5), NSE_M1 = runif(3, 0, 0.5),
  R_Sq_M1 = runif(3, -1, 0.3), PBIAS_M1 = runif(3, -0.25, 0.25),
  KGE_M2 = runif(3, 0.2, 0.7), NSE_M2 = runif(3, 0.2, 0.7),
  R_Sq_M2 = runif(3, -0.5, 0.7), PBIAS_M2 = runif(3, -0.15, 0.15),
  KGE_M3 = runif(3, 0.3, 0.8), NSE_M3 = runif(3, 0.3, 0.8),
  R_Sq_M3 = runif(3, 0.3, 0.8), PBIAS_M3 = runif(3, -0.10, 0.10),
  KGE_M4 = runif(3, 0.5, 1), NSE_M4 = runif(3, 0.5, 1),
  R_Sq_M4 = runif(3, 0.5, 1), PBIAS_M4 = runif(3, -0.05, 0.05),
  Val = rep("Validation", 3)
)

# Reshape wide -> long: every metric/model column becomes (Variable, Value)
# rows, while the label column is kept as-is. `pivot_longer()` replaces the
# superseded `gather()`; the rows come out in a different order, which does
# not affect a box plot.
C <- pivot_longer(C, cols = -Cal, names_to = "Variable", values_to = "Value")
V <- pivot_longer(V, cols = -Val, names_to = "Variable", values_to = "Value")
# Box plot of Value for each Variable, with one panel per Variable.
# The `+` has to END a line: a line that STARTS with `+` is parsed as a new
# statement, so the facet specification would never be added to the plot.
ggplot(data = C) +
  geom_boxplot(aes(x = Variable, y = Value)) +
  facet_wrap(~Variable)
I would like to see a plot like below
Upvotes: 2
Views: 13943
Reputation: 16178
I think you need to split your Variable
before plotting, in order to have one variable for the models (M1, M2, M3, M4) and one variable for your conditions:
library(tidyverse)
# Reshape the wide calibration table to long format and split every original
# column name (e.g. "KGE_M1") into a metric part and a model part.
C2 <- C %>%
  pivot_longer(cols = -Cal, names_to = "Variable", values_to = "Value") %>%
  rename(Type = Cal) %>%
  # The grouping is kept only so the result has the same structure as before;
  # the string operations below are vectorised and no longer rely on each
  # group containing a single distinct name.
  group_by(Variable) %>%
  mutate(
    # Everything after the last "_M" is the model number -> "Cal_M<number>".
    Variable2 = paste0("Cal_M", sub("^.*_M", "", Variable)),
    # Everything before the first "_M" is the metric name (e.g. "R_Sq").
    Variable1 = sub("_M.*$", "", Variable)
  )
# A tibble: 6 x 5
# Groups: Variable [6]
Type Variable Value Variable2 Variable1
<fct> <chr> <dbl> <chr> <chr>
1 Calibration KGE_M1 0.246 Cal_M1 KGE
2 Calibration NSE_M1 0.476 Cal_M1 NSE
3 Calibration R_Sq_M1 -0.978 Cal_M1 R_Sq
4 Calibration PBIAS_M1 0.117 Cal_M1 PBIAS
5 Calibration KGE_M2 0.544 Cal_M2 KGE
6 Calibration NSE_M2 0.270 Cal_M2 NSE
Now, we are doing the same thing for the dataset V
# Reshape the wide validation table to long format and split every original
# column name (e.g. "KGE_M1") into a metric part and a model part.
V2 <- V %>%
  pivot_longer(cols = -Val, names_to = "Variable", values_to = "Value") %>%
  rename(Type = Val) %>%
  # The grouping is kept only so the result has the same structure as before;
  # the string operations below are vectorised and no longer rely on each
  # group containing a single distinct name.
  group_by(Variable) %>%
  mutate(
    # Everything after the last "_M" is the model number -> "Val_M<number>".
    Variable2 = paste0("Val_M", sub("^.*_M", "", Variable)),
    # Everything before the first "_M" is the metric name (e.g. "R_Sq").
    Variable1 = sub("_M.*$", "", Variable)
  )
# A tibble: 6 x 5
# Groups: Variable [6]
Type Variable Value Variable2 Variable1
<fct> <chr> <dbl> <chr> <chr>
1 Validation KGE_M1 0.459 Val_M1 KGE
2 Validation NSE_M1 0.105 Val_M1 NSE
3 Validation R_Sq_M1 -0.435 Val_M1 R_Sq
4 Validation PBIAS_M1 0.0281 Val_M1 PBIAS
5 Validation KGE_M2 0.625 Val_M2 KGE
6 Validation NSE_M2 0.332 Val_M2 NSE
We can now bind them together:
# Stack the calibration and validation rows. `bind_rows()` matches columns by
# name and is the dplyr-native way to combine (grouped) tibbles.
DF <- bind_rows(C2, V2)
Then, we can plot:
# One box per Variable2 value on the x axis, one panel per Variable1 value,
# each panel with its own axis ranges; x labels are tilted by 45 degrees.
ggplot(data = DF, mapping = aes(x = Variable2, y = Value)) +
  geom_boxplot() +
  facet_wrap(vars(Variable1), scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
EDIT: Rename x axis, add empty column to separate Calibration and Validation values
To add an empty space between Calibration and Validation, you can simply add one empty row (with an NA value) for each level of Variable1, like this:
# Append one placeholder row per distinct Variable1 value. Its Variable2 is
# "Empty", which adds an extra x-axis slot in every panel, and its Value is
# missing, so no box is drawn there. The length-1 values are recycled to the
# number of distinct Variable1 values instead of relying on a hard-coded 4.
DF <- DF %>%
  as.data.frame() %>%  # also drops the grouping before rows are added
  add_row(
    Type = "Empty",
    Variable = "Empty",
    Value = NA_real_,
    Variable2 = "Empty",
    Variable1 = unique(DF$Variable1)
  )
Also, if you want to rename the x axis labels, you can use scale_x_discrete
# Same faceted box plot, now filled by Type and with shortened x labels.
# The labels are derived from the axis values themselves ("Cal_M1" -> "M1",
# "Empty" -> "") rather than from a fixed positional vector, so they stay
# correct if the number or the order of models changes.
ggplot(DF, aes(x = Variable2, y = Value, fill = Type)) +
  geom_boxplot() +
  facet_wrap(. ~ Variable1, scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_discrete(
    labels = function(lvl) {
      ifelse(lvl == "Empty", "", sub("^(Cal|Val)_", "", lvl))
    }
  )
Does it look like what you are expecting?
Upvotes: 3
Reputation: 1261
Here's one way to do what you need.
First, we create the data that you have:
library(tidyverse)
# Calibration table: three uniform random draws per metric and model, plus a
# constant character label column `Cal`.
C <- data.frame(
  KGE_M1   = runif(3, min = 0, max = 0.5),
  NSE_M1   = runif(3, min = 0, max = 0.5),
  R_Sq_M1  = runif(3, min = -1, max = 0.3),
  PBIAS_M1 = runif(3, min = -0.25, max = 0.25),
  KGE_M2   = runif(3, min = 0.2, max = 0.7),
  NSE_M2   = runif(3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(3, min = -0.15, max = 0.15),
  KGE_M3   = runif(3, min = 0.3, max = 0.8),
  NSE_M3   = runif(3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(3, min = -0.10, max = 0.10),
  KGE_M4   = runif(3, min = 0.5, max = 1),
  NSE_M4   = runif(3, min = 0.5, max = 1),
  R_Sq_M4  = runif(3, min = 0.5, max = 1),
  PBIAS_M4 = runif(3, min = -0.05, max = 0.05),
  Cal      = rep("Calibration", times = 3),
  stringsAsFactors = FALSE  # keep the label as character, not factor
)
# Validation table: same columns and ranges as the calibration table, with a
# constant character label column `Val`.
V <- data.frame(
  KGE_M1   = runif(3, min = 0, max = 0.5),
  NSE_M1   = runif(3, min = 0, max = 0.5),
  R_Sq_M1  = runif(3, min = -1, max = 0.3),
  PBIAS_M1 = runif(3, min = -0.25, max = 0.25),
  KGE_M2   = runif(3, min = 0.2, max = 0.7),
  NSE_M2   = runif(3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(3, min = -0.15, max = 0.15),
  KGE_M3   = runif(3, min = 0.3, max = 0.8),
  NSE_M3   = runif(3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(3, min = -0.10, max = 0.10),
  KGE_M4   = runif(3, min = 0.5, max = 1),
  NSE_M4   = runif(3, min = 0.5, max = 1),
  R_Sq_M4  = runif(3, min = 0.5, max = 1),
  PBIAS_M4 = runif(3, min = -0.05, max = 0.05),
  Val      = rep("Validation", times = 3),
  stringsAsFactors = FALSE  # keep the label as character, not factor
)
Now we change the format of the data and visualize it:
# Give both tables the same label column name so they can be stacked.
C <- rename(C, Identifier = Cal)
V <- rename(V, Identifier = Val)

data <-
  # Stack the calibration and validation rows into one table.
  bind_rows(C, V) %>%
  # Wide -> long: one row per (original column name, value) pair.
  # `pivot_longer()` replaces the superseded `gather()`; only the row order
  # of the result differs, which does not matter for the plot.
  pivot_longer(
    cols = -Identifier,
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  # Split the name at its LAST underscore, e.g. "R_Sq_M1" -> "R_Sq" + "M1".
  separate(Variable, into = c("Variable", "Number"), sep = "_(?=[^_]+$)")
# One panel per metric; within each panel, one box per model number and
# Identifier, coloured by Identifier.
ggplot(
  data,
  aes(
    x = Number,
    y = Value,
    fill = Identifier,
    # one box for every Identifier/Number combination
    group = interaction(Identifier, Number)
  )
) +
  geom_boxplot() +
  facet_wrap(vars(Variable))
Upvotes: 2