Reputation: 11
I'm trying to represent a particle size distribution with geom_tile, like this plot: [enter image description here](https://i.sstatic.net/zOGn9uT5.png)
My data represent the concentration of particles for each size bin and for each time step. There are 137 size bins per second.
So my df has 3 columns and 82337 rows. The first column is the time, from 10:45:00 to 10:56:00, then the numeric concentration at each time (sometimes equal to 0), and bin_column, the numeric bin size, which runs from 15 to 1375 for every second. Here is an example (I removed some lines):
Time | Bin_column | Concentration |
---|---|---|
10:45:00 | 15 | 1.583952e+04 |
10:45:00 | 25 | 5.800000e-01 |
10:45:00 | 1375 | 1.559140e+00 |
10:45:01 | 15 | 0.000000e+00 |
Here is my code :
library(ncdf4)
library(RNetCDF)
library(ggplot2)
library(data.table)
library(dplyr)
library(scales)
library(patchwork)
library(ggpubr)
library(cowplot)
library(tidyr)
library(purrr)
library(chron)
library(scico)
# NetCDF input files; their contents are concatenated in this order.
files <- c(
  "base201204090056_2DS.nc",
  "base201204093146_2DS.nc",
  "base201204102044_2DS.nc",
  "base201204105249_2DS.nc",
  "base201204110956_2DS.nc",
  "base201204130416_2DS.nc",
  "base201204134204_2DS.nc",
  "base201204141637_2DS.nc",
  "base201204144715_2DS.nc",
  "base201204151055_2DS.nc"
)

# Position of each wanted variable inside `nc_file$var`, keyed by the name of
# the object it is stored in below.
# NOTE(review): positional indices silently pick the wrong variable if a file
# orders its variables differently -- consider selecting by variable name.
var_index <- c(
  diameter_h      = 294L,
  size_distrib_h  = 274L,
  mvd_all         = 285L,
  seconds_h       = 298L,
  concentration_h = 280L,
  bins            = 272L
)

# Read every wanted variable from every file. Results are collected in a
# preallocated list instead of growing six vectors with c() on each iteration.
per_file <- vector("list", length(files))
for (i in seq_along(files)) {
  nc_file <- nc_open(files[[i]])
  per_file[[i]] <- tryCatch(
    lapply(var_index, function(idx) {
      # as.vector() drops array dimensions, exactly as c() did.
      as.vector(ncvar_get(nc_file, nc_file$var[[idx]]))
    }),
    # Close the handle even when a read fails.
    finally = nc_close(nc_file)
  )
}

# Concatenate one variable across all files, in file order.
# Every variable starts from nothing: seeding an accumulator with `matrix()`
# (a 1x1 NA matrix) would leave a spurious leading NA and make that variable
# one element longer than the others.
# NOTE(review): if later code drops a first NA row from concentration_h or
# size_distrib_h, remove that workaround.
combine_var <- function(name) {
  as.numeric(unlist(lapply(per_file, `[[`, name), use.names = FALSE))
}

# One single-column data frame per variable; the column carries the same name
# as the object.
bins <- data.frame(bins = combine_var("bins"))
mvd_all <- data.frame(mvd_all = combine_var("mvd_all"))
seconds_h <- data.frame(seconds_h = combine_var("seconds_h"))
diameter_h <- data.frame(diameter_h = combine_var("diameter_h"))
concentration_h <- data.frame(concentration_h = combine_var("concentration_h"))
size_distrib_h <- data.frame(size_distrib_h = combine_var("size_distrib_h"))
[...]
# Tile plot of concentration over time (x) and size bin (y). The fill and the
# y axis are both logarithmic, and two dashed red lines mark reference times.
ggplot(df, aes(x = time, y = bin_column, fill = concentration_h)) +
  geom_tile() +
  # Log-scaled colour bar labelled in powers of ten.
  scale_fill_scico(
    palette = "batlow",
    trans = "log",
    limits = c(0.1, 10000),
    breaks = c(0.1, 1, 10, 100, 1000, 10000),
    labels = trans_format("log10", math_format(10^.x))
  ) +
  # Vertical markers at the two reference times.
  geom_vline(
    xintercept = as.numeric(time_1048),
    color = "red", linetype = "dashed", size = 1
  ) +
  geom_vline(
    xintercept = as.numeric(time_1052),
    color = "red", linetype = "dashed", size = 1
  ) +
  # Axes: time ticks every two minutes, log10 size axis from 10 to 1000.
  scale_x_datetime(breaks = date_breaks("2 min"), date_labels = "%H:%M") +
  scale_y_continuous(
    trans = "log10",
    limits = c(10, 1000),
    breaks = c(10, 100, 1000),
    labels = trans_format("log10", math_format(10^.x))
  ) +
  labs(
    x = "Time",
    y = "Max Diameter [μm]",
    fill = expression("Concentration " * " (#/L/" * mu * "m)")
  ) +
  theme_bw() +
  # Text sizes, slanted x tick labels and a 1 cm margin on every side.
  theme(
    plot.margin = unit(rep(1, 4), "cm"),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(size = 15)
  )
And the plot : enter image description here
Do you know why my data are flattened like that?
Upvotes: 1
Views: 114
Reputation: 125897
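As already mentioned in the comments, the issue is due to the log scale: geom_tile gives every tile the same height, taken from the smallest gap between the y values after the log transformation, so the tiles become very thin wherever the bins are further apart in log space. Instead I would suggest using a geom_rect, which however requires some manual computation.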
First, let's reproduce your issue using some fake random example data:
library(ggplot2)
# `warn.conflicts` is spelled out instead of relying on partial argument
# matching of `warn`.
library(dplyr, warn.conflicts = FALSE)

# Fixed seed so the random fill values are reproducible.
set.seed(123)

# Regular grid: 10 x positions by 100 evenly spaced y values between 10 and
# 1000, with one random fill value per cell.
dat <- expand.grid(
  x = seq_len(10),
  y = seq(10, 1000, length.out = 100)
)
dat$fill <- rnorm(nrow(dat))

# geom_tile() on a log10 y axis reproduces the problem from the question.
dat |>
  ggplot(aes(x, y, fill = fill)) +
  geom_tile() +
  scale_y_log10()
I also tried setting the `height` in `geom_tile`, as suggested by @GregorThomas, but this does not work well. Instead, a `geom_rect` allows for more flexibility:
# Draw each cell as an explicit rectangle whose lower and upper edges sit
# halfway to the neighbouring y values, so the cells stay contiguous on the
# log10 axis. Assumes the rows are ordered by y within each x.
dat |>
  dplyr::mutate(
    # Distance to the next and to the previous y value within the same x.
    gap_above = abs(y - dplyr::lead(y)),
    gap_below = abs(y - dplyr::lag(y)),
    # The first/last row has no neighbour on one side: reuse the other gap.
    ymax = y + dplyr::coalesce(gap_above, gap_below) / 2,
    ymin = y - dplyr::coalesce(gap_below, gap_above) / 2,
    # A non-positive lower edge cannot be drawn on a log axis; clamp it to 1
    # so the rectangle is not dropped.
    ymin = dplyr::if_else(ymin > 0, ymin, 1),
    .by = x
  ) |>
  ggplot() +
  geom_rect(
    aes(
      xmin = x - 0.5,
      xmax = x + 0.5,
      ymin = ymin,
      ymax = ymax,
      fill = fill
    )
  ) +
  scale_y_log10(
    breaks = c(10, 100, 1000),
    labels = scales::label_log()
  )
Upvotes: 0