Lyndz
Lyndz

Reputation: 423

Multifaceted plots of histogram using ggplot in R

I have the following data with four columns: Year, Month, Day, Rainfall.

dat <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L), Month = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L), Day = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 
22L, 23L, 24L, 25L, 26L, 27L, 28L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 
30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 
27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 
25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 
28L, 29L, 30L, 31L), Rainfall = c(1, 35.5, 20.3, 2.5, 32, 66.8, 
0, 0, 1.8, 0, 5.3, 0, 0, 0, 11.7, 40.4, 45.7, 15.3, 21.6, 10.5, 
26.2, 54.1, 1.5, 26.9, 39.4, 21.6, 1.3, 95.6, 10.2, 0, 5.1, 0, 
4.1, 2.9, 0, 0.5, 2.1, 15.7, 14.2, 28.7, 134.2, 26.3, 0, 0, 0, 
2.3, 0, 2.8, 0.3, 0.8, 0, 0, 1.8, 0, 0, 0.8, 0, 0, 3.3, 13.6, 
32.9, 47.7, 1.8, 78.8, 27.1, 0, 0, 45.5, 2, 1.4, 0, 0.5, 0, 0, 
19.8, 11.4, 8.7, 0, 0, 0, 4.8, 0, 2.5, 10.5, 24.7, 0.8, 10.4, 
6.9, 13, 0, 0, 0, 3.1, 2.8, 23.9, 2.8, 0, 1.8, 7.4, 29.8, 0.5, 
0, 0, 27, 16.5, 0, 6.9, 0, 0, 0, 0, 6.6, 0.6, 1.3, 0, 0, 9.7, 
2, 15.3, 6.4, 11.1, 0.5, 16.8, 1.5, 0, 2.3, 1.3, 0, 3.6, 95.5, 
3.4, 1.3, 35.3, 0, 1.3, 1.8, 0, 0, 0, 36.3, 0, 6.1, 1.8, 0, 4.8, 
0, 0, 0, 0, 0, 0, 0, 0, 2.3, 5.9, 52.1, 2.5, 3.8, 15.7, 0, 7.9, 
8.9, 0, 0, 5.6, 0, 26.2, 9.1, 22.6, 1.8, 17.5, 68.1, 0, 2.3, 
3.1, 9.7, 105.9, 30.7, 3.8, 0, 31.2, 11.7, 0, 0, 18.8, 6.3, 3.6, 
0, 0, 0, 0, 43.3, 0.5, 1.3, 49.3, 1, 0, 0, 0, 4.3, 6.4, 5.4, 
0.3, 64.8, 0, 0, 0, 0, 0, 0, 38.1, 0, 8.4, 0, 0, 3.3, 0, 4.4, 
1.6, 0, 0, 7.4, 0, 0.5, 0, 0, 0, 0.8, 1.5, 3.3, 0, 0, 0, 2, 0, 
0, 6.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36.6, 45.2, 13.9, 5.1, 
0, 0, 0, 1.8, 0, 0, 1.3, 0, 0, 0.5, 0, 0, 0, 2.3, 0, 0, 0, 25.7, 
0, 3.6, 0.3, 0, 0, 0, 0, 0, 19.1, 22.1, 24.2, 0.5, 5.1, 0, 0, 
0, 0, 0, 0, 2.3, 0, 0, 1.5, 11.5, 21.3, 28.2, 67.8, 55.7, 25.3, 
2.3, 27.5, 0, 0, 14.2, 10.4, 12.7, 14.7, 9.7, 1.8, 0, 0, 0, 0, 
0, 14.5, 0.5, 10.6, 0.5, 2, 1.5, 7.4, 14.3, 15.2, 0, 37.1, 18.3, 
0.5, 3.3, 4.9, 68.6, 1.5, 0, 4.1, 20.1, 25.3, 23.9, 6.3, 26.2, 
50.8, 15, 1.1, 44.2, 1.5, 0, 0, 0, 0, 14.5, 24.4, 0, 39.1, 151.7, 
26.4, 1, 3.6, 2.5, 0, 1.3, 4.1, 7.1, 1.8, 7.6, 84.7, 1.5, 0)), row.names = c(NA, 
365L), class = "data.frame")

This is just a subset of the data. I will apply this for my data from 1979-2017.

I want to plot the histogram of "duration" of Rainfall exceeding the 95th percentile of each month. So x-axis is the duration and y-axis is the frequency. I am using the rle() function to do this.

I have the following script:

library(dplyr)
dat2 <- jan %>%
   group_by(Year,Month) %>%
   mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))

result <- rle(dat2$extreme)
hist(result$lengths[result$values],breaks = c(0:5), xlab = "Length of extreme events", main = "")

I am manually extracting the histogram per month by filtering them first like this:

jan <- dat[which(dat$Year==1),]

Then apply the above script on "jan".

I wonder if there is a more efficient way of doing this in R?

Note that the 95th percentile is calculated for same months from 1979-2017. For example, 95th percentile of all January from 1979-2017.

Basically I want two plots with 3 x 2 structure.

Plot 1:

Row 1: January to March

Row 2: April to June

Plot 2:

Row 1: July to September

Row 2: October to December

Here's the sample output for Plot 1.

Sample output

Any suggestions on how to do this R? I'll appreciate any help.

Upvotes: 0

Views: 239

Answers (1)

Duck
Duck

Reputation: 39613

I would suggest a facet approach. I re-used your code and computed the desired variable for extremes, in order to filter in a new dataframe. As your month is numeric you can break with cut() and using for facets. Here the code:

library(tidyverse)
dat2 <- dat %>%
  group_by(Year,Month) %>%
  mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))
#Plot
dat3 <- dat2 %>% filter(extreme==T) %>% mutate(MonthC=cut(Month,breaks=c(1,3,6,9,12),include.lowest = T,
                                                          dig.lab = 10,right = T))
#Code
ggplot(dat3,aes(x=Rainfall))+
  geom_histogram(fill='cyan3',color='black')+
  facet_wrap(.~MonthC,ncol = 1)+
  theme_bw()

Output:

enter image description here

With this code you can have a matrix by month:

#Plot
dat3 <- dat2 %>% filter(extreme==T)
#Code
ggplot(dat3,aes(x=Rainfall))+
  geom_histogram(fill='cyan3',color='black')+
  facet_wrap(.~factor(Month),ncol = 4)+
  theme_bw()

Output:

enter image description here

Update: I have added a function to apply for each month the function rle() and then plot:

#Data
dat2 <- dat %>%
  group_by(Year,Month) %>%
  mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))
#Plot
LS <- split(dat2,dat2$Month)
#Process
myfun <- function(x)
{
  v1 <- x$extreme
  result <- rle(v1)
  v2 <- result$lengths[result$values]
  #New dataframe
  df <- data.frame(Month=rep(unique(x$Month),length(v2)),v2)
  return(df)
}
#Apply
LS2 <- lapply(LS,myfun)
#Bind
ndf <- do.call(rbind,LS2)

#Code
ggplot(ndf,aes(x=v2))+
  geom_histogram(fill='cyan3',color='black')+
  facet_wrap(.~factor(Month),ncol = 4)+
  theme_bw()

Output:

enter image description here

Upvotes: 1

Related Questions