Reputation: 423
I have the following data with four columns: Year, Month, Day, Rainfall.
dat <- structure(list(Year = c(1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L), Month = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L), Day = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L), Rainfall = c(1, 35.5, 20.3, 2.5, 32, 66.8,
0, 0, 1.8, 0, 5.3, 0, 0, 0, 11.7, 40.4, 45.7, 15.3, 21.6, 10.5,
26.2, 54.1, 1.5, 26.9, 39.4, 21.6, 1.3, 95.6, 10.2, 0, 5.1, 0,
4.1, 2.9, 0, 0.5, 2.1, 15.7, 14.2, 28.7, 134.2, 26.3, 0, 0, 0,
2.3, 0, 2.8, 0.3, 0.8, 0, 0, 1.8, 0, 0, 0.8, 0, 0, 3.3, 13.6,
32.9, 47.7, 1.8, 78.8, 27.1, 0, 0, 45.5, 2, 1.4, 0, 0.5, 0, 0,
19.8, 11.4, 8.7, 0, 0, 0, 4.8, 0, 2.5, 10.5, 24.7, 0.8, 10.4,
6.9, 13, 0, 0, 0, 3.1, 2.8, 23.9, 2.8, 0, 1.8, 7.4, 29.8, 0.5,
0, 0, 27, 16.5, 0, 6.9, 0, 0, 0, 0, 6.6, 0.6, 1.3, 0, 0, 9.7,
2, 15.3, 6.4, 11.1, 0.5, 16.8, 1.5, 0, 2.3, 1.3, 0, 3.6, 95.5,
3.4, 1.3, 35.3, 0, 1.3, 1.8, 0, 0, 0, 36.3, 0, 6.1, 1.8, 0, 4.8,
0, 0, 0, 0, 0, 0, 0, 0, 2.3, 5.9, 52.1, 2.5, 3.8, 15.7, 0, 7.9,
8.9, 0, 0, 5.6, 0, 26.2, 9.1, 22.6, 1.8, 17.5, 68.1, 0, 2.3,
3.1, 9.7, 105.9, 30.7, 3.8, 0, 31.2, 11.7, 0, 0, 18.8, 6.3, 3.6,
0, 0, 0, 0, 43.3, 0.5, 1.3, 49.3, 1, 0, 0, 0, 4.3, 6.4, 5.4,
0.3, 64.8, 0, 0, 0, 0, 0, 0, 38.1, 0, 8.4, 0, 0, 3.3, 0, 4.4,
1.6, 0, 0, 7.4, 0, 0.5, 0, 0, 0, 0.8, 1.5, 3.3, 0, 0, 0, 2, 0,
0, 6.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36.6, 45.2, 13.9, 5.1,
0, 0, 0, 1.8, 0, 0, 1.3, 0, 0, 0.5, 0, 0, 0, 2.3, 0, 0, 0, 25.7,
0, 3.6, 0.3, 0, 0, 0, 0, 0, 19.1, 22.1, 24.2, 0.5, 5.1, 0, 0,
0, 0, 0, 0, 2.3, 0, 0, 1.5, 11.5, 21.3, 28.2, 67.8, 55.7, 25.3,
2.3, 27.5, 0, 0, 14.2, 10.4, 12.7, 14.7, 9.7, 1.8, 0, 0, 0, 0,
0, 14.5, 0.5, 10.6, 0.5, 2, 1.5, 7.4, 14.3, 15.2, 0, 37.1, 18.3,
0.5, 3.3, 4.9, 68.6, 1.5, 0, 4.1, 20.1, 25.3, 23.9, 6.3, 26.2,
50.8, 15, 1.1, 44.2, 1.5, 0, 0, 0, 0, 14.5, 24.4, 0, 39.1, 151.7,
26.4, 1, 3.6, 2.5, 0, 1.3, 4.1, 7.1, 1.8, 7.6, 84.7, 1.5, 0)), row.names = c(NA,
365L), class = "data.frame")
This is just a subset of the data. I will apply this for my data from 1979-2017.
I want to plot the histogram of "duration" of Rainfall exceeding the 95th percentile of each month. So x-axis is the duration and y-axis is the frequency. I am using the rle() function to do this.
I have the following script:
library(dplyr)
dat2 <- jan %>%
group_by(Year,Month) %>%
mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))
result <- rle(dat2$extreme)
hist(result$lengths[result$values],breaks = c(0:5), xlab = "Length of extreme events", main = "")
I am manually extracting the histogram per month by filtering them first like this:
jan <- dat[which(dat$Year==1),]
Then apply the above script on "jan".
I wonder if there is a more efficient way of doing this in R?
Note that the 95th percentile is calculated for same months from 1979-2017. For example, 95th percentile of all January from 1979-2017.
Basically I want two plots with 3 x 2 structure.
Plot 1:
Row 1: January to March
Row 2: April to June
Plot 2:
Row 1: July to September
Row 2: October to December
Here's the sample output for Plot 1.
Any suggestions on how to do this R? I'll appreciate any help.
Upvotes: 0
Views: 239
Reputation: 39613
I would suggest a facet
approach. I re-used your code and computed the desired variable for extremes, in order to filter in a new dataframe. As your month is numeric you can break with cut()
and using for facets. Here the code:
library(tidyverse)
dat2 <- dat %>%
group_by(Year,Month) %>%
mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))
#Plot
dat3 <- dat2 %>% filter(extreme==T) %>% mutate(MonthC=cut(Month,breaks=c(1,3,6,9,12),include.lowest = T,
dig.lab = 10,right = T))
#Code
ggplot(dat3,aes(x=Rainfall))+
geom_histogram(fill='cyan3',color='black')+
facet_wrap(.~MonthC,ncol = 1)+
theme_bw()
Output:
With this code you can have a matrix by month:
#Plot
dat3 <- dat2 %>% filter(extreme==T)
#Code
ggplot(dat3,aes(x=Rainfall))+
geom_histogram(fill='cyan3',color='black')+
facet_wrap(.~factor(Month),ncol = 4)+
theme_bw()
Output:
Update: I have added a function to apply for each month the function rle()
and then plot:
#Data
dat2 <- dat %>%
group_by(Year,Month) %>%
mutate(extreme = Rainfall > quantile(Rainfall,0.95,na.rm=TRUE))
#Plot
LS <- split(dat2,dat2$Month)
#Process
myfun <- function(x)
{
v1 <- x$extreme
result <- rle(v1)
v2 <- result$lengths[result$values]
#New dataframe
df <- data.frame(Month=rep(unique(x$Month),length(v2)),v2)
return(df)
}
#Apply
LS2 <- lapply(LS,myfun)
#Bind
ndf <- do.call(rbind,LS2)
#Code
ggplot(ndf,aes(x=v2))+
geom_histogram(fill='cyan3',color='black')+
facet_wrap(.~factor(Month),ncol = 4)+
theme_bw()
Output:
Upvotes: 1