Reputation: 421
How to select the data that correspond to the last day of the month?
For example, I have a dataset which contains daily data of V1
from 2000 to 2016. All I need is to select the row for the last day of each month,
i.e. 31/01/2001, 28/02/2001, etc., for all years.
The date format is DD/MM/YYYY.
V1 V2
4.59 29/12/2000
4.59 01/01/2001
4.58 02/01/2001
4.52 03/01/2001
4.54 04/01/2001
4.58 05/01/2001
......
4.09 26/01/2001
4.50 27/01/2001
4.18 28/01/2001
4.11 29/01/2001
3.54 30/01/2001
4.98 31/01/2001 <- Select this row!
Upvotes: 0
Views: 8857
Reputation: 8413
library(data.table)
library(lubridate)
# Within every (year, month) group, find the position of the latest date
# once and return the V1 value and the date stored at that position.
# NOTE(review): assumes V2 is already a Date column -- confirm.
setDT(df)[order(V2), {
  latest <- which.max(V2)
  .(V1[latest], V2[latest])
}, by = .(year(V2), month(V2))]
# year month V1 V2
#1: 2000 12 4.59 2000-12-29
#2: 2001 1 4.98 2001-01-31
Upvotes: 3
Reputation: 38500
This can be accomplished with base R as well.
# Month-end lookup: move every date 28 days ahead, snap it to the first day
# of the month it lands in, then step back one day. Rows whose date is found
# in the resulting set are kept.
month_ends <- unique(as.Date(format(df$V2 + 28, "%Y-%m-01")) - 1)
df[is.element(df$V2, month_ends), ]
V1 V2
12 4.98 2001-01-31
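This uses a trick I picked up from one of Dirk Eddelbuettel's SO answers. The idea is to move each date 28 days forward, set the result to the first day of the month it lands in, and then subtract 1 from it. A month-end date is mapped back onto itself, so matching the original dates against these values keeps only the month-end rows.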
data
# Sample data: V1 holds the daily values, V2 the matching dates stored as
# Date (29 Dec 2000, 1-5 Jan 2001 and 26-31 Jan 2001), given here as day
# counts since 1970-01-01.
df <- data.frame(
  V1 = c(4.59, 4.59, 4.58, 4.52, 4.54, 4.58,
         4.09, 4.5, 4.18, 4.11, 3.54, 4.98),
  V2 = as.Date(
    c(11320, 11323, 11324, 11325, 11326, 11327,
      11348, 11349, 11350, 11351, 11352, 11353),
    origin = "1970-01-01"
  )
)
proof of concept
# Proof of concept for the month-end trick on ten full years of daily dates.
# Printed results are kept as comments so the snippet can be pasted and run
# as-is.

# construct a vector of dates for 10 years, 2001 through 2010
# (the range includes the leap years 2004 and 2008)
myDates <- seq(as.Date("2001-01-01"), as.Date("2010-12-31"), by = "day")

# pull off the final days of the month: shift each date 28 days ahead, snap
# it to the first of the month it lands in, subtract one day, and keep the
# original dates that appear among those values
finalDays <-
  myDates[myDates %in% unique(as.Date(format(myDates + 28, "%Y-%m-01")) - 1)]

# Take a look at first 5 and last 5
c(head(finalDays, 5), tail(finalDays, 5))
# [1] "2001-01-31" "2001-02-28" "2001-03-31" "2001-04-30" "2001-05-31"
# [6] "2010-08-31" "2010-09-30" "2010-10-31" "2010-11-30" "2010-12-31"

# get length, 12 * 10 = 120
length(finalDays)
# [1] 120

# make sure there are no repeated values
length(unique(finalDays))
# [1] 120
Upvotes: 3
Reputation: 887048
We can use dplyr
library(dplyr)
library(lubridate)
library(zoo)
If we need only the rows that fall on the actual last calendar day of a month (and not merely the last date that is found in the dataset for that month)
# Parse the DD/MM/YYYY strings into a helper column, keep the rows whose date
# equals the last calendar day of its month (as.yearmon() + frac = 1), then
# drop the helper column again.
df %>%
  mutate(V3 = dmy(V2)) %>%
  filter(V3 == as.Date(as.yearmon(V3), frac = 1)) %>%
  select(-V3)
# V1 V2
#1 4.98 31/01/2001
But if we need the last day that is actually found in the dataset for each month (even when it is not the calendar month end)
# Last date actually present in the data for each (month, year) pair.
# V3 is a parsed copy of the DD/MM/YYYY strings; it and the two grouping keys
# are helper columns that are removed again at the end.
df %>%
  mutate(V3 = dmy(V2), month = month(V3), year = year(V3)) %>%
  group_by(month, year) %>%
  slice(which.max(day(V3))) %>%
  ungroup() %>%
  select(-c(month, year, V3))
# V1 V2
# <dbl> <chr>
#1 4.98 31/01/2001
#2 4.59 29/12/2000
If the data should be grouped by 'month' only (ignoring the year), just remove `year = year(V3)`
from the `group_by` call,
and we will get
# Group by calendar month only (years are pooled) and keep, per month, the
# row with the largest day-of-month; which.max() picks the first such row.
# V3 and month are helper columns that are removed again at the end.
df %>%
  mutate(V3 = dmy(V2), month = month(V3)) %>%
  group_by(month) %>%
  slice(which.max(day(V3))) %>%
  ungroup() %>%
  select(-c(month, V3))
# Sample data for the dplyr examples: V1 holds the daily values and V2 the
# dates as DD/MM/YYYY character strings (not yet parsed).
df <- data.frame(
  V1 = c(4.59, 4.59, 4.58, 4.52, 4.54, 4.58,
         4.09, 4.5, 4.18, 4.11, 3.54, 4.98),
  V2 = c("29/12/2000", "01/01/2001", "02/01/2001", "03/01/2001",
         "04/01/2001", "05/01/2001", "26/01/2001", "27/01/2001",
         "28/01/2001", "29/01/2001", "30/01/2001", "31/01/2001"),
  stringsAsFactors = FALSE
)
Upvotes: 2
Reputation: 57686
# A date is a month end exactly when the following day is the 1st of a month.
# which() discards NA comparisons, and drop = FALSE keeps a data frame.
# NOTE(review): assumes V2 is a Date so that V2 + 1 means "next day" -- confirm.
df[which(as.POSIXlt(df$V2 + 1)$mday == 1), , drop = FALSE]
## you don't have 31-Dec in your data
# V1 V2
# 1 4.98 31/01/2001
Upvotes: 1