Reputation: 316
I have a dataframe df1 like this :
timestamp
01-12-2015 00:04
01-12-2015 02:20
01-12-2015 02:43
01-12-2015 04:31
01-12-2015 08:51
01-12-2015 11:28
01-12-2015 20:53
01-12-2015 21:28
02-12-2015 00:30
02-12-2015 20:22
Which contains time stamps. I would want to get count by binning hours in 12 hours interval i.e(01-12-2015[0-9],01-12-2015[9-21], and so on.
output Sample:
DayOfMonth Group count
1 1 5
1 2 2
2 1 2
2 2 1
The day of month can be replaced by Serial Number also, starting with 1. Any help to solve this is highly appreciated.
Upvotes: 2
Views: 4064
Reputation: 57220
Another possible solution in base R :
timeStamp <- c("01-12-2015 00:04","01-12-2015 02:20","01-12-2015 02:43","01-12-2015 04:31",
"01-12-2015 08:51","01-12-2015 11:28","01-12-2015 20:53","01-12-2015 21:28",
"02-12-2015 00:30","02-12-2015 20:22")
times <- as.POSIXlt(timeStamp,format="%d-%m-%Y %H:%M",tz='GMT')
DF <- data.frame(Times=times)
DF$Group <- as.logical(times$hour > 12) + 1
DF$DayOfMonth <- times$mday
res <- aggregate(Times ~ DayOfMonth + Group,data=DF, FUN = length)
# res :
# DayOfMonth Group Times
# 1 1 1 6
# 2 2 1 1
# 3 1 2 2
# 4 2 2 1
Or if you want to include dates in hours range: [21-0] of previous day in the next day :
timeStamp <- c("01-12-2015 00:04","01-12-2015 02:20","01-12-2015 02:43","01-12-2015 04:31",
"01-12-2015 08:51","01-12-2015 11:28","01-12-2015 20:53","01-12-2015 21:28",
"02-12-2015 00:30","02-12-2015 20:22")
times <- as.POSIXlt(timeStamp,format="%d-%m-%Y %H:%M",tz='GMT')
h <- times$hour + times$min*1/60 + times$sec*1/3600
# here we add 3 hours to the dates in hours range [21-0] in this way we
# push them to the next day
times[h >= 21] <- times[h >= 21] + 3*3600
DF <- data.frame(Times=times)
DF$Group <- ifelse(h < 9,1,ifelse(h <= 21,2,NA))
DF$DayOfMonth <- times$mday
res <- aggregate(Times ~ DayOfMonth + Group,data=na.omit(DF), FUN = length)
# res :
# DayOfMonth Group Times
# 1 1 1 5
# 2 2 1 2
# 3 1 2 2
# 4 2 2 1
Upvotes: 2
Reputation: 4187
Adding to the several already presented options, the stringi
package has some date parsing functions as well:
library(stringi)
df1$timestamp <- stri_datetime_parse(df1$timestamp, format = 'dd-mm-yyyy HH:mm')
df1$DayOfMonth <- stri_datetime_format(df1$timestamp, format = 'd')
df1$Group <- stri_datetime_format(df1$timestamp, format = 'a')
After that you can get a count with for example the following two options:
# option 1:
aggregate(. ~ Group + DayOfMonth, df1, length) # copied from @ProcrastinatusMaximus
# option 2a:
library(dplyr)
df1 %>%
group_by(DayOfMonth, Group) %>%
tally()
# option 2b:
count(df1, DayOfMonth, Group)
The output of the latter:
DayOfMonth Group n
(chr) (chr) (int)
1 1 a.m. 6
2 1 p.m. 2
3 2 a.m. 1
4 2 p.m. 1
Upvotes: 1
Reputation: 887831
We can use lubridate
functions to convert to 'Datetime' class easily and with dplyr
to get the output efficiently compared to base R
methods.
library(lubridate)
library(dplyr)
df1 %>%
mutate(timestamp = dmy_hm(timestamp)) %>%
group_by(DayOfMonth = day(timestamp)) %>%
group_by(Group = as.numeric(cut(timestamp, breaks = "12 hour")),
add=TRUE) %>%
summarise(GroupCount = n())
# DayOfMonth Group GroupCount
# <int> <dbl> <int>
#1 1 1 6
#2 1 2 2
#3 2 1 1
#4 2 2 1
Or we can use a compact option with data.table
library(data.table)
setDT(df1)[, {t1 <- dmy_hm(timestamp); .(DayOfMonth = day(t1),
Group = (hour(t1)>12)+1L)}][, .(GroupCount = .N), .(DayOfMonth, Group)]
# DayOfMonth Group GroupCount
#1: 1 1 6
#2: 1 2 2
#3: 2 1 1
#4: 2 2 1
NOTE: The data.table
solution is done with just two steps...
df1 <- structure(list(timestamp = c("01-12-2015 00:04", "01-12-2015 02:20",
"01-12-2015 02:43", "01-12-2015 04:31", "01-12-2015 08:51", "01-12-2015 11:28",
"01-12-2015 20:53", "01-12-2015 21:28", "02-12-2015 00:30", "02-12-2015 20:22"
)), .Names = "timestamp", class = "data.frame", row.names = c(NA,-10L))
Upvotes: 2
Reputation: 83275
A possible solution in base R:
# convert the 'timestamp' column to a datetime format
df1$timestamp <- as.POSIXct(strptime(df1$timestamp, format = '%d-%m-%Y %H:%M'))
# create day.of.month variable
df1$day.of.month <- format(df1$timestamp, '%d')
# extract the 12 hour interval as am/pm values
df1$group <- gsub('[0-9: ]+','\\1',format(df1$timestamp, '%r'))
# aggregate
aggregate(. ~ group + day.of.month, df1, length)
which gives:
group day.of.month timestamp
1 am 01 6
2 pm 01 2
3 am 02 1
4 pm 02 1
Another solution using data.table
and and the pm
function of lubridate
:
library(lubridate)
library(data.table)
setDT(df1)[, timestamp := dmy_hm(timestamp)
][, group := pm(timestamp)+1
][, .N, .(day.of.month = day(timestamp),group)]
which gives:
day.of.month group N
1: 1 1 6
2: 1 2 2
3: 2 1 1
4: 2 2 1
Used data:
df1 <- structure(list(timestamp = c("01-12-2015 00:04", "01-12-2015 02:20", "01-12-2015 02:43", "01-12-2015 04:31", "01-12-2015 08:51",
"01-12-2015 11:28", "01-12-2015 20:53", "01-12-2015 21:28", "02-12-2015 00:30", "02-12-2015 20:22")),
.Names = "timestamp", class = "data.frame", row.names = c(NA,-10L))
Upvotes: 3