Javi_VM
Javi_VM

Reputation: 515

From Start Date and End Date, get the number at each Date

I want to get the number of people in an organization. I have the Date each of them started and ended their work there.

I would like to do it with R and if possible, using a tidyverse approach

Example:

Person Start End
John 01/01/1990 01/07/1992
Eve  12/27/1991 12/31/1992

This should give:

Date People
12/31/1989 0
01/01/1990 1
01/02/1990 1
...
12/27/1991 2
12/38/1991 2
...
01/07/1992 2
01/08/1992 1
...
12/31/1992 1
01/01/1993 0

Upvotes: 0

Views: 58

Answers (3)

Ronak Shah
Ronak Shah

Reputation: 388807

Using tidyverse we can create a sequence between Start and End for each Person and count the number of Date.

library(tidyverse)

df %>%
  mutate_at(-1,lubridate::mdy) %>%
  mutate(Date = map2(Start, End, seq, by = "1 day")) %>%
  unnest(Date) %>%
  count(Date)


# A tibble: 1,096 x 2
#   Date           n
#   <date>     <int>
# 1 1990-01-01     1
# 2 1990-01-02     1
# 3 1990-01-03     1
# 4 1990-01-04     1
# 5 1990-01-05     1
# 6 1990-01-06     1
# 7 1990-01-07     1
# 8 1990-01-08     1
# 9 1990-01-09     1
#10 1990-01-10     1
# … with 1,086 more rows

data

df <- structure(list(Person = structure(2:1, .Label = c("Eve", "John"
), class = "factor"), Start = structure(1:2, .Label = c("01/01/1990", 
"12/27/1991"), class = "factor"), End = structure(1:2, .Label = c("01/07/1992", 
"12/31/1992"), class = "factor")), class = "data.frame", row.names = c(NA, -2L))

Upvotes: 2

A. Suliman
A. Suliman

Reputation: 13125

library(dplyr)
library(lubridate)
df %>% 
    rowwise() %>% 
    mutate(Date= toString(seq.Date(mdy(Start), mdy(End), by=1))) %>% 
    separate_rows(Date, sep=',') %>% 
    mutate(Date=ymd(Date)) %>% 
    right_join(data.frame(Date=seq.Date(mdy('12/31/1989'), mdy('01/01/1993'),1))) %>% 
    group_by(Date) %>% 
    summarise(People=sum(!is.na(End)))

# A tibble: 1,098 x 2
   Date       People
   <date>      <int>
 1 1989-12-31      0
 2 1990-01-01      1
 3 1990-01-02      1
 4 1990-01-03      1
 5 1990-01-04      1
 6 1990-01-05      1
 7 1990-01-06      1
 8 1990-01-07      1
 9 1990-01-08      1
10 1990-01-09      1
# ... with 1,088 more rows

Upvotes: 0

Wimpel
Wimpel

Reputation: 27732

Here is a method using data.table::foverlaps()

#load libraries
library(data.table)
library(lubridate)

#create sample data
DT <- fread("Person Start End
John 01/01/1990 01/07/1992
Eve  12/27/1991 12/31/1992")
#set dates as POSIXct timestamps
DT[, `:=`( Start = as.POSIXct( Start, format = "%m/%d/%Y", tz = "UTC" ),
           End   = as.POSIXct( End, format = "%m/%d/%Y", tz = "UTC" ) ) ]

#create table with all days in periods from DT
DT.dates <- data.table( from = seq( min( DT$Start ), max( DT$End ), by = "1 day" ) )
DT.dates[, to := from %m+% days(1) - 1 ]

#set keys
setkey( DT, Start, End )
setkey( DT.dates, from, to )

#perform overlap join and summarise
ans <- foverlaps( DT, DT.dates )[, .(People = uniqueN( Person ) ), by = (Date = from)]

output

ans[720:740,]
#           Date People
#  1: 1991-12-21      1
#  2: 1991-12-22      1
#  3: 1991-12-23      1
#  4: 1991-12-24      1
#  5: 1991-12-25      1
#  6: 1991-12-26      1
#  7: 1991-12-27      2
#  8: 1991-12-28      2
#  9: 1991-12-29      2
# 10: 1991-12-30      2
# 11: 1991-12-31      2
# 12: 1992-01-01      2
# 13: 1992-01-02      2
# 14: 1992-01-03      2
# 15: 1992-01-04      2
# 16: 1992-01-05      2
# 17: 1992-01-06      2
# 18: 1992-01-07      2
# 19: 1992-01-08      1
# 20: 1992-01-09      1
# 21: 1992-01-10      1
#           Date People

Upvotes: 0

Related Questions