Blah505
Blah505

Reputation: 23

R lookup between values

I'm relatively new to R and I'm trying to do a lookup between multiple data frames. I have df1 which shows when my car enters/exits our fleet. df2 shows when it is being serviced. I want to create df3 which shows for a specific set of Cars (per colheaders) whether they are available at a given timestep (Yes/No or 1/0 result).

Example data is below, there would be a number of cars in df1 and df2 but not all of these would be in df3.

I've tried doing between and foverlaps but had no success and I'm beginning to wonder if this is actually something that can be done in R or if I need to revert to lots of if statements in excel.

df1

CarID Entry Exit
Car1  1   100
Car2  5   95

etc

df2 (shows service schedule)

CarID  ServiceType  Start  End
Car1  TypeA  10  20
Car1  TypeA  30  40
Car1  TypeB  45  46
Car2  TypeA  20  30

etc.

df3 (want to create/populate this table)

Date  Car1  Car2 
1
2

Upvotes: 2

Views: 134

Answers (2)

r2evans
r2evans

Reputation: 161085

Here's another option:

library(data.table)
setDT(df1)
setDT(df2)
df3 <- df1[, .(Date = seq(Entry, Exit)), by = CarID]
df3[df2, avail := ServiceType, on = .(CarID, Date >= Start, Date <= End)
  ][, avail := is.na(avail) ][]
#       CarID  Date  avail
#      <char> <int> <lgcl>
#   1:   Car1     1   TRUE
#   2:   Car1     2   TRUE
#   3:   Car1     3   TRUE
#   4:   Car1     4   TRUE
#   5:   Car1     5   TRUE
#   6:   Car1     6   TRUE
#   7:   Car1     7   TRUE
#   8:   Car1     8   TRUE
#   9:   Car1     9   TRUE
#  10:   Car1    10  FALSE
#  ---                    
# 182:   Car2    86   TRUE
# 183:   Car2    87   TRUE
# 184:   Car2    88   TRUE
# 185:   Car2    89   TRUE
# 186:   Car2    90   TRUE
# 187:   Car2    91   TRUE
# 188:   Car2    92   TRUE
# 189:   Car2    93   TRUE
# 190:   Car2    94   TRUE
# 191:   Car2    95   TRUE

And the final widening:

dcast(df3, Date ~ CarID, value.var = "avail")
#     Date  Car1  Car2
# 1      1  TRUE    NA
# 2      2  TRUE    NA
# 3      3  TRUE    NA
# 4      4  TRUE    NA
# 5      5  TRUE  TRUE
# 6      6  TRUE  TRUE
# 7      7  TRUE  TRUE
# 8      8  TRUE  TRUE
# 9      9  TRUE  TRUE
# 10    10 FALSE  TRUE
# 11    11 FALSE  TRUE
### ...

If you need those NAs to be FALSE instead (since they aren't available at all), then dcast(..., fill = FALSE).

Upvotes: 3

koolmees
koolmees

Reputation: 2783

Using data.table:

library(data.table)

df1 <- df1[, .(Date = seq(Entry, Exit, by = 1), Available = "Yes"), by=.(CarID)]
df2 <- df2[, .(Date = seq(Start, End, by = 1), Available ="No"), by=.(CarID, ServiceType, Start)][, -c(2,3)]
df2 <- dcast(df2, Date~CarID, value.var = "Available")
df3 <- dcast(df1, Date~CarID, value.var = "Available")
df3 <- merge(df3, df2, by = "Date", all = T)
for (i in unique(df1$CarID)) {
  df3[get(paste0(i, ".y")) == "No", paste0(i, ".x") := "No"]
  df3[, eval(i) := get(paste0(i, ".x"))]
}
df3 <- df3[, -(2:as.numeric(2*length(unique(df1$CarID))+1))]

Which results to:

> dput(df3)
structure(list(Date = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100), Car1 = c("Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes", 
"Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes"), Car2 = c(NA, NA, NA, NA, "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", NA, NA, NA, NA, NA)), sorted = "Date", class = c("data.table", 
"data.frame"), row.names = c(NA, -100L), .internal.selfref = <pointer: 0x0000017d31e21ef0>)

Upvotes: -1

Related Questions