Reputation: 23
I'm relatively new to R and I'm trying to do a lookup between multiple data frames. I have df1 which shows when my car enters/exits our fleet. df2 shows when it is being serviced. I want to create df3 which shows for a specific set of Cars (per colheaders) whether they are available at a given timestep (Yes/No or 1/0 result).
Example data is below, there would be a number of cars in df1 and df2 but not all of these would be in df3.
I've tried doing between and foverlaps but had no success and I'm beginning to wonder if this is actually something that can be done in R or if I need to revert to lots of if statements in excel.
df1
CarID Entry Exit
Car1 1 100
Car2 5 95
etc
df2 (shows service schedule)
CarID ServiceType Start End
Car1 TypeA 10 20
Car1 TypeA 30 40
Car1 TypeB 45 46
Car2 TypeA 20 30
etc.
df3 (want to create/populate this table)
Date Car1 Car2
1
2
Upvotes: 2
Views: 134
Reputation: 161085
Here's another option:
library(data.table)
setDT(df1)
setDT(df2)
df3 <- df1[, .(Date = seq(Entry, Exit)), by = CarID]
df3[df2, avail := ServiceType, on = .(CarID, Date >= Start, Date <= End)
][, avail := is.na(avail) ][]
# CarID Date avail
# <char> <int> <lgcl>
# 1: Car1 1 TRUE
# 2: Car1 2 TRUE
# 3: Car1 3 TRUE
# 4: Car1 4 TRUE
# 5: Car1 5 TRUE
# 6: Car1 6 TRUE
# 7: Car1 7 TRUE
# 8: Car1 8 TRUE
# 9: Car1 9 TRUE
# 10: Car1 10 FALSE
# ---
# 182: Car2 86 TRUE
# 183: Car2 87 TRUE
# 184: Car2 88 TRUE
# 185: Car2 89 TRUE
# 186: Car2 90 TRUE
# 187: Car2 91 TRUE
# 188: Car2 92 TRUE
# 189: Car2 93 TRUE
# 190: Car2 94 TRUE
# 191: Car2 95 TRUE
And the final widening:
dcast(df3, Date ~ CarID, value.var = "avail")
# Date Car1 Car2
# 1 1 TRUE NA
# 2 2 TRUE NA
# 3 3 TRUE NA
# 4 4 TRUE NA
# 5 5 TRUE TRUE
# 6 6 TRUE TRUE
# 7 7 TRUE TRUE
# 8 8 TRUE TRUE
# 9 9 TRUE TRUE
# 10 10 FALSE TRUE
# 11 11 FALSE TRUE
### ...
If you need those NA
s to be FALSE
instead (since they aren't available at all), then dcast(..., fill = FALSE)
.
Upvotes: 3
Reputation: 2783
Using data.table
:
library(data.table)
df1 <- df1[, .(Date = seq(Entry, Exit, by = 1), Available = "Yes"), by=.(CarID)]
df2 <- df2[, .(Date = seq(Start, End, by = 1), Available ="No"), by=.(CarID, ServiceType, Start)][, -c(2,3)]
df2 <- dcast(df2, Date~CarID, value.var = "Available")
df3 <- dcast(df1, Date~CarID, value.var = "Available")
df3 <- merge(df3, df2, by = "Date", all = T)
for (i in unique(df1$CarID)) {
df3[get(paste0(i, ".y")) == "No", paste0(i, ".x") := "No"]
df3[, eval(i) := get(paste0(i, ".x"))]
}
df3 <- df3[, -(2:as.numeric(2*length(unique(df1$CarID))+1))]
Which results to:
> dput(df3)
structure(list(Date = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100), Car1 = c("Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes",
"Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes"), Car2 = c(NA, NA, NA, NA, "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", NA, NA, NA, NA, NA)), sorted = "Date", class = c("data.table",
"data.frame"), row.names = c(NA, -100L), .internal.selfref = <pointer: 0x0000017d31e21ef0>)
Upvotes: -1