Reputation: 6118
I have two dataframes as follows:
testdata1 <- structure(list(datetime = structure(c(1413384900, 1413384540,
1426427880, 1426431120, 1441907100, 1441906380, 1439311500, 1439312400,
1418223480, 1418223600, 1439391360, 1439393580, 1441978260, 1441978440,
1418140200, 1418135400, 1418239800, 1418241000, 1429543500, 1429541400,
1418154300, 1418154300, 1413387240, 1413387600, 1426347900, 1426349700,
1441904580, 1441904160, 1408544520, 1408545300, 1429618080, 1429618920,
1429620420, 1429621260, 1439305680, 1439305920, 1441978440, 1426423980,
1426424280, 1441980180, 1429545720, 1429546740, 1439389920, 1439390160,
1408545900, 1408546320, 1426338060, 1426338360), class = c("POSIXct",
"POSIXt"), tzone = "America/New_York"), result_value = c(10,
13, 17, 9.6, 11, 12, 6.2, 8.5, 9.8, 20, 9.6, 6.3, 25, 26, 33,
22, 9.8, 30, 15, 13, 14, 25, 8, 9.5, 24, 18, 14, 17, 28, 28,
16, 8.3, 13, 14, 12, 7, 18, 17, 14, 15, 58, 17, 5, 12, 15, 28,
21, 40)), .Names = c("datetime", "result_value"), class = "data.frame", row.names = c(NA,
-48L))
> head(testdata1) datetime result_value 1 2014-10-15 10:55:00 10.0 2 2014-10-15 10:49:00 13.0 3 2015-03-15 09:58:00 17.0 4 2015-03-15 10:52:00 9.6 5 2015-09-10 13:45:00 11.0 6 2015-09-10 13:33:00 12.0
testdata2 <- structure(list(datetime = structure(c(1410300000, 1410300600,
1410301200, 1410301800, 1410302400, 1410303000, 1410303600, 1410304200,
1410304800, 1410305400, 1410306000, 1410306600, 1410307200, 1410307800,
1410308400, 1410309000, 1410309600, 1410310200, 1410310800, 1410311400,
1410312000, 1410312600, 1410313200, 1410313800, 1410314400, 1410315000,
1410315600, 1410316200, 1410316800, 1410317400, 1410318000, 1410318600,
1410319200, 1410319800, 1410320400, 1410321000, 1410321600, 1410322200,
1410322800, 1410323400, 1410324000, 1410324600, 1410325200, 1410325800,
1410326400, 1410327000, 1410327600, 1410328200, 1410328800, 1410329400,
1410330000, 1410330600, 1410331200, 1410331800, 1410332400, 1410333000,
1410333600, 1410334200, 1410334800, 1410335400, 1410336000, 1410336600,
1410337200, 1410337800, 1410338400, 1410339000, 1410339600, 1410340200,
1410340800, 1410341400, 1410342000, 1410342600, 1410343200, 1410343800,
1410344400, 1410345000, 1410345600, 1410346200, 1410346800, 1410347400,
1410348000, 1410348600, 1410349200, 1410349800, 1410350400, 1410351000,
1410351600, 1410352200, 1410352800, 1410353400, 1410354000, 1410354600,
1410355200, 1410355800, 1410356400, 1410357000, 1410357600, 1410358200,
1410358800, 1410359400, 1410360000, 1410360600, 1410361200, 1410361800,
1410362400, 1410363000, 1410363600, 1410364200, 1410364800, 1410365400,
1410366000, 1410366600, 1410367200, 1410367800, 1410368400, 1410369000,
1410369600, 1410370200, 1410370800, 1410371400, 1410372000, 1410372600,
1410373200, 1410373800, 1410374400, 1410375000, 1410375600, 1410376200,
1410376800, 1410377400, 1410378000, 1410378600, 1410379200, 1410379800,
1410380400, 1410381000, 1410381600, 1410382200, 1410382800, 1410383400,
1410384000, 1410384600, 1410385200, 1410385800, 1410386400, 1410387000,
1410387600, 1410388200, 1410388800, 1410389400, 1410390000, 1410390600,
1410391200, 1410391800, 1410392400, 1410393000, 1410393600, 1410394200,
1410394800, 1410395400, 1410396000, 1410396600, 1410397200, 1410397800,
1410398400, 1410399000, 1410399600, 1410400200, 1410400800, 1410401400,
1410402000, 1410402600, 1410403200, 1410403800, 1410404400, 1410405000,
1410405600, 1410406200, 1410406800, 1410407400, 1410408000, 1410408600,
1410409200, 1410409800, 1410410400, 1410411000, 1410411600, 1410412200,
1410412800, 1410413400, 1410414000, 1410414600, 1410415200, 1410415800,
1410416400, 1410417000, 1410417600, 1410418200, 1410418800, 1410419400,
1410420000, 1410420600, 1410421200, 1410421800, 1410422400, 1410423000,
1410423600, 1410424200, 1410424800, 1410425400, 1410426000, 1410426600,
1410427200, 1410427800, 1410428400, 1410429000, 1410429600, 1410430200,
1410430800, 1410431400, 1410432000, 1410432600, 1410433200, 1410433800,
1410434400, 1410435000, 1410435600, 1410436200, 1410436800, 1410437400,
1410438000, 1410438600, 1410439200, 1410439800, 1410440400, 1410441000,
1410441600, 1410442200, 1410442800, 1410443400, 1410444000, 1410444600,
1410445200, 1410445800, 1410446400, 1410447000, 1410447600, 1410448200,
1410448800, 1410449400, 1410450000, 1410450600, 1410451200, 1410451800,
1410452400, 1410453000), class = c("POSIXct", "POSIXt"), tzone = "America/New_York"),
`1.07m` = c(52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 53L, 52L, 53L, 53L, 52L, 53L,
53L, 53L, 53L, 53L, 53L, 53L, 53L, 58L, 58L, 57L, 61L, 58L,
56L, 56L, 56L, 55L, 55L, 58L, 57L, 58L, 58L, 58L, 58L, 58L,
58L, 58L, 58L, 58L, 58L, 58L, 58L, 58L, 57L, 58L, 58L, 58L,
57L, 58L, 57L, 57L, 57L, 57L, 56L, 55L, 56L, 56L, 55L, 56L,
56L, 55L, 55L, 56L, 55L, 56L, 56L, 56L, 56L, 56L, 56L, 56L,
56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L,
56L, 56L, 56L, 56L, 56L, 56L, 57L, 56L, 56L, 56L, 56L, 56L,
56L, 56L, 56L, 54L, 54L, 54L, 54L, 54L, 55L, 55L, 55L, 55L,
55L, 54L, 54L, 55L, 55L, 55L, 54L, 55L, 55L, 55L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 54L, 54L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 54L, 55L, 55L, 55L, 55L, 54L,
54L, 54L, 54L, 54L, 54L, 54L, 57L, 57L, 57L, 57L, 57L, 57L,
57L, 56L, 57L, 58L, 57L, 57L, 58L, 58L, 58L, 57L, 58L, 58L,
57L, 56L, 55L, 65L, 88L, 134L, 166L, 165L, 166L, 166L, 165L,
162L, 164L, 163L, 162L, 163L, 161L, 160L, 160L), `1.32m` = c(49L,
49L, 49L, 49L, 50L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L,
49L, 49L, 49L, 49L, 50L, 49L, 49L, 49L, 49L, 49L, 49L, 48L,
49L, 49L, 48L, 48L, 49L, 48L, 49L, 49L, 49L, 49L, 49L, 48L,
49L, 49L, 48L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L,
49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L,
49L, 49L, 49L, 49L, 49L, 50L, 49L, 50L, 49L, 50L, 50L, 50L,
50L, 49L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 49L,
50L, 50L, 49L, 56L, 55L, 54L, 60L, 56L, 53L, 53L, 53L, 53L,
53L, 56L, 55L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L,
56L, 56L, 56L, 56L, 55L, 56L, 55L, 56L, 55L, 55L, 55L, 54L,
55L, 55L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L,
54L, 54L, 54L, 54L, 54L, 55L, 54L, 54L, 54L, 54L, 54L, 54L,
55L, 54L, 55L, 55L, 54L, 55L, 55L, 54L, 54L, 54L, 54L, 54L,
54L, 55L, 54L, 55L, 55L, 55L, 54L, 55L, 54L, 54L, 54L, 52L,
52L, 52L, 52L, 52L, 52L, 53L, 52L, 52L, 52L, 52L, 53L, 52L,
52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 53L, 53L, 52L, 52L, 53L, 52L, 53L, 53L, 53L, 53L,
52L, 52L, 52L, 53L, 52L, 52L, 52L, 53L, 52L, 52L, 52L, 53L,
53L, 52L, 54L, 55L, 55L, 55L, 55L, 56L, 54L, 55L, 55L, 56L,
56L, 56L, 56L, 56L, 56L, 55L, 56L, 56L, 56L, 55L, 54L, 64L,
88L, 136L, 164L, 162L, 162L, 163L, 162L, 159L, 160L, 159L,
158L, 159L, 158L, 159L, 160L)), .Names = c("datetime", "1.07m",
"1.32m"), class = "data.frame", row.names = c(NA, -256L))
> head(testdata2) datetime 1.07m 1.32m 1 2014-09-09 18:00:00 52 49 2 2014-09-09 18:10:00 52 49 3 2014-09-09 18:20:00 52 49 4 2014-09-09 18:30:00 52 49 5 2014-09-09 18:40:00 52 50 6 2014-09-09 18:50:00 52 49
Now I need to combine testdata1
and testdata2
such that the difference between datetime in testdata1
and testdata2
cannot be larger than 15 minutes.
I am familiar with merge function but couldn't incorporate the tolerate on that condition.
Upvotes: 0
Views: 339
Reputation: 131
I have a "possible" solution (more like an approach since I cannot verify because the test data provided seems to have no overlaps (i.e. no observation in testdata1 lies between +/- 15 mins of the observations in testdata2).
If you could provide a subset of the data with overlaps, that would be great!
This answer "joins" data from testdata1 to testdata2 (if that's what you need). Could easily be modified to obtain a data.frame that has variables: datetime.x
from testdata1 datetime.y
, from testdata2 ,1.07m
, 1.32m
, and result_value
data <- testdata1
dataStandard <- testdata2 # this contains the 'nice-looking/separated uniformly' time measurements
# this function takes a 'standardTime' from the dataStandard data.frame and returns the 'result' (closest to standardTime, if there exists such value in the +/- 15 min window)
result <- function(standardTime, data){
# create search window
low <- standardTime - minutes(15)
high <- standardTime + minutes(15)
# take subset of data that lies in the search window
results <- data[low < data$datetime & data$datetime < high,]
# find the closest one, if it exists
if (nrow(results) > 0) {
result <- results$result[which.min(abs(difftime(results$datetime, standardTime)))]
} else {
result <- NA
}
return(result)
}
listResults <- unlist(lapply(dataStandard$datetime, FUN = function(x) result(x, data)))
# add the results vector to the dataStandard
dataStandard$result_value <- listResults
Upvotes: 1