Jd Baba
Jd Baba

Reputation: 6118

Combine two datasets in R using the time tolerance

I have two dataframes as follows:

testdata1 <- structure(list(datetime = structure(c(1413384900, 1413384540, 
1426427880, 1426431120, 1441907100, 1441906380, 1439311500, 1439312400, 
1418223480, 1418223600, 1439391360, 1439393580, 1441978260, 1441978440, 
1418140200, 1418135400, 1418239800, 1418241000, 1429543500, 1429541400, 
1418154300, 1418154300, 1413387240, 1413387600, 1426347900, 1426349700, 
1441904580, 1441904160, 1408544520, 1408545300, 1429618080, 1429618920, 
1429620420, 1429621260, 1439305680, 1439305920, 1441978440, 1426423980, 
1426424280, 1441980180, 1429545720, 1429546740, 1439389920, 1439390160, 
1408545900, 1408546320, 1426338060, 1426338360), class = c("POSIXct", 
"POSIXt"), tzone = "America/New_York"), result_value = c(10, 
13, 17, 9.6, 11, 12, 6.2, 8.5, 9.8, 20, 9.6, 6.3, 25, 26, 33, 
22, 9.8, 30, 15, 13, 14, 25, 8, 9.5, 24, 18, 14, 17, 28, 28, 
16, 8.3, 13, 14, 12, 7, 18, 17, 14, 15, 58, 17, 5, 12, 15, 28, 
21, 40)), .Names = c("datetime", "result_value"), class = "data.frame", row.names = c(NA, 
-48L))
> head(testdata1)
             datetime result_value
1 2014-10-15 10:55:00         10.0
2 2014-10-15 10:49:00         13.0
3 2015-03-15 09:58:00         17.0
4 2015-03-15 10:52:00          9.6
5 2015-09-10 13:45:00         11.0
6 2015-09-10 13:33:00         12.0
testdata2 <- structure(list(datetime = structure(c(1410300000, 1410300600, 
1410301200, 1410301800, 1410302400, 1410303000, 1410303600, 1410304200, 
1410304800, 1410305400, 1410306000, 1410306600, 1410307200, 1410307800, 
1410308400, 1410309000, 1410309600, 1410310200, 1410310800, 1410311400, 
1410312000, 1410312600, 1410313200, 1410313800, 1410314400, 1410315000, 
1410315600, 1410316200, 1410316800, 1410317400, 1410318000, 1410318600, 
1410319200, 1410319800, 1410320400, 1410321000, 1410321600, 1410322200, 
1410322800, 1410323400, 1410324000, 1410324600, 1410325200, 1410325800, 
1410326400, 1410327000, 1410327600, 1410328200, 1410328800, 1410329400, 
1410330000, 1410330600, 1410331200, 1410331800, 1410332400, 1410333000, 
1410333600, 1410334200, 1410334800, 1410335400, 1410336000, 1410336600, 
1410337200, 1410337800, 1410338400, 1410339000, 1410339600, 1410340200, 
1410340800, 1410341400, 1410342000, 1410342600, 1410343200, 1410343800, 
1410344400, 1410345000, 1410345600, 1410346200, 1410346800, 1410347400, 
1410348000, 1410348600, 1410349200, 1410349800, 1410350400, 1410351000, 
1410351600, 1410352200, 1410352800, 1410353400, 1410354000, 1410354600, 
1410355200, 1410355800, 1410356400, 1410357000, 1410357600, 1410358200, 
1410358800, 1410359400, 1410360000, 1410360600, 1410361200, 1410361800, 
1410362400, 1410363000, 1410363600, 1410364200, 1410364800, 1410365400, 
1410366000, 1410366600, 1410367200, 1410367800, 1410368400, 1410369000, 
1410369600, 1410370200, 1410370800, 1410371400, 1410372000, 1410372600, 
1410373200, 1410373800, 1410374400, 1410375000, 1410375600, 1410376200, 
1410376800, 1410377400, 1410378000, 1410378600, 1410379200, 1410379800, 
1410380400, 1410381000, 1410381600, 1410382200, 1410382800, 1410383400, 
1410384000, 1410384600, 1410385200, 1410385800, 1410386400, 1410387000, 
1410387600, 1410388200, 1410388800, 1410389400, 1410390000, 1410390600, 
1410391200, 1410391800, 1410392400, 1410393000, 1410393600, 1410394200, 
1410394800, 1410395400, 1410396000, 1410396600, 1410397200, 1410397800, 
1410398400, 1410399000, 1410399600, 1410400200, 1410400800, 1410401400, 
1410402000, 1410402600, 1410403200, 1410403800, 1410404400, 1410405000, 
1410405600, 1410406200, 1410406800, 1410407400, 1410408000, 1410408600, 
1410409200, 1410409800, 1410410400, 1410411000, 1410411600, 1410412200, 
1410412800, 1410413400, 1410414000, 1410414600, 1410415200, 1410415800, 
1410416400, 1410417000, 1410417600, 1410418200, 1410418800, 1410419400, 
1410420000, 1410420600, 1410421200, 1410421800, 1410422400, 1410423000, 
1410423600, 1410424200, 1410424800, 1410425400, 1410426000, 1410426600, 
1410427200, 1410427800, 1410428400, 1410429000, 1410429600, 1410430200, 
1410430800, 1410431400, 1410432000, 1410432600, 1410433200, 1410433800, 
1410434400, 1410435000, 1410435600, 1410436200, 1410436800, 1410437400, 
1410438000, 1410438600, 1410439200, 1410439800, 1410440400, 1410441000, 
1410441600, 1410442200, 1410442800, 1410443400, 1410444000, 1410444600, 
1410445200, 1410445800, 1410446400, 1410447000, 1410447600, 1410448200, 
1410448800, 1410449400, 1410450000, 1410450600, 1410451200, 1410451800, 
1410452400, 1410453000), class = c("POSIXct", "POSIXt"), tzone = "America/New_York"), 
    `1.07m` = c(52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 53L, 52L, 53L, 53L, 52L, 53L, 
    53L, 53L, 53L, 53L, 53L, 53L, 53L, 58L, 58L, 57L, 61L, 58L, 
    56L, 56L, 56L, 55L, 55L, 58L, 57L, 58L, 58L, 58L, 58L, 58L, 
    58L, 58L, 58L, 58L, 58L, 58L, 58L, 58L, 57L, 58L, 58L, 58L, 
    57L, 58L, 57L, 57L, 57L, 57L, 56L, 55L, 56L, 56L, 55L, 56L, 
    56L, 55L, 55L, 56L, 55L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 
    56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 
    56L, 56L, 56L, 56L, 56L, 56L, 57L, 56L, 56L, 56L, 56L, 56L, 
    56L, 56L, 56L, 54L, 54L, 54L, 54L, 54L, 55L, 55L, 55L, 55L, 
    55L, 54L, 54L, 55L, 55L, 55L, 54L, 55L, 55L, 55L, 55L, 55L, 
    55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 54L, 54L, 55L, 55L, 
    55L, 55L, 55L, 55L, 55L, 55L, 54L, 55L, 55L, 55L, 55L, 54L, 
    54L, 54L, 54L, 54L, 54L, 54L, 57L, 57L, 57L, 57L, 57L, 57L, 
    57L, 56L, 57L, 58L, 57L, 57L, 58L, 58L, 58L, 57L, 58L, 58L, 
    57L, 56L, 55L, 65L, 88L, 134L, 166L, 165L, 166L, 166L, 165L, 
    162L, 164L, 163L, 162L, 163L, 161L, 160L, 160L), `1.32m` = c(49L, 
    49L, 49L, 49L, 50L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 
    49L, 49L, 49L, 49L, 50L, 49L, 49L, 49L, 49L, 49L, 49L, 48L, 
    49L, 49L, 48L, 48L, 49L, 48L, 49L, 49L, 49L, 49L, 49L, 48L, 
    49L, 49L, 48L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 
    49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 49L, 
    49L, 49L, 49L, 49L, 49L, 50L, 49L, 50L, 49L, 50L, 50L, 50L, 
    50L, 49L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 49L, 
    50L, 50L, 49L, 56L, 55L, 54L, 60L, 56L, 53L, 53L, 53L, 53L, 
    53L, 56L, 55L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 56L, 
    56L, 56L, 56L, 56L, 55L, 56L, 55L, 56L, 55L, 55L, 55L, 54L, 
    55L, 55L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 54L, 
    54L, 54L, 54L, 54L, 54L, 55L, 54L, 54L, 54L, 54L, 54L, 54L, 
    55L, 54L, 55L, 55L, 54L, 55L, 55L, 54L, 54L, 54L, 54L, 54L, 
    54L, 55L, 54L, 55L, 55L, 55L, 54L, 55L, 54L, 54L, 54L, 52L, 
    52L, 52L, 52L, 52L, 52L, 53L, 52L, 52L, 52L, 52L, 53L, 52L, 
    52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 52L, 
    52L, 52L, 53L, 53L, 52L, 52L, 53L, 52L, 53L, 53L, 53L, 53L, 
    52L, 52L, 52L, 53L, 52L, 52L, 52L, 53L, 52L, 52L, 52L, 53L, 
    53L, 52L, 54L, 55L, 55L, 55L, 55L, 56L, 54L, 55L, 55L, 56L, 
    56L, 56L, 56L, 56L, 56L, 55L, 56L, 56L, 56L, 55L, 54L, 64L, 
    88L, 136L, 164L, 162L, 162L, 163L, 162L, 159L, 160L, 159L, 
    158L, 159L, 158L, 159L, 160L)), .Names = c("datetime", "1.07m", 
"1.32m"), class = "data.frame", row.names = c(NA, -256L))
> head(testdata2)
             datetime 1.07m 1.32m
1 2014-09-09 18:00:00    52    49
2 2014-09-09 18:10:00    52    49
3 2014-09-09 18:20:00    52    49
4 2014-09-09 18:30:00    52    49
5 2014-09-09 18:40:00    52    50
6 2014-09-09 18:50:00    52    49

Now I need to combine testdata1 and testdata2 such that the difference between datetime in testdata1 and testdata2 cannot be larger than 15 minutes.

I am familiar with merge function but couldn't incorporate the tolerate on that condition.

Upvotes: 0

Views: 339

Answers (1)

carlo
carlo

Reputation: 131

I have a "possible" solution (more like an approach since I cannot verify because the test data provided seems to have no overlaps (i.e. no observation in testdata1 lies between +/- 15 mins of the observations in testdata2).

If you could provide a subset of the data with overlaps, that would be great!

This answer "joins" data from testdata1 to testdata2 (if that's what you need). Could easily be modified to obtain a data.frame that has variables: datetime.x from testdata1 datetime.y, from testdata2 ,1.07m, 1.32m, and result_value

data <- testdata1

dataStandard <- testdata2  # this contains the 'nice-looking/separated uniformly' time measurements

# this function takes a 'standardTime' from the dataStandard data.frame and returns the 'result' (closest to standardTime, if there exists such value in the +/- 15 min window)
result <- function(standardTime, data){
    # create search window
  low <- standardTime - minutes(15)
  high <- standardTime + minutes(15)

  # take subset of data that lies in the search window
  results <- data[low < data$datetime & data$datetime < high,]  

  # find the closest one, if it exists
  if (nrow(results) > 0) {
    result <- results$result[which.min(abs(difftime(results$datetime, standardTime)))]
  } else {
    result <- NA
  }
  return(result) 
  }


  listResults <- unlist(lapply(dataStandard$datetime, FUN = function(x) result(x, data)))

# add the results vector to the dataStandard 

dataStandard$result_value <- listResults

Upvotes: 1

Related Questions