user3969377
user3969377

Reputation:

Extracting a value from a row that has the closest date before or equal to the target date

In the language R, I have the following two data frames

sref_df

        unit        ft          event_time   cum_ft
7215  165755 0.0000000 01/03/2014 10:29:13 0.000000
7214  165755 0.0000000 01/06/2014 17:13:45 0.000000
7774  165755 0.0000000 01/09/2014 11:17:06 0.000000
8581  165755 0.0000000 01/10/2014 12:12:29 0.000000
10326 165755 1.2624167 01/10/2014 13:50:54 1.262417
7219  165755 1.0894306 01/10/2014 16:40:38 2.351847
7216  165755 0.0000000 01/11/2014 11:43:24 2.351847
2221  165755 0.0000000 01/12/2014 12:52:53 2.351847
1832  165755 1.0176389 01/13/2014 07:56:00 3.369486
1528  165755 0.9430278 01/13/2014 16:22:43 4.312514

event_df

        unit          event_time
8642  165755 01/03/2014 10:30:01
8643  165755 01/03/2014 10:31:01
8641  165755 01/06/2014 17:14:44
9318  165755 01/09/2014 11:17:49
10257 165755 01/10/2014 12:13:23
12333 165755 01/10/2014 13:51:48
8647  165755 01/10/2014 16:41:30
8644  165755 01/11/2014 11:44:06
2806  165755 01/12/2014 12:53:46
2292  165755 01/13/2014 07:56:54

Ref has different unit values, event only has one unit Ref has been sorted by unit and then by event_time For each row in the event data frame Extract cum_ft from reference data frame Where the event_time in the reference data frame is the closest prior or equal to event_time in the event data frame. Add the extracted cum_ft to the event_df

I'm trying the following, which doesn't run. I don't know how to write the "irow = " line.

bref_df <- data.frame(unit=integer(),ft=double(),
             event_time=as.Date(character()),
             cum_ft=double(),
             stringsAsFactors=FALSE) 
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/03/2014 10:29:13',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/06/2014 17:13:45',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/09/2014 11:17:06',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/10/2014 12:12:29',format='%m/%d/%Y %H:%M:%S'), cum_ft = 0.000000))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.2624167, 
  event_time=strptime('01/10/2014 13:50:54',format='%m/%d/%Y %H:%M:%S'), cum_ft = 1.262417))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.0894306, 
  event_time=strptime('01/10/2014 16:40:38',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/11/2014 11:43:24',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.0000000, 
  event_time=strptime('01/12/2014 12:52:53',format='%m/%d/%Y %H:%M:%S'), cum_ft = 2.351847))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 1.0176389, 
  event_time=strptime('01/13/2014 07:56:00',format='%m/%d/%Y %H:%M:%S'), cum_ft = 3.369486))
bref_df <- rbind(bref_df , data.frame(unit = 165755, ft = 0.9430278, 
  event_time=strptime('01/13/2014 16:22:43',format='%m/%d/%Y %H:%M:%S'), cum_ft = 4.312514))

eref_df <- data.frame(unit=integer(),ft=double(),
                 event_time=as.Date(character()),
                 stringsAsFactors=FALSE) 
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/03/2014 10:30:01',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/03/2014 10:31:01',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/06/2014 17:14:44',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/09/2014 11:17:49',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 12:13:23',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 13:51:48',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/10/2014 16:41:30',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/11/2014 11:44:06',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/12/2014 12:53:46',format='%m/%d/%Y %H:%M:%S')))
eref_df <- rbind(eref_df , data.frame(unit = 165755,  event_time=strptime('01/13/2014 07:56:54',format='%m/%d/%Y %H:%M:%S')))

sref_df<-bref_df[with(bref_df, order(unit, event_time)), ]
print(sref_df)

uUnit = 165755
event_df=eref_df[eref_df$unit==uUnit,]
sevent_df=eref_df[with(event_df, order(event_time)), ]
print(sevent_df)

for (iTime in seq(sevent_df$event_time)) {
  aTime = sevent_df$event_time[iTime]
  irow = which(max(sref_df$event_time[sref_df$event_time<=aTime]))
  sevent_df$matchRow[iTime] = irow 
  sevent_df$cum_ft[iTime] = sref_df$cum_ft[irow]
}

print(sevent_df)

The desired output is

index   unit    event_time  match   cum_ft
1   165755  1/3/2014 10:30  1   0
2   165755  1/3/2014 10:31  1   0
3   165755  1/6/2014 17:14  2   0
4   165755  1/9/2014 11:17  3   0
5   165755  1/10/2014 12:13 4   0
6   165755  1/10/2014 13:51 5   1.262417
7   165755  1/10/2014 16:41 6   2.351847
8   165755  1/11/2014 11:44 7   2.351847
9   165755  1/12/2014 12:53 8   2.351847
10  165755  1/13/2014 7:56  9   3.369486

event_df has 24600 rows of search criteria (event_time and unit) to match.
sref_df has 20600 rows containing the event_time and unit to search through for the matching unit and closest prior event_time in order to to extract the matching row and cum_ft

Upvotes: 0

Views: 855

Answers (1)

Jota
Jota

Reputation: 17621

Here's one way:

diff_matrix <- sapply(event_df$event_time, function(x) x-sref_df$event_time)
diff_matrix[diff_matrix < 0] <- NA

event_df$cum_ft <- 
  sref_df$cum_ft[apply(diff_matrix, 2, function(x) which(x == min(x, na.rm=TRUE)))]

#        unit          event_time   cum_ft
#8642  165755 2014-01-03 10:30:01 0.000000
#8643  165755 2014-01-03 10:31:01 0.000000
#8641  165755 2014-01-06 17:14:44 0.000000
#9318  165755 2014-01-09 11:17:49 0.000000
#10257 165755 2014-01-10 12:13:23 0.000000
#12333 165755 2014-01-10 13:51:48 1.262417
#8647  165755 2014-01-10 16:41:30 2.351847
#8644  165755 2014-01-11 11:44:06 2.351847
#2806  165755 2014-01-12 12:53:46 2.351847
#2292  165755 2014-01-13 07:56:54 3.369486

You can add the match column you have in your desired output like so:

event_df$match <- apply(diff_matrix, 2, function(x) which(x == min(x, na.rm=TRUE)))

Upvotes: 1

Related Questions