Howdyouride
Howdyouride

Reputation: 175

R data.table update last values of parameters with some dynamics by id, using shift etc

Here's a data.table, with some parameters for each id by some regular quarterly dates. It's originally shuffled randomly, but, at first, let say, it is sorted by fab_date and id.

set.seed(1)
dt_to_fun <- data.table(fab_date = structure(c(18993, 19174, 19358, 19539, 
                            18993, 19174, 19358, 19539, 18993, 19174, 19358, 19539, 18993, 
                            19174, 19358, 19539, 18993, 19174, 19358, 19539), class = "Date"), 
     id = c("n_01", "n_01", "n_01", "n_01", "n_02", "n_02", "n_02", 
                 "n_02", "n_03", "n_03", "n_03", "n_03", "n_04", "n_04", "n_04", 
                 "n_04", "n_05", "n_05", "n_05", "n_05"), 
     param_01 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE), 
     param_02 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE), 
     param_03 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE))

dt_to_fun

       fab_date   id param_01 param_02 param_03
 1: 2022-01-01 n_01       10       50       30
 2: 2022-07-01 n_01       40       20       20
 3: 2023-01-01 n_01       10       20       20
 4: 2023-07-01 n_01       20       10       50
 5: 2022-01-01 n_02       50       40       20
 6: 2022-07-01 n_02       30       10       10
 7: 2023-01-01 n_02       20       40       30
 8: 2023-07-01 n_02       30       30       30
 9: 2022-01-01 n_03       30       20       40
10: 2022-07-01 n_03       10       20       30
11: 2023-01-01 n_03       50       40       10
12: 2023-07-01 n_03       50       40       40
13: 2022-01-01 n_04       20       40       50
14: 2022-07-01 n_04       20       20       10
15: 2023-01-01 n_04       10       40       10
16: 2023-07-01 n_04       50       10       40
17: 2022-01-01 n_05       50       10       50
18: 2022-07-01 n_05       10       40       50
19: 2023-01-01 n_05       10       10       40
20: 2023-07-01 n_05       50       20       50
>

The goal is: for each id replace (last date) parameter value like this: param_01(last date) = param_01(last date) + param_01(-1 period) - param_01(-2 period) For example, for id n_05 last value of param_02 is 20, previous is 10, two times earlier is 40, so the result should be 20 + 10 - 40 = -10. The same for all id's and all param columns. The last date is '2023-07-01', so only parameters in rows with this date should be updated.

I managed to do this calculation, but the way I do it is applied for all dates, but it needs to be done only for the last date of each id. Here is the function:

quarterly_process_fun <- function(dt) {
  param_cols <- c("param_01", "param_02", "param_03")
  dt[, (param_cols) := lapply(
    .SD,
    \(x) (x + data.table::shift(x, n = 1L, fill = x[1L], type = "lag") - data.table::shift(x, n = 2L, fill = x[1L], type = "lag")
    )),
    by = .(id),
    .SDcols = param_cols
  ]
  return(dt)
} 
quarterly_process_fun(dt_to_fun)

and the result:

dt_to_fun
        fab_date   id param_01 param_02 param_03
     1: 2022-01-01 n_01       10       50       30
     2: 2022-07-01 n_01       40       20       20
     3: 2023-01-01 n_01       40      -10       10
     4: 2023-07-01 n_01      -10       10       50
     5: 2022-01-01 n_02       50       40       20
     6: 2022-07-01 n_02       30       10       10
     7: 2023-01-01 n_02        0       10       20
     8: 2023-07-01 n_02       20       60       50
     9: 2022-01-01 n_03       30       20       40
    10: 2022-07-01 n_03       10       20       30
    11: 2023-01-01 n_03       30       40        0
    12: 2023-07-01 n_03       90       60       20
    13: 2022-01-01 n_04       20       40       50
    14: 2022-07-01 n_04       20       20       10
    15: 2023-01-01 n_04       10       20      -30
    16: 2023-07-01 n_04       40       30       40
    17: 2022-01-01 n_05       50       10       50
    18: 2022-07-01 n_05       10       40       50
    19: 2023-01-01 n_05      -30       40       40
    20: 2023-07-01 n_05       50      -10       40

So, how can I adjust it, so that it's only calculates and replaces differences for last dates for each id?

The other question is, if this can be managed on a shuffled data?

    set.seed(1)
    dt_to_fun <- data.table(fab_date = structure(c(18993, 19174, 19358, 19539, 
                                18993, 19174, 19358, 19539, 18993, 19174, 19358, 19539, 18993, 
                                19174, 19358, 19539, 18993, 19174, 19358, 19539), class = "Date"), 
         id = c("n_01", "n_01", "n_01", "n_01", "n_02", "n_02", "n_02", 
                     "n_02", "n_03", "n_03", "n_03", "n_03", "n_04", "n_04", "n_04", 
                     "n_04", "n_05", "n_05", "n_05", "n_05"), 
         param_01 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE), 
         param_02 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE), 
         param_03 = sample(c(10,20, 30, 40, 50), 20, replace = TRUE))
    
#shuffle rows
dt_to_fun <- dt_to_fun[sample(nrow(dt_to_fun)),]
     fab_date   id param_01 param_02 param_03
 1: 2023-07-01 n_03       50       40       40
 2: 2023-07-01 n_04       50       10       40
 3: 2022-01-01 n_01       10       50       30
 4: 2022-01-01 n_04       20       40       50
 5: 2022-01-01 n_02       50       40       20
 6: 2023-01-01 n_04       10       40       10
 7: 2022-07-01 n_02       30       10       10
 8: 2022-07-01 n_05       10       40       50
 9: 2022-01-01 n_03       30       20       40
10: 2023-01-01 n_02       20       40       30
11: 2023-01-01 n_03       50       40       10
12: 2023-01-01 n_01       10       20       20
13: 2022-07-01 n_04       20       20       10
14: 2022-07-01 n_01       40       20       20
15: 2022-07-01 n_03       10       20       30
16: 2023-07-01 n_05       50       20       50
17: 2023-07-01 n_01       20       10       50
18: 2023-07-01 n_02       30       30       30
19: 2023-01-01 n_05       10       10       40
20: 2022-01-01 n_05       50       10       50

Upvotes: 1

Views: 70

Answers (2)

langtang
langtang

Reputation: 24845

Here is another approach:

dt_to_fun[order(id,fab_date), lapply(.SD, \(v) {
  v[length(v)] <- sum(last(v,2)) -v[length(v)-2];v
}), id, .SDcols=patterns("^p")][,fab_date:=dt_to_fun[order(id,fab_date), fab_date]][]

Output:

      id param_01 param_02 param_03   fab_date
 1: n_01       10       50       30 2022-01-01
 2: n_01       40       20       20 2022-07-01
 3: n_01       10       20       20 2023-01-01
 4: n_01      -10       10       50 2023-07-01
 5: n_02       50       40       20 2022-01-01
 6: n_02       30       10       10 2022-07-01
 7: n_02       20       40       30 2023-01-01
 8: n_02       20       60       50 2023-07-01
 9: n_03       30       20       40 2022-01-01
10: n_03       10       20       30 2022-07-01
11: n_03       50       40       10 2023-01-01
12: n_03       90       60       20 2023-07-01
13: n_04       20       40       50 2022-01-01
14: n_04       20       20       10 2022-07-01
15: n_04       10       40       10 2023-01-01
16: n_04       40       30       40 2023-07-01
17: n_05       50       10       50 2022-01-01
18: n_05       10       40       50 2022-07-01
19: n_05       10       10       40 2023-01-01
20: n_05       50      -10       40 2023-07-01

Upvotes: 1

Roland
Roland

Reputation: 132969

quarterly_process_fun <- function(dt) {
  setorder(dt, id, fab_date)
  param_cols <- c("param_01", "param_02", "param_03")
  dt[, (param_cols) := lapply(
    .SD,
    \(x) c(head(x, -1), tail(x, 3) %*% c(-1, 1, 1))),
    by = .(id),
    .SDcols = param_cols
  ]
  return(dt[])
} 

quarterly_process_fun(dt_to_fun)
#       fab_date   id param_01 param_02 param_03
#  1: 2022-01-01 n_01       10       50       30
#  2: 2022-07-01 n_01       40       20       20
#  3: 2023-01-01 n_01       10       20       20
#  4: 2023-07-01 n_01      -10       10       50
#  5: 2022-01-01 n_02       50       40       20
#  6: 2022-07-01 n_02       30       10       10
#  7: 2023-01-01 n_02       20       40       30
#  8: 2023-07-01 n_02       20       60       50
#  9: 2022-01-01 n_03       30       20       40
# 10: 2022-07-01 n_03       10       20       30
# 11: 2023-01-01 n_03       50       40       10
# 12: 2023-07-01 n_03       90       60       20
# 13: 2022-01-01 n_04       20       40       50
# 14: 2022-07-01 n_04       20       20       10
# 15: 2023-01-01 n_04       10       40       10
# 16: 2023-07-01 n_04       40       30       40
# 17: 2022-01-01 n_05       50       10       50
# 18: 2022-07-01 n_05       10       40       50
# 19: 2023-01-01 n_05       10       10       40
# 20: 2023-07-01 n_05       50      -10       40

Upvotes: 2

Related Questions