ira
ira

Reputation: 2644

How to shift values in rows of data.table based on value in the same row

I have a data.table like:

structure(list(level = c(1, 2, 1, 3, 1, 1), step_destination_step_1 = c(3105, 
2689, 1610, 4897, 129, 161), step_destination_step_2 = c(2689, 
3201, 6730, 3105, 2689, 673), step_destination_step_3 = c(2945, 
NA, NA, 1057, 2945, NA), step_destination_step_4 = c(NA, NA, 
NA, NA, 3201, NA)), row.names = c(NA, -6L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x000001a52ad81ef0>)

This looks like:

   level step_destination_step_1 step_destination_step_2 step_destination_step_3
1:     1                    3105                    2689                    2945
2:     2                    2689                    3201                      NA
3:     1                    1610                    6730                      NA
4:     3                    4897                    3105                    1057
5:     1                     129                    2689                    2945
6:     1                     161                     673                      NA
   step_destination_step_4
1:                      NA
2:                      NA
3:                      NA
4:                      NA
5:                    3201
6:                      NA

I would like to shift values in columns step_destination_step_* by level - 1. This will require adding at least some new columns to the data.table

I want to add NA values to the left of the numerical values whenever shift to the right happens.

So the resulting output might look like:

   level_1 level_2 level_3 level_4 level_5 level_6
1:    3105    2689    2945      NA      NA      NA
2:      NA    2689    3201      NA      NA      NA
3:    1610    6730      NA      NA      NA      NA
4:      NA      NA    4897    3105    1057      NA
5:     129    2689    2945    3201      NA      NA
6:     161     673      NA      NA      NA      NA

I can achieve this result by writing a for loop, which is definitely not the right way to go:

# create a placeholder data.table:
hold = data.table(
  level_1 = as.double(rep(NA, 6)), level_2 = as.double(rep(NA, 6)),
  level_3 = as.double(rep(NA, 6)), level_4 = as.double(rep(NA, 6)),
  level_5 = as.double(rep(NA, 6)), level_6 = as.double(rep(NA, 6))
  )

# loop over every row of the tables:

for (i in 1:6)
{
  hold[i, (test_out_2[i, level]):(test_out_2[i, level] + 3)] = test_out_2[i, 2:5]
}

Where test_out_2 is the name of the original data.table (just assign to it the output of dput provided at the top)

Upvotes: 1

Views: 364

Answers (2)

s_baldur
s_baldur

Reputation: 33498

In base R you could do:

nlvls <- 6L
test <- t(apply(
      DT, 
      1, 
      function(x) {
        out <- rep(NA_real_, nlvls)
        input <- x[-1][!is.na(x[-1])]
        out[seq_along(input) + x[1] - 1L] <- input
        out
      }))
test

     [,1] [,2] [,3] [,4] [,5] [,6]
[1,] 3105 2689 2945   NA   NA   NA
[2,]   NA 2689 3201   NA   NA   NA
[3,] 1610 6730   NA   NA   NA   NA
[4,]   NA   NA 4897 3105 1057   NA
[5,]  129 2689 2945 3201   NA   NA
[6,]  161  673   NA   NA   NA   NA

And playing around with data.table:

DT[, c(rep(NA_real_, .SD[["level"]] - 1L), unlist(.SD)[-1]), by = .(row = seq_len(nrow(DT)))
   ][, dcast(.SD, row ~ paste0("level_", rowid(row)), value.var = "V1")]


   row level_1 level_2 level_3 level_4 level_5 level_6
1:   1    3105    2689    2945      NA      NA      NA
2:   2      NA    2689    3201      NA      NA      NA
3:   3    1610    6730      NA      NA      NA      NA
4:   4      NA      NA    4897    3105    1057      NA
5:   5     129    2689    2945    3201      NA      NA
6:   6     161     673      NA      NA      NA      NA

Upvotes: 0

chinsoon12
chinsoon12

Reputation: 25225

A possible approach:

library(data.table)
#convert into long format
mDT <- melt(setDT(DT)[, rn:=.I], id.vars=c("rn", "level"))

#pivot into desired output
dcast(
    #pad the front with NA depending on level
    mDT[, .(lvl=c(rep(NA_integer_, level[1L]-1L), value)), by=.(rn)],
    rn ~ rowid(rn),
    value.var="lvl")[, -"rn"]

output:

      1    2    3    4    5  6
1: 3105 2689 2945   NA   NA NA
2:   NA 2689 3201   NA   NA NA
3: 1610 6730   NA   NA   NA NA
4:   NA   NA 4897 3105 1057 NA
5:  129 2689 2945 3201   NA NA
6:  161  673   NA   NA   NA NA

data:

DT <- structure(list(level = c(1, 2, 1, 3, 1, 1), step_destination_step_1 = c(3105,
    2689, 1610, 4897, 129, 161), step_destination_step_2 = c(2689,
        3201, 6730, 3105, 2689, 673), step_destination_step_3 = c(2945,
            NA, NA, 1057, 2945, NA), step_destination_step_4 = c(NA, NA,
                NA, NA, 3201, NA)), row.names = c(NA, -6L), class = c("data.table",
                    "data.frame"))

Upvotes: 2

Related Questions