user11916948
user11916948

Reputation: 954

Name values in a column depending on previous values, no systematic order

I have a problem I don't know how to solve. I know how to code it when there is a system in it, for example every second row, but in this dataset there is no order. This is a example from a bigger dataset so it is not possible to make any system out of it. Appreciate help!

    id <- rep(c(300, 450),times=c(20,17))
    visit <- c(1,1,2,2,2,2,2,2,3,3,4,5,5,5,5,5,6,6,6,1,2,2,2,2,3,3,4,4,4,5,5,5,5,5,5,5,6)
    trt <- c(0,0,"A","A","A","A","A","A", 0,0, "B", 0, 0,0,0,0,"C", "C","C", 0, "B", "B","B","B", 0,0,"C", "C","C",0,0,0,0, 0, 0,0,"A")
    q1 <- c(4,6,10,11,14,11,15,19,3,2,7,4,5,3,4,1,4,5,4,3,6,7,3,4,5,4,5,4,3,3,4,2,6,5,4,3,18)
    df <- data.frame(id,visit, trt, q1)
    df
       id   visit trt q1
    1  300     1   0  4
    2  300     1   0  6
    3  300     2   A 10
    4  300     2   A 11
    5  300     2   A 14
    6  300     2   A 11
    7  300     2   A 15
    8  300     2   A 19
    9  300     3   0  3
    10 300     3   0  2
    11 300     4   B  7
    12 300     5   0  4
    13 300     5   0  5
    14 300     5   0  3
    15 300     5   0  4
    16 300     5   0  1
    17 300     6   C  4
    18 300     6   C  5
    19 300     6   C  4
    20 300     1   0  3
    21 450     2   B  6
    22 450     2   B  7
    23 450     2   B  3
    24 450     2   B  4
    25 450     3   0  5
    26 450     3   0  4
    27 450     4   C  5
    28 450     4   C  4
    29 450     4   C  3
    30 450     5   0  3
    31 450     5   0  4
    32 450     5   0  2
    33 450     5   0  6
    34 450     5   0  5
    35 450     5   0  4
    36 450     5   0  3
    37 450     6   A 18

It should look like:

    df
        id visit trt q1
    1  300     1  A0  4
    2  300     1  A0  6
    3  300     2   A 10
    4  300     2   A 11
    5  300     2   A 14
    6  300     2   A 11
    7  300     2   A 15
    8  300     2   A 19
    9  300     3  B0  3
    10 300     3  B0  2
    11 300     4   B  7
    12 300     5  C0  4
    13 300     5  C0  5
    14 300     5  C0  3
    15 300     5  C0  4
    16 300     5  C0  1
    17 300     6   C  4
    18 300     6   C  5
    19 300     6   C  4
    20 300     1  B0  3
    21 450     2   B  6
    22 450     2   B  7
    23 450     2   B  3
    24 450     2   B  4
    25 450     3  C0  5
    26 450     3  C0  4
    27 450     4   C  5
    28 450     4   C  4
    29 450     4   C  3
    30 450     5  A0  3
    31 450     5  A0  4
    32 450     5  A0  2
    33 450     5  A0  6
    34 450     5  A0  5
    35 450     5  A0  4
    36 450     5  A0  3
    37 450     6   A 18

Upvotes: 0

Views: 61

Answers (2)

user2974951
user2974951

Reputation: 10375

I think this is what you are looking for, replacing all the "0"'s with the next available letter and pasting "0" at the end.

    df$tmp=as.character(df$trt)

    for (i in 1:(nrow(df)-1)) {
      if (df$trt[i]=="0") {
        j=i+1
        while (df$trt[j]=="0" & j<=nrow(df)) {
          j=j+1
        }
        df$tmp[i]=paste0(df$trt[j],df$trt[i])
      }
    }

and the result in column tmp

    id visit trt q1 tmp
1  300     1   0  4  A0
2  300     1   0  6  A0
3  300     2   A 10   A
4  300     2   A 11   A
5  300     2   A 14   A
6  300     2   A 11   A
7  300     2   A 15   A
8  300     2   A 19   A
9  300     3   0  3  B0
10 300     3   0  2  B0
11 300     4   B  7   B
12 300     5   0  4  C0
13 300     5   0  5  C0
14 300     5   0  3  C0
15 300     5   0  4  C0
16 300     5   0  1  C0
17 300     6   C  4   C
18 300     6   C  5   C
19 300     6   C  4   C
20 300     1   0  3  B0
21 450     2   B  6   B
22 450     2   B  7   B
23 450     2   B  3   B
24 450     2   B  4   B
25 450     3   0  5  C0
26 450     3   0  4  C0
27 450     4   C  5   C
28 450     4   C  4   C
29 450     4   C  3   C
30 450     5   0  3  A0
31 450     5   0  4  A0
32 450     5   0  2  A0
33 450     5   0  6  A0
34 450     5   0  5  A0
35 450     5   0  4  A0
36 450     5   0  3  A0
37 450     6   A 18   A

Upvotes: 1

Ronak Shah
Ronak Shah

Reputation: 389047

We can use dplyr and tidyr::fill. Replace 0 values in trt to NA, use fill in "up" direction and paste the 0's with the filled values.

library(dplyr)

df %>%
  mutate(trt1 = replace(trt, trt == 0, NA)) %>%
  tidyr::fill(trt1, .direction = "up") %>%
  mutate(trt1 = ifelse(trt == 0, paste0(trt1, trt), trt))

#    id visit trt q1 trt1
#1  300     1   0  4   A0
#2  300     1   0  6   A0
#3  300     2   A 10    A
#4  300     2   A 11    A
#5  300     2   A 14    A
#6  300     2   A 11    A
#7  300     2   A 15    A
#8  300     2   A 19    A
#9  300     3   0  3   B0
#10 300     3   0  2   B0
#....

data

Reading data as characters and not factors by using stringsAsFactors = FALSE.

df <- data.frame(id,visit, trt, q1, stringsAsFactors = FALSE)

Upvotes: 0

Related Questions