Adam
Adam

Reputation: 453

Expand dataset long ways using start and stop dates

I have a dataset that looks like the following:

      ID  RECN EXSTDAT    EXSTPDAT   EXONGO EX2LD DOSEA DOSFRM  DOSFRQ ADURN STUDYST    EXSTDAY EXSTPDAY
   <int> <dbl> <date>     <date>      <dbl> <dbl> <dbl> <chr>   <chr>  <dbl> <date>       <dbl>    <dbl>
 1     1     1 2022-07-08 2022-07-27      0     0    50 Capsule QD        19 2022-07-08       0       19
 2     1     2 2022-07-28 2022-08-14      0     1    50 Capsule QD        17 2022-07-08      20       37
 3     2     2 2022-06-09 2022-06-09      0     0    50 Capsule QD         0 2022-06-09       0        0
 4     2     1 2022-06-14 2022-08-02      0     0    50 Capsule QD        49 2022-06-09       5       54
 5     2     3 2022-08-03 2022-08-14      0     0     0 Capsule QD        11 2022-06-09      55       66
 6     2     5 2022-08-15 2022-09-26      0     0    50 Capsule QD        42 2022-06-09      67      109
 7     2     4 2022-09-27 2023-02-15      1     0   100 Capsule QD       141 2022-06-09     110      251
 8     3     1 2022-06-30 2022-08-03      0     1    50 Capsule QD        34 2022-06-30       0       34
 9     4     1 2022-08-24 2022-10-04      0     1   100 Capsule QD        41 2022-08-24       0       41
10     5     1 2022-12-30 2023-01-19      0     1   200 Capsule QD        20 2022-12-30       0       20

I would like to generate an observation for each day between the intervals of EXSTDAY and EXSTPDAY, keeping ID, DOSEA, DOSFRM, and DOSFRQ. Below is an example of the desired results for up to study day (STDAY <= 8) for ID 1 & 2:

ID  DOSEA   DOSFRM  DOSFRQ  STDAY
1   50      Capsule QD      0
1   50      Capsule QD      1
1   50      Capsule QD      2
1   50      Capsule QD      3
1   50      Capsule QD      4
1   50      Capsule QD      5 
1   50      Capsule QD      6
1   50      Capsule QD      7 
1   50      Capsule QD      8
2   50      Capsule QD      0
2   50      Capsule QD      5
2   50      Capsule QD      6
2   50      Capsule QD      7
2   50      Capsule QD      8 

I have no idea where to start, so any advice is much appreciated!

dput of original dataset:

structure(list(ID = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 
12L, 13L, 14L, 14L, 14L, 14L, 14L, 14L), RECN = c(1, 2, 2, 1, 
3, 5, 4, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 3, 4, 1, 2, 3, 1, 2, 
1, 1, 2, 3, 4, 5, 6), EXSTDAT = structure(c(19181, 19201, 19152, 
19157, 19207, 19219, 19262, 19173, 19228, 19356, 19356, 19377, 
19303, 19326, 19363, 19216, 19220, 19346, 19362, 19365, 19264, 
19277, 19282, 19219, 19226, 19310, 19345, 19351, 19352, 19354, 
19355, 19370), class = "Date"), EXSTPDAT = structure(c(19200, 
19218, 19152, 19206, 19218, 19261, 19403, 19207, 19269, 19376, 
19376, 19403, 19325, 19328, 19383, 19216, 19361, 19366, 19364, 
19403, 19275, 19281, 19403, 19225, 19226, 19338, 19350, 19351, 
19353, 19354, 19369, 19370), class = "Date"), EXONGO = c(0, 0, 
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 
0, 0, 0, 0, 0, 0, 0, 0, 0), EX2LD = c(0, 1, 0, 0, 0, 0, 0, 1, 
1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 
0, 0, 0), DOSEA = c(50, 50, 50, 50, 0, 50, 100, 50, 100, 200, 
200, 100, 100, 100, 200, 100, 100, 100, 100, 100, 100, 100, 100, 
50, 0, 100, 200, 0, 200, 0, 200, 0), DOSFRM = c("Capsule", "Capsule", 
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Capsule", 
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Tablet", 
"Capsule", "Capsule", "Capsule", "Capsule", "Tablet", "Tablet", 
"Tablet", "Tablet", "Tablet", "Capsule", "Capsule", "Capsule", 
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Capsule"
), DOSFRQ = c("QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", 
"QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", 
"QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", 
"QD", "QD"), ADURN = c(19, 17, 0, 49, 11, 42, 141, 34, 41, 20, 
20, 26, 22, 2, 20, 0, 141, 20, 2, 38, 11, 4, 121, 6, 0, 28, 5, 
0, 1, 0, 14, 0), STUDYST = structure(c(19181, 19181, 19152, 19152, 
19152, 19152, 19152, 19173, 19228, 19356, 19356, 19377, 19303, 
19303, 19363, 19216, 19216, 19216, 19216, 19216, 19264, 19264, 
19264, 19219, 19219, 19310, 19345, 19345, 19345, 19345, 19345, 
19345), class = "Date"), EXSTDAY = c(0, 20, 0, 5, 55, 67, 110, 
0, 0, 0, 0, 0, 0, 23, 0, 0, 4, 130, 146, 149, 0, 13, 18, 0, 7, 
0, 0, 6, 7, 9, 10, 25), EXSTPDAY = c(19, 37, 0, 54, 66, 109, 
251, 34, 41, 20, 20, 26, 22, 25, 20, 0, 145, 150, 148, 187, 11, 
17, 139, 6, 7, 28, 5, 6, 8, 9, 24, 25)), row.names = c(NA, -32L
), class = c("tbl_df", "tbl", "data.frame"))

Upvotes: 1

Views: 52

Answers (2)

Ben
Ben

Reputation: 30474

If you want to generate a sequence between EXSTDAY and EXSTPDAY one approach could be using map2 from purrr as follows. The final select will indicate which columns you wish to retain in the end.

library(tidyverse)

df %>%
  mutate(STDAY = map2(EXSTDAY, EXSTPDAY, seq)) %>%
  unnest(STDAY) %>%
  select(ID, DOSEA, DOSFRM, DOSFRQ, STDAY) 

Output

      ID DOSEA DOSFRM  DOSFRQ STDAY
   <int> <dbl> <chr>   <chr>  <int>
 1     1    50 Capsule QD         0
 2     1    50 Capsule QD         1
 3     1    50 Capsule QD         2
 4     1    50 Capsule QD         3
 5     1    50 Capsule QD         4
 6     1    50 Capsule QD         5
 7     1    50 Capsule QD         6
 8     1    50 Capsule QD         7
 9     1    50 Capsule QD         8
10     1    50 Capsule QD         9
# … with 877 more rows

Upvotes: 0

Allan Cameron
Allan Cameron

Reputation: 173803

If I understand you correctly, you want one row in your data frame for each day that each subject was participating in your study. If that's the case then you can do:

library(tidyverse)

df %>%
  rowwise() %>%
  reframe(ID, DOSEA, DOSFRM, DOSFRQ,
          STDAY = as.numeric(if(EXSTPDAT - EXSTDAT == 0) EXSTDAT - STUDYST else
                    seq(EXSTPDAT - EXSTDAT) + EXSTDAT - STUDYST - 1)) 
#># A tibble: 861 x 5
#>      ID DOSEA DOSFRM  DOSFRQ STDAY
#>   <int> <dbl> <chr>   <chr>  <dbl>
#> 1     1    50 Capsule QD         0
#> 2     1    50 Capsule QD         1
#> 3     1    50 Capsule QD         2
#> 4     1    50 Capsule QD         3
#> 5     1    50 Capsule QD         4
#> 6     1    50 Capsule QD         5
#> 7     1    50 Capsule QD         6
#> 8     1    50 Capsule QD         7
#> 9     1    50 Capsule QD         8
#>10     1    50 Capsule QD         9
#># ... with 851 more rows
#># i Use `print(n = ...)` to see more rows

Upvotes: 2

Related Questions