Reputation: 453
I have a dataset that looks like the following:
ID RECN EXSTDAT EXSTPDAT EXONGO EX2LD DOSEA DOSFRM DOSFRQ ADURN STUDYST EXSTDAY EXSTPDAY
<int> <dbl> <date> <date> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <date> <dbl> <dbl>
1 1 1 2022-07-08 2022-07-27 0 0 50 Capsule QD 19 2022-07-08 0 19
2 1 2 2022-07-28 2022-08-14 0 1 50 Capsule QD 17 2022-07-08 20 37
3 2 2 2022-06-09 2022-06-09 0 0 50 Capsule QD 0 2022-06-09 0 0
4 2 1 2022-06-14 2022-08-02 0 0 50 Capsule QD 49 2022-06-09 5 54
5 2 3 2022-08-03 2022-08-14 0 0 0 Capsule QD 11 2022-06-09 55 66
6 2 5 2022-08-15 2022-09-26 0 0 50 Capsule QD 42 2022-06-09 67 109
7 2 4 2022-09-27 2023-02-15 1 0 100 Capsule QD 141 2022-06-09 110 251
8 3 1 2022-06-30 2022-08-03 0 1 50 Capsule QD 34 2022-06-30 0 34
9 4 1 2022-08-24 2022-10-04 0 1 100 Capsule QD 41 2022-08-24 0 41
10 5 1 2022-12-30 2023-01-19 0 1 200 Capsule QD 20 2022-12-30 0 20
I would like to generate an observation for each day between the intervals of EXSTDAY and EXSTPDAY, keeping ID, DOSEA, DOSFRM, and DOSFRQ. Below is an example of the desired results for up to study day (STDAY <= 8) for ID 1 & 2:
ID DOSEA DOSFRM DOSFRQ STDAY
1 50 Capsule QD 0
1 50 Capsule QD 1
1 50 Capsule QD 2
1 50 Capsule QD 3
1 50 Capsule QD 4
1 50 Capsule QD 5
1 50 Capsule QD 6
1 50 Capsule QD 7
1 50 Capsule QD 8
2 50 Capsule QD 0
2 50 Capsule QD 5
2 50 Capsule QD 6
2 50 Capsule QD 7
2 50 Capsule QD 8
I have no idea where to start, so any advice is much appreciated!
dput
of original dataset:
structure(list(ID = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 12L,
12L, 13L, 14L, 14L, 14L, 14L, 14L, 14L), RECN = c(1, 2, 2, 1,
3, 5, 4, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 3, 4, 1, 2, 3, 1, 2,
1, 1, 2, 3, 4, 5, 6), EXSTDAT = structure(c(19181, 19201, 19152,
19157, 19207, 19219, 19262, 19173, 19228, 19356, 19356, 19377,
19303, 19326, 19363, 19216, 19220, 19346, 19362, 19365, 19264,
19277, 19282, 19219, 19226, 19310, 19345, 19351, 19352, 19354,
19355, 19370), class = "Date"), EXSTPDAT = structure(c(19200,
19218, 19152, 19206, 19218, 19261, 19403, 19207, 19269, 19376,
19376, 19403, 19325, 19328, 19383, 19216, 19361, 19366, 19364,
19403, 19275, 19281, 19403, 19225, 19226, 19338, 19350, 19351,
19353, 19354, 19369, 19370), class = "Date"), EXONGO = c(0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0), EX2LD = c(0, 1, 0, 0, 0, 0, 0, 1,
1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0), DOSEA = c(50, 50, 50, 50, 0, 50, 100, 50, 100, 200,
200, 100, 100, 100, 200, 100, 100, 100, 100, 100, 100, 100, 100,
50, 0, 100, 200, 0, 200, 0, 200, 0), DOSFRM = c("Capsule", "Capsule",
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Capsule",
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Tablet",
"Capsule", "Capsule", "Capsule", "Capsule", "Tablet", "Tablet",
"Tablet", "Tablet", "Tablet", "Capsule", "Capsule", "Capsule",
"Capsule", "Capsule", "Capsule", "Capsule", "Capsule", "Capsule"
), DOSFRQ = c("QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD",
"QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD",
"QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD", "QD",
"QD", "QD"), ADURN = c(19, 17, 0, 49, 11, 42, 141, 34, 41, 20,
20, 26, 22, 2, 20, 0, 141, 20, 2, 38, 11, 4, 121, 6, 0, 28, 5,
0, 1, 0, 14, 0), STUDYST = structure(c(19181, 19181, 19152, 19152,
19152, 19152, 19152, 19173, 19228, 19356, 19356, 19377, 19303,
19303, 19363, 19216, 19216, 19216, 19216, 19216, 19264, 19264,
19264, 19219, 19219, 19310, 19345, 19345, 19345, 19345, 19345,
19345), class = "Date"), EXSTDAY = c(0, 20, 0, 5, 55, 67, 110,
0, 0, 0, 0, 0, 0, 23, 0, 0, 4, 130, 146, 149, 0, 13, 18, 0, 7,
0, 0, 6, 7, 9, 10, 25), EXSTPDAY = c(19, 37, 0, 54, 66, 109,
251, 34, 41, 20, 20, 26, 22, 25, 20, 0, 145, 150, 148, 187, 11,
17, 139, 6, 7, 28, 5, 6, 8, 9, 24, 25)), row.names = c(NA, -32L
), class = c("tbl_df", "tbl", "data.frame"))
Upvotes: 1
Views: 52
Reputation: 30474
If you want to generate a seq
uence between EXSTDAY
and EXSTPDAY
one approach could be using map2
from purrr
as follows. The final select
will indicate which columns you wish to retain in the end.
library(tidyverse)
df %>%
mutate(STDAY = map2(EXSTDAY, EXSTPDAY, seq)) %>%
unnest(STDAY) %>%
select(ID, DOSEA, DOSFRM, DOSFRQ, STDAY)
Output
ID DOSEA DOSFRM DOSFRQ STDAY
<int> <dbl> <chr> <chr> <int>
1 1 50 Capsule QD 0
2 1 50 Capsule QD 1
3 1 50 Capsule QD 2
4 1 50 Capsule QD 3
5 1 50 Capsule QD 4
6 1 50 Capsule QD 5
7 1 50 Capsule QD 6
8 1 50 Capsule QD 7
9 1 50 Capsule QD 8
10 1 50 Capsule QD 9
# … with 877 more rows
Upvotes: 0
Reputation: 173803
If I understand you correctly, you want one row in your data frame for each day that each subject was participating in your study. If that's the case then you can do:
library(tidyverse)
df %>%
rowwise() %>%
reframe(ID, DOSEA, DOSFRM, DOSFRQ,
STDAY = as.numeric(if(EXSTPDAT - EXSTDAT == 0) EXSTDAT - STUDYST else
seq(EXSTPDAT - EXSTDAT) + EXSTDAT - STUDYST - 1))
#># A tibble: 861 x 5
#> ID DOSEA DOSFRM DOSFRQ STDAY
#> <int> <dbl> <chr> <chr> <dbl>
#> 1 1 50 Capsule QD 0
#> 2 1 50 Capsule QD 1
#> 3 1 50 Capsule QD 2
#> 4 1 50 Capsule QD 3
#> 5 1 50 Capsule QD 4
#> 6 1 50 Capsule QD 5
#> 7 1 50 Capsule QD 6
#> 8 1 50 Capsule QD 7
#> 9 1 50 Capsule QD 8
#>10 1 50 Capsule QD 9
#># ... with 851 more rows
#># i Use `print(n = ...)` to see more rows
Upvotes: 2