Reputation: 21
I have a database with date variables by row. Each row includes a new admission date for each case. There is a unique id number for each case and repeated admissions are allowed. There is also another category variable. I ordered the database by id and grouping category. Then I subtracted every following admission from the previous one to estimate days between two admissions. This estimation created NA's for the first admission. I need cumulative sums for consecutive admissions to see the whole range.
A sample data frame is below:
ID Admission Date Cat Date_diff Admission_Rank
<dbl> <dttm> <dbl> <Period>
1326 2015-06-08 00:00:00 1 NA 1
1466 2017-11-27 00:00:00 1 NA 1
1656 2016-10-06 00:00:00 1 NA 1
1594 2015-04-20 00:00:00 2 NA 1
1347 2013-02-14 00:00:00 1 NA 1
1347 2013-03-18 00:00:00 1 32d 0H 0M 0S 2
1347 2013-04-03 00:00:00 1 16d 0H 0M 0S 3
1347 2013-08-15 00:00:00 1 134d 0H 0M 0S 4
1347 2014-05-16 00:00:00 1 274d 0H 0M 0S 5
1347 2014-05-16 00:00:00 1 0S 6
A sample data frame is below:
ID Admission Date Cat Date_diff Admission_Rank Cum_Sum
1326 2015-06-08 00:00:00 1 NA 1 NA
1466 2017-11-27 00:00:00 1 NA 1 NA
1656 2016-10-06 00:00:00 1 NA 1 NA
1594 2015-04-20 00:00:00 2 NA 1 NA
1347 2013-02-14 00:00:00 1 NA 1 NA
1347 2013-03-18 00:00:00 1 32d 0H 0M 0S 2 32d 0H 0M 0S
1347 2013-04-03 00:00:00 1 16d 0H 0M 0S 3 48d 0H 0M 0S
1347 2013-08-15 00:00:00 1 134d 0H 0M 0S 4 172d 0H 0M 0S
1347 2014-05-16 00:00:00 1 274d 0H 0M 0S 5 446d 0H 0M 0S
1347 2014-05-16 00:00:00 1 0S 6 446d 0H 0M 0S
Upvotes: 0
Views: 65
Reputation: 3535
Apart from the difference in formatting of the duration (below in seconds), does this do what you want to do?
library(tidyverse)
df <- tribble(
~id, ~admission_date, ~cat,
1326, "2015-06-08 00:00:00", 1,
1466, "2017-11-27 00:00:00", 1,
1656, "2016-10-06 00:00:00", 1,
1594, "2015-04-20 00:00:00", 2,
1347, "2013-02-14 00:00:00", 1,
1347, "2013-03-18 00:00:00", 1,
1347, "2013-04-03 00:00:00", 1,
1347, "2013-08-15 00:00:00", 1,
1347, "2014-05-16 00:00:00", 1,
1347, "2014-05-16 00:00:00", 1) |>
mutate(id = as.integer(id),
admission_date = as.POSIXct(admission_date),
cat = as.integer(cat))
df |>
group_by(id) |>
arrange(admission_date) |>
mutate(date_diff = admission_date - lag(admission_date),
cum_sum = cumsum(coalesce(date_diff, dseconds(0)))) |>
ungroup()
#> # A tibble: 10 × 5
#> id admission_date cat date_diff cum_sum
#> <int> <dttm> <int> <drtn> <dbl>
#> 1 1347 2013-02-14 00:00:00 1 NA secs 0
#> 2 1347 2013-03-18 00:00:00 1 2761200 secs 2761200
#> 3 1347 2013-04-03 00:00:00 1 1382400 secs 4143600
#> 4 1347 2013-08-15 00:00:00 1 11577600 secs 15721200
#> 5 1347 2014-05-16 00:00:00 1 23673600 secs 39394800
#> 6 1347 2014-05-16 00:00:00 1 0 secs 39394800
#> 7 1594 2015-04-20 00:00:00 2 NA secs 0
#> 8 1326 2015-06-08 00:00:00 1 NA secs 0
#> 9 1656 2016-10-06 00:00:00 1 NA secs 0
#> 10 1466 2017-11-27 00:00:00 1 NA secs 0
Created on 2023-05-16 with reprex v2.0.2
And here is a database version (code will vary slightly according to the backend):
library(tidyverse)
library(DBI)
library(dbplyr, warn.conflicts = FALSE)
db <- dbConnect(RPostgres::Postgres())
df <- tribble(
~id, ~admission_date, ~cat,
1326, "2015-06-08 00:00:00", 1,
1466, "2017-11-27 00:00:00", 1,
1656, "2016-10-06 00:00:00", 1,
1594, "2015-04-20 00:00:00", 2,
1347, "2013-02-14 00:00:00", 1,
1347, "2013-03-18 00:00:00", 1,
1347, "2013-04-03 00:00:00", 1,
1347, "2013-08-15 00:00:00", 1,
1347, "2014-05-16 00:00:00", 1,
1347, "2014-05-16 00:00:00", 1) |>
mutate(id = as.integer(id),
admission_date = as.POSIXct(admission_date),
cat = as.integer(cat))
df_db <- copy_to(db, df = df, overwrite = TRUE)
df_db |>
group_by(id) |>
window_order(admission_date) |>
mutate(date_diff = admission_date - lag(admission_date),
cum_sum = cumsum(coalesce(date_diff,
seconds(0)))) |>
ungroup() |>
compute() |>
arrange(cum_sum)
#> # Source: SQL [10 x 5]
#> # Database: postgres [iangow@/tmp:5432/iangow]
#> # Ordered by: cum_sum
#> id admission_date cat date_diff cum_sum
#> <int> <dttm> <int> <chr> <chr>
#> 1 1326 2015-06-08 04:00:00 1 <NA> 00:00:00
#> 2 1347 2013-02-14 05:00:00 1 <NA> 00:00:00
#> 3 1466 2017-11-27 05:00:00 1 <NA> 00:00:00
#> 4 1594 2015-04-20 04:00:00 2 <NA> 00:00:00
#> 5 1656 2016-10-06 04:00:00 1 <NA> 00:00:00
#> 6 1347 2013-03-18 04:00:00 1 31 days 23:00:00 31 days 23:00:00
#> 7 1347 2013-04-03 04:00:00 1 16 days 47 days 23:00:00
#> 8 1347 2013-08-15 04:00:00 1 134 days 181 days 23:00:00
#> 9 1347 2014-05-16 04:00:00 1 274 days 455 days 23:00:00
#> 10 1347 2014-05-16 04:00:00 1 00:00:00 455 days 23:00:00
Created on 2023-05-16 with reprex v2.0.2
Upvotes: 0