bvowe
bvowe

Reputation: 3394

R DPLYR Fill Down Values

I have df with ID, X1 and wish to create WANT, which adds ‘3’ to the next value when X1 == NA.

library(dplyr)

df <-
  tibble(
    ID = c(1, 1, 1, 1, 2, 2, 3, 3, 3),
    X1 = c(5, NA, 11, NA, 7, NA, 4, 7, 10),
    WANT = c(5, 8, 11, 14, 7, 10, 4, 7, 10)
  )

The desired outcome is in column WANT:

df
#> # A tibble: 9 × 3
#>      ID    X1  WANT
#>   <dbl> <dbl> <dbl>
#> 1     1     5     5
#> 2     1    NA     8
#> 3     1    NA    11
#> 4     1    NA    14
#> 5     2     7     7
#> 6     2    NA    10
#> 7     3     4     4
#> 8     3     7     7
#> 9     3    10    10

Here is what I tried:

df %>%
  group_by(ID) %>%
  mutate(WANT = ifelse(is.na(X1), lead(X1) + 3, X1)) %>%
  ungroup()
#> # A tibble: 9 × 3
#>      ID    X1  WANT
#>   <dbl> <dbl> <dbl>
#> 1     1     5     5
#> 2     1    NA    NA
#> 3     1    NA    NA
#> 4     1    NA    NA
#> 5     2     7     7
#> 6     2    NA    NA
#> 7     3     4     4
#> 8     3     7     7
#> 9     3    10    10

Upvotes: 2

Views: 50

Answers (2)

Ben
Ben

Reputation: 30494

Here is an alternative option using accumulate from purrr:

library(dplyr)

df %>%
  mutate(NEW_WANT = accumulate(
    .x = X1,
    .f = ~ ifelse(is.na(.y), .x + 3, .y)
  ), .by = ID)

Output

     ID    X1  WANT NEW_WANT
  <dbl> <dbl> <dbl>    <dbl>
1     1     5     5        5
2     1    NA     8        8
3     1    11    11       11
4     1    NA    14       14
5     2     7     7        7
6     2    NA    10       10
7     3     4     4        4
8     3     7     7        7
9     3    10    10       10

Upvotes: 1

Till
Till

Reputation: 6663

library(tidyverse)
library(vctrs)

desired_WANT <- c(5, 8, 11, 14, 7, 10, 4, 7, 10)

df1 <-
  tibble(
    ID = c(1, 1, 1, 1, 2, 2, 3, 3, 3),
    X1 = c(5, NA, NA, NA, 7, NA, 4, 7, 10)
  )

df2 <-
  data.frame(
    ID = c(1, 1, 1, 1, 2, 2, 3, 3, 3),
    X1 = c(5, NA, 11, NA, 7, NA, 4, 7, 10)
  )

process_data <- function(data) {
  data |> 
    mutate(ID_NA = vec_group_id(paste0(ID, is.na(X1))),
           WANT1 = ifelse(is.na(X1), lag(X1) + 3, X1),
           .by = ID) |> 
    mutate(
      WANT2 = coalesce(WANT1, 3),
      WANT = ifelse(is.na(WANT1), cumsum(WANT2), WANT1), .by = c(ID, ID_NA)) |> 
    select(-c(WANT1, WANT2, ID_NA))
}

Run function and test results

res1 <- process_data(df1)  
all(res1$WANT == desired_WANT)
#> [1] TRUE

res2 <- process_data(df2)  
all(res2$WANT == desired_WANT)
#> [1] TRUE

Results

res1
#> # A tibble: 9 × 3
#>      ID    X1  WANT
#>   <dbl> <dbl> <dbl>
#> 1     1     5     5
#> 2     1    NA     8
#> 3     1    NA    11
#> 4     1    NA    14
#> 5     2     7     7
#> 6     2    NA    10
#> 7     3     4     4
#> 8     3     7     7
#> 9     3    10    10
res2
#>   ID X1 WANT
#> 1  1  5    5
#> 2  1 NA    8
#> 3  1 11   11
#> 4  1 NA   14
#> 5  2  7    7
#> 6  2 NA   10
#> 7  3  4    4
#> 8  3  7    7
#> 9  3 10   10

Upvotes: 1

Related Questions