Reputation: 2443
I would like to retain, for each group of a data frame, the first non-missing element until the last element (even if the last is missing). For example:
df <- structure(list(Ind = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), result = c(81,
90, NA, NA, 84, 86, 101, 134, NA, 78, NA, 103, NA, 112, NA, 111,
NA, 110, 256, 123, NA, NA, 194, 301, 98.5, 151, NA, 105, NA,
158, NA, 190, NA, 167, NA, 190, NA, NA, 113, 157, NA, 197, 155,
NA, 178, 146, NA, 153, 153, 147, NA, 170, NA, 157)), row.names = c(NA,
-54L), class = c("tbl_df", "tbl", "data.frame"))
In this example, group 3 should start from the 3rd observation and 1 and 2 should remain the same.
Upvotes: 3
Views: 141
Reputation: 51974
You can use dplyr::cumany
, which will set to TRUE
the first and subsequent values that are not NAs (!is.na(result)
).
df %>%
group_by(Ind) %>%
filter(cumany(!is.na(result)))
Use Reduce
with accumulate = T
to mimic dplyr::cumany(!is.na(result))
and ave
to do it by group.
df[as.logical(ave(df$result, df$Ind, FUN = function(x) Reduce(any, !is.na(x), accumulate = TRUE))),]
output
# A tibble: 52 x 2
# Groups: Ind [3]
Ind result
<int> <dbl>
1 1 81
2 1 90
3 1 NA
4 1 NA
5 1 84
6 1 86
7 1 101
8 1 134
9 1 NA
10 1 78
11 1 NA
12 1 103
13 1 NA
14 1 112
15 1 NA
16 1 111
17 1 NA
18 1 110
19 2 256
20 2 123
21 2 NA
22 2 NA
23 2 194
24 2 301
25 2 98.5
26 2 151
27 2 NA
28 2 105
29 2 NA
30 2 158
31 2 NA
32 2 190
33 2 NA
34 2 167
35 2 NA
36 2 190
37 3 113
38 3 157
39 3 NA
40 3 197
41 3 155
42 3 NA
43 3 178
44 3 146
45 3 NA
46 3 153
47 3 153
48 3 147
49 3 NA
50 3 170
51 3 NA
52 3 157
Upvotes: 1
Reputation: 25323
Another possible solution:
library(tidyverse)
df <- structure(list(Ind = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), result = c(81,
90, NA, NA, 84, 86, 101, 134, NA, 78, NA, 103, NA, 112, NA, 111,
NA, 110, 256, 123, NA, NA, 194, 301, 98.5, 151, NA, 105, NA,
158, NA, 190, NA, 167, NA, 190, NA, NA, 113, 157, NA, 197, 155,
NA, 178, 146, NA, 153, 153, 147, NA, 170, NA, 157)), row.names = c(NA,
-54L), class = c("tbl_df", "tbl", "data.frame"))
df %>%
group_by(Ind) %>%
mutate(aux = which.min(is.na(result))) %>%
slice(unique(aux):n()) %>% select(-aux) %>%
ungroup
#> Ind result
#> 1 1 81.0
#> 2 1 90.0
#> 3 1 NA
#> 4 1 NA
#> 5 1 84.0
#> 6 1 86.0
#> 7 1 101.0
#> 8 1 134.0
#> 9 1 NA
#> 10 1 78.0
#> 11 1 NA
#> 12 1 103.0
#> 13 1 NA
#> 14 1 112.0
#> 15 1 NA
#> 16 1 111.0
#> 17 1 NA
#> 18 1 110.0
#> 19 2 256.0
#> 20 2 123.0
#> 21 2 NA
#> 22 2 NA
#> 23 2 194.0
#> 24 2 301.0
#> 25 2 98.5
#> 26 2 151.0
#> 27 2 NA
#> 28 2 105.0
#> 29 2 NA
#> 30 2 158.0
#> 31 2 NA
#> 32 2 190.0
#> 33 2 NA
#> 34 2 167.0
#> 35 2 NA
#> 36 2 190.0
#> 37 3 113.0
#> 38 3 157.0
#> 39 3 NA
#> 40 3 197.0
#> 41 3 155.0
#> 42 3 NA
#> 43 3 178.0
#> 44 3 146.0
#> 45 3 NA
#> 46 3 153.0
#> 47 3 153.0
#> 48 3 147.0
#> 49 3 NA
#> 50 3 170.0
#> 51 3 NA
#> 52 3 157.0
Upvotes: 1