wt2020
wt2020

Reputation: 11

R: Is there a way to find the row preceding the first occurrence of a specific value?

For each group in my dataset, I am looking for the row that occurs immediately before the first occurrence of a specific value.

df:
 encounter mode value
1          1    a   100
2          1    a    99
3          1    b    98
4          2    a   100
5          2    c    86
6          2    b    87
7          2    b    89
8          3    c   100
9          3    c    99
10         3    b    87
11         3    b    99
12         4    a    98
13         4    a    97
14         4    b    94
15         4    b    95
16         4    b    90
17         4    b    89
18         5    a    91
19         5    b    92
20         5    c   100   

For every encounter, I want the row that occurs immediately before the first occurrence of "b", i.e. I want rows 2, 5, 9, 13 and 18:

  encounter mode value
1         1    a    99
2         2    c    86
3         3    c    99
4         4    a    97
5         5    a    91

As a beginner, I don't really know where to start with this one! Many thanks.

Upvotes: 1

Views: 144

Answers (3)

akrun
akrun

Reputation: 886938

We can use slice

library(dplyr)

# Within each encounter, match() locates the first "b"; stepping back one
# position selects the row directly before it.
df %>%
  group_by(encounter) %>%
  slice(match("b", mode) - 1L)
# A tibble: 5 x 3
# Groups:   encounter [5]
#  encounter mode  value
#      <int> <chr> <int>
#1         1 a        99
#2         2 c        86
#3         3 c        99
#4         4 a        97
#5         5 a        91

data

# Example data from the question: 20 rows across 5 encounters.
# Each continuation line below holds the rows of one encounter (1 to 5).
df <- data.frame(
  encounter = rep(1:5, times = c(3L, 4L, 4L, 6L, 3L)),
  mode = c(
    "a", "a", "b",
    "a", "c", "b", "b",
    "c", "c", "b", "b",
    "a", "a", "b", "b", "b", "b",
    "a", "b", "c"
  ),
  value = c(
    100L, 99L, 98L,
    100L, 86L, 87L, 89L,
    100L, 99L, 87L, 99L,
    98L, 97L, 94L, 95L, 90L, 89L,
    91L, 92L, 100L
  ),
  stringsAsFactors = FALSE
)

Upvotes: 0

ThomasIsCoding
ThomasIsCoding

Reputation: 101034

Here is a base R option using ave + which

# Base R: keep, for every encounter, the row immediately before its first "b".
# ave() applies FUN to `mode == "b"` separately within each encounter, and FUN
# flags the position one step before the first TRUE. The offset is applied
# inside the group, so an encounter that starts with "b" flags nothing instead
# of picking up the last row of the previous encounter. An encounter without
# any "b" yields NA flags, which which() drops.
df[
  with(
    df,
    which(
      ave(
        mode == "b",
        encounter,
        FUN = function(x) seq_along(x) == match(TRUE, x) - 1L
      )
    )
  ),
]

giving

   encounter mode value
2          1    a    99
5          2    c    86
9          3    c    99
13         4    a    97
18         5    a    91

Data

> dput(df)
structure(list(encounter = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L), mode = c("a",
"a", "b", "a", "c", "b", "b", "c", "c", "b", "b", "a", "a", "b",
"b", "b", "b", "a", "b", "c"), value = c(100L, 99L, 98L, 100L,
86L, 87L, 89L, 100L, 99L, 87L, 99L, 98L, 97L, 94L, 95L, 90L,
89L, 91L, 92L, 100L)), class = "data.frame", row.names = c(NA,
-20L))

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 388807

You can use match to get the index of the first occurrence of "b" for each encounter.

library(dplyr)

# For each encounter, pull the value one position before the first "b".
df1 <- df %>%
  group_by(encounter) %>%
  summarise(value = value[match("b", mode) - 1L])

df1

#  encounter value
#      <int> <int>
#1         1    99
#2         2    86
#3         3    99
#4         4    97
#5         5    91

This can be written in data.table as:

library(data.table)

# setDT() turns df into a data.table in place (by reference).
setDT(df)
# Per encounter, take the value one position before the first "b".
df[, .(value = value[match("b", mode) - 1L]), by = encounter]

data

# Example data from the question: 20 rows across 5 encounters.
# Each continuation line below holds the rows of one encounter (1 to 5).
df <- data.frame(
  encounter = rep(1:5, times = c(3L, 4L, 4L, 6L, 3L)),
  mode = c(
    "a", "a", "b",
    "a", "c", "b", "b",
    "c", "c", "b", "b",
    "a", "a", "b", "b", "b", "b",
    "a", "b", "c"
  ),
  value = c(
    100L, 99L, 98L,
    100L, 86L, 87L, 89L,
    100L, 99L, 87L, 99L,
    98L, 97L, 94L, 95L, 90L, 89L,
    91L, 92L, 100L
  ),
  stringsAsFactors = FALSE
)

Upvotes: 1

Related Questions