bvowe
bvowe

Reputation: 3384

R Custom Max Function

data=data.frame("person"=c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4),
                "score"=c(1,2,3,2,2,1,2,3,2,3,1,4,1,4,3),
                "WANT"=c(1,2,3,3,2,1,2,3,2,3,3,4,1,4,NA))

For each person we track their scores until they reach "proficient" (3) or excellence (4). If a person scores excellence (4) then that value is kept and the persons next score is removed (see person 4; instead of NA would just want to remove that row however). If a person scores proficient (3) that value is carried forward UNLESS they score 4 which would replace the score of 3. Otherwise the values stay how they are. I tried cummax func but not sure how to apply such a rule.

It is my attempt below

data$want = ave(data$score, data$person, FUN = function(x) cummax(replace(x, is.na(x), -1)))

Upvotes: 1

Views: 290

Answers (3)

akrun
akrun

Reputation: 886948

An option with data.table. Convert the 'data.frame' to 'data.table' (setDT(data)), grouped by 'person', specify the i to select rows where 'score' is greater than or equal to 3, get the cummax of 'score' and assign it to 'WANT2', replace the NA elements with the 'score' value (pmax), set the duplicated '4' values to NA for each 'person'

library(data.table)
setDT(data)[score >=3,  WANT2 := cummax(score), person
     ][, WANT2 := pmax(score, WANT2, na.rm = TRUE), person]
i1 <- data[WANT2 == 4, .I[duplicated(WANT2)], person]$V1
data[i1, WANT2 := NA]     
data
#    person score WANT WANT2
# 1:      1     1    1     1
# 2:      1     2    2     2
# 3:      1     3    3     3
# 4:      1     2    3     2
# 5:      2     2    2     2
# 6:      2     1    1     1
# 7:      2     2    2     2
# 8:      2     3    3     3
# 9:      3     2    2     2
#10:      3     3    3     3
#11:      3     1    3     1
#12:      3     4    4     4
#13:      4     1    1     1
#14:      4     4    4     4
#15:      4     3   NA    NA

Or create an index where the value 4 is duplicated

setDT(data)[, ind := NA^(duplicated(cumsum(score == 4)) & 
         shift(score) == 4), person]

then get the cumulative max of 'score' by person while specifying the i to select rows where score is greater than or equal to 4

data[score >=3, WANT2 := cummax(score), person]

and replace the NA elements with the 'score' using pmax while making sure that duplicated 4 is NA (using the 'ind')

data[, WANT2 := pmax(score, WANT2, na.rm = TRUE) * ind]

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 388817

We could use cummax for score higher than 3 or more or keep the score as it is. To get NAs after first 4 we can replace the duplicated 4 values as NA.

library(dplyr)

data %>%
  group_by(person) %>%
    mutate(WANT2 = ifelse(score >= 3 | (row_number() >= which.max(score == 3)), 
                   cummax(score), score), 
            WANT2 = replace(WANT2, duplicated(WANT2 == 4) & WANT2 == 4, NA))


#   person score  WANT WANT2
#    <dbl> <dbl> <dbl> <dbl>
# 1      1     1     1     1
# 2      1     2     2     2
# 3      1     3     3     3
# 4      1     2     3     3
# 5      2     2     2     2
# 6      2     1     1     1
# 7      2     2     2     2
# 8      2     3     3     3
# 9      3     2     2     2
#10      3     3     3     3
#11      3     1     3     3
#12      3     4     4     4
#13      4     1     1     1
#14      4     4     4     4
#15      4     3    NA    NA

Upvotes: 0

Sada93
Sada93

Reputation: 2835

You can write a custom function with the rules you specified and apply it on every person in a group by as follows,

library(dplyr)

count_3_4 <- function(vec){
  max = 0
  for(i in seq_along(vec)){
    if(vec[i] > max){
      max = vec[i]
      if(max == 3 || max == 4){
        vec[i] = max
      }
    }else if(max == 4 && vec[i] < max){
      vec[i] = NA
    }else{
      if(max == 3 || max == 4){
        vec[i] = max
      }
    }
  }
  return(vec)
}

data=data.frame("person"=c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4),
                "score"=c(1,2,3,2,2,1,2,3,2,3,1,4,1,4,3),
                "WANT"=c(1,2,3,3,2,1,2,3,2,3,3,4,1,4,NA))

data%>%
  group_by(person)%>%
  mutate(TestVal = count_3_4(score)) %>%
  ungroup()

Upvotes: 2

Related Questions