Reputation: 3384
data=data.frame("person"=c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4),
"score"=c(1,2,3,2,2,1,2,3,2,3,1,4,1,4,3),
"WANT"=c(1,2,3,3,2,1,2,3,2,3,3,4,1,4,NA))
For each person we track their scores until they reach "proficient" (3) or excellence (4). If a person scores excellence (4) then that value is kept and the persons next score is removed (see person 4; instead of NA would just want to remove that row however). If a person scores proficient (3) that value is carried forward UNLESS they score 4 which would replace the score of 3. Otherwise the values stay how they are. I tried cummax func but not sure how to apply such a rule.
It is my attempt below
data$want = ave(data$score, data$person, FUN = function(x) cummax(replace(x, is.na(x), -1)))
Upvotes: 1
Views: 290
Reputation: 886948
An option with data.table
. Convert the 'data.frame' to 'data.table' (setDT(data)
), grouped by 'person', specify the i
to select rows where 'score' is greater than or equal to 3, get the cummax
of 'score' and assign it to 'WANT2', replace the NA
elements with the 'score' value (pmax
), set the duplicated '4' values to NA
for each 'person'
library(data.table)
setDT(data)[score >=3, WANT2 := cummax(score), person
][, WANT2 := pmax(score, WANT2, na.rm = TRUE), person]
i1 <- data[WANT2 == 4, .I[duplicated(WANT2)], person]$V1
data[i1, WANT2 := NA]
data
# person score WANT WANT2
# 1: 1 1 1 1
# 2: 1 2 2 2
# 3: 1 3 3 3
# 4: 1 2 3 2
# 5: 2 2 2 2
# 6: 2 1 1 1
# 7: 2 2 2 2
# 8: 2 3 3 3
# 9: 3 2 2 2
#10: 3 3 3 3
#11: 3 1 3 1
#12: 3 4 4 4
#13: 4 1 1 1
#14: 4 4 4 4
#15: 4 3 NA NA
Or create an index where the value 4 is duplicated
setDT(data)[, ind := NA^(duplicated(cumsum(score == 4)) &
shift(score) == 4), person]
then get the cumulative max of 'score' by person while specifying the i
to select rows where score is greater than or equal to 4
data[score >=3, WANT2 := cummax(score), person]
and replace the NA
elements with the 'score' using pmax
while making sure that duplicated 4 is NA
(using the 'ind')
data[, WANT2 := pmax(score, WANT2, na.rm = TRUE) * ind]
Upvotes: 0
Reputation: 388817
We could use cummax
for score higher than 3 or more or keep the score
as it is. To get NA
s after first 4 we can replace
the duplicated
4 values as NA
.
library(dplyr)
data %>%
group_by(person) %>%
mutate(WANT2 = ifelse(score >= 3 | (row_number() >= which.max(score == 3)),
cummax(score), score),
WANT2 = replace(WANT2, duplicated(WANT2 == 4) & WANT2 == 4, NA))
# person score WANT WANT2
# <dbl> <dbl> <dbl> <dbl>
# 1 1 1 1 1
# 2 1 2 2 2
# 3 1 3 3 3
# 4 1 2 3 3
# 5 2 2 2 2
# 6 2 1 1 1
# 7 2 2 2 2
# 8 2 3 3 3
# 9 3 2 2 2
#10 3 3 3 3
#11 3 1 3 3
#12 3 4 4 4
#13 4 1 1 1
#14 4 4 4 4
#15 4 3 NA NA
Upvotes: 0
Reputation: 2835
You can write a custom function with the rules you specified and apply it on every person in a group by as follows,
library(dplyr)
count_3_4 <- function(vec){
max = 0
for(i in seq_along(vec)){
if(vec[i] > max){
max = vec[i]
if(max == 3 || max == 4){
vec[i] = max
}
}else if(max == 4 && vec[i] < max){
vec[i] = NA
}else{
if(max == 3 || max == 4){
vec[i] = max
}
}
}
return(vec)
}
data=data.frame("person"=c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4),
"score"=c(1,2,3,2,2,1,2,3,2,3,1,4,1,4,3),
"WANT"=c(1,2,3,3,2,1,2,3,2,3,3,4,1,4,NA))
data%>%
group_by(person)%>%
mutate(TestVal = count_3_4(score)) %>%
ungroup()
Upvotes: 2