new variable based on value range of other column R

Question

I know there are a lot of similar questions like this, but I couldn't get an answer.

What I need to do is group a numerical variable into three levels.

Among other things, I tried the following:


data_long$average_success_grouped <- recode(data_long$average_success, <0.5 = no success, >0.5 & <0.9 = little success, >0.9 = success)

I have values ranging from 0 to 1, and I need to cut them at 0.5 and 0.9 to form the three groups.

Can someone help?

current Error: unexpected '<' in "data_long$averagre_success <- recode(data_long$average_success, <"

dput(data_long_migraine)
structure(list(average_success = c(0.333333333333333, 0.416666666666667, 0, 0.25, 
0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 
0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 
0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 
1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 
0.583333333333333, 0.194444444444444, 0.333333333333333, 0.416666666666667, 
0, 0.25, 0.166666666666667, 0.133333333333333, 0.0285714285714286, 
0, 0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 
0.0142857142857143, 0.305555555555556, 0.861111111111111, 0.614285714285714, 
0.371428571428571, 1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 
0.128571428571429, 0.583333333333333, 0.194444444444444, 0.333333333333333, 
0.416666666666667, 0, 0.25, 0.166666666666667, 0.133333333333333, 
0.0285714285714286, 0, 0.266666666666667, 1, 0.214285714285714, 
0.472222222222222, 0.0142857142857143, 0.305555555555556, 0.861111111111111, 
0.614285714285714, 0.371428571428571, 1, 0.694444444444444, 0, 
0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 0.583333333333333, 
0.194444444444444, 0.333333333333333, 0.416666666666667, 0, 0.25, 
0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 
0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 
0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 
1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 
0.583333333333333, 0.194444444444444), month = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("bad_days_1_month", 
"bad_days_2_month", "bad_days_3_month", "bad_days_4_month"
), class = "factor"), bad_days = c(5, 3, 8, 5, 0, 13, 2, 
3, 10, 13, 7, 3, 2, 23, 5, 4, 6, 17, 4, 3, 13, 10, 4, 8, 15, 
18, 2, 7, 7, 10, 1, 2, 10, 3, 0, 3, 16, 8, 4, 4, 26, 2, 6, 10, 
25, 5, 3, 11, 7, 4, 6, 11, 18, 4, 5, 7, 6, 7, 2, 11, 6, 0, 5, 
20, 4, 2, 4, 20, 0, 2, 2, 24, 6, 4, 4, 5, 3, 7, 8, 6, 2, 9, 8, 
8, 7, 3, 8, 6, 0, 5, 20, 9, 8, 2, 22, 1, 1, 5, 25, 3, 1, 6, 3, 
3, 4, 8, 11, 0), average_success_grouped = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(NA, 
-108L), class = "data.frame")

I tried something else earlier, which produced the average_success_grouped column containing only the value "2", but I can't recall exactly what I did.

Shawn Hemelstrand · Accepted Answer

Load tidyverse library and data

Sounds like you need an ifelse statement. First, load the tidyverse package to add a new variable for levels:

library(tidyverse)

I first saved your dput to an object called df:

df <- structure(list(average_success = c(0.333333333333333, 0.416666666666667, 0, 0.25, 0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 0.583333333333333, 0.194444444444444, 0.333333333333333, 0.416666666666667, 0, 0.25, 0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 0.583333333333333, 0.194444444444444, 0.333333333333333, 0.416666666666667, 0, 0.25, 0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 0.583333333333333, 0.194444444444444, 0.333333333333333, 0.416666666666667, 0, 0.25, 0.166666666666667, 0.133333333333333, 0.0285714285714286, 0, 0.266666666666667, 1, 0.214285714285714, 0.472222222222222, 0.0142857142857143, 0.305555555555556, 0.861111111111111, 0.614285714285714, 0.371428571428571, 1, 0.694444444444444, 0, 0.5, 1, 0.9, 0.0571428571428571, 0.128571428571429, 0.583333333333333, 0.194444444444444), month = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L), .Label = c("bad_days_1_month", "bad_days_2_month", "bad_days_3_month", "bad_days_4_month" ), class = "factor"), bad_days = c(5, 3, 8, 5, 0, 13, 2, 3, 10, 13, 7, 3, 2, 23, 5, 4, 6, 17, 4, 3, 13, 10, 4, 8, 15, 18, 2, 7, 7, 10, 1, 2, 10, 3, 0, 3, 16, 8, 4, 4, 26, 2, 6, 10, 25, 5, 3, 11, 7, 4, 6, 11, 18, 4, 5, 7, 6, 7, 2, 11, 6, 0, 5, 20, 4, 2, 4, 20, 0, 2, 2, 24, 6, 4, 4, 5, 3, 7, 8, 6, 2, 9, 8, 8, 7, 3, 8, 6, 0, 5, 20, 9, 8, 2, 22, 1, 1, 5, 25, 3, 1, 6, 3, 3, 4, 8, 11, 0), average_success_grouped = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(NA, -108L), class = "data.frame")

New data frame:

Then create the new variable with an if/then statement using mutate and ifelse:

# Label each row by its average_success: values below 0.5 are "no success",
# values above 0.9 are "high success", and everything else is "little".
# Both comparisons are strict, so exactly 0.5 and exactly 0.9 fall into the
# middle ("little") bucket.
df2 <- df %>%
  mutate(
    success_level = ifelse(
      average_success < 0.5,
      "no success",
      ifelse(average_success > 0.9, "high success", "little")
    )
  )

View result

If you now run View(df2), you get this new data frame:

new variable based on value range of other column R

Answers (1)

Load tidyverse library and data

New data frame:

View result

Related Questions