GaB
GaB

Reputation: 1134

how to add a new column based on certain conditions with tidyverse?

I am trying to create a new column based on whether a respondent is healthy or not.

Here is the type of data I have:

test <- structure(list(`cutree(hc_diana, k = 4)` = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "8a594e9340", 
"8a594e9340"), covid_tested = c("positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive"), 
    age = c(51, 51, 51, 51, 51, 51, 51, 51, 51, 28, 28, 28, 28, 
    28, 28, 28, 28, 28, 28, 28), gender = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("Female", "Male", "Other"), class = "factor"), 
    number_morbidities = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    diarrhoea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), fatigue = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), 
    muscle_ache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    sore_throat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), chest_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    itchy_eyes = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), joint_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    comorbidities = c("asthma", "diabetes_type_one", "diabetes_type_two", 
    "obesity", "hypertension", "heart_disease", "lung_condition", 
    "liver_disease", "kidney_disease", "asthma", "diabetes_type_one", 
    "diabetes_type_two", "obesity", "hypertension", "heart_disease", 
    "lung_condition", "liver_disease", "kidney_disease", "asthma", 
    "diabetes_type_one"), bolean_yes_no = c("No", "No", "No", 
    "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No", "No", "No", "No", "No")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

I have 20 rows with 3 unique ids in this sample. I want to create a new column based on several conditions:

  1. If a respondent has a comorbidity (`bolean_yes_no` is "Yes"), the new column should contain the name of that comorbidity on that row only; all of that respondent's other rows should be NA.
  2. As you can see, the second id does not have any comorbidity at all, so I want to treat it as a new "healthy" category: one row for this respondent (the first, as in the sample below) should say "healthy" and the rest of their rows should be NA. The same applies to the third respondent.

How do I do this with tidyverse?

Here is a sample of how I want the new column to look; see the last column (`morbiditiy_healthy`), which summarises the points above.

structure(list(id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"8a594e9340", "8a594e9340"), number_morbidities = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), fatigue = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), muscle_ache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), chest_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), itchy_eyes = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), comorbidities = c("asthma", 
"diabetes_type_one", "diabetes_type_two", "obesity", "hypertension", 
"heart_disease", "lung_condition", "liver_disease", "kidney_disease", 
"asthma", "diabetes_type_one", "diabetes_type_two", "obesity", 
"hypertension", "heart_disease", "lung_condition", "liver_disease", 
"kidney_disease", "asthma", "diabetes_type_one"), bolean_yes_no = c("No", 
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No"), morbiditiy_healthy = c(NA, 
NA, NA, "obesity", NA, NA, NA, NA, NA, "healthy", NA, NA, NA, 
NA, NA, NA, NA, NA, "healthy", NA)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

Upvotes: 1

Views: 470

Answers (1)

akrun
akrun

Reputation: 887961

We group by 'id' and create 'morbidity_healthy' with case_when. For rows where 'bolean_yes_no' is 'Yes', we take the corresponding 'comorbidities' value. If the group does not (!) contain any 'Yes' and the row is the first in the group (row_number() == 1), we return 'healthy' for that row. All remaining rows match neither condition and are left as NA.

library(dplyr)

# Build `morbidity_healthy` per respondent (`id`):
#   * rows flagged "Yes" in `bolean_yes_no` get that row's `comorbidities` value;
#   * if a respondent has no "Yes" row at all, only their first row is labelled
#     "healthy";
#   * every other row matches no case_when() branch and is left as NA.
test %>%
  group_by(id) %>%
  mutate(
    morbidity_healthy = case_when(
      # %in% never returns NA, so a missing flag is treated as "not Yes"
      bolean_yes_no %in% "Yes" ~ comorbidities,
      # any() is evaluated over the whole group; row_number() restarts at 1
      # within each id, so exactly one row per healthy respondent is labelled
      !any(bolean_yes_no %in% "Yes") & row_number() == 1L ~ "healthy"
    )
  ) %>%
  # drop the grouping so later verbs are not silently applied per id
  ungroup()

Upvotes: 1

Related Questions