Reputation: 587

Create a new column with values from other columns in the dataset

I would like to create a new column whose values are taken from one of two other columns in my dataset, depending on the value of a third column.

My dataset is full of 0 and 1 values, looks like this:

# Example data: one row per patient; RCA, RCB and RCC hold 0/1 values.
df <- data.frame(
  PatientID = c(
    "0002", "0004", "0005", "0006", "0009", "0010", "0018",
    "0019", "0020", "0027", "0039", "0041", "0042", "0043",
    "0044", "0045", "0046", "0047", "0048", "0049", "0055"
  ),
  RCA = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1),
  RCB = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1),
  RCC = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1),
  # Spell out FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, which would silently turn PatientID into a factor.
  stringsAsFactors = FALSE
)

The new column, df$RCD, would need to satisfy the following condition:

If df$RCC is 1 then df$RCD= df$RCB, and if df$RCC is 0 then df$RCD= df$RCA

Hope this makes sense. I have attached an example output for clarity.

Thank you so much! Best regards

Upvotes: 0

Answers (3)

AnilGoyal

Reputation: 26238

Just to show that there are many ways to do this:

# Example data: one row per patient; RCA, RCB and RCC hold 0/1 values.
df <- data.frame(
  PatientID = c(
    "0002", "0004", "0005", "0006", "0009", "0010", "0018",
    "0019", "0020", "0027", "0039", "0041", "0042", "0043",
    "0044", "0045", "0046", "0047", "0048", "0049", "0055"
  ),
  RCA = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1),
  RCB = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1),
  RCC = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1),
  # Spell out FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, which would silently turn PatientID into a factor.
  stringsAsFactors = FALSE
)

library(tidyverse)

# Row by row, look up the source column by name:
# index 1 ("RCA") when RCC is 0, index 2 ("RCB") when RCC is 1.
df %>%
  rowwise() %>%
  mutate(RCD = get(c("RCA", "RCB")[RCC + 1]))
#> # A tibble: 21 x 5
#> # Rowwise: 
#>    PatientID   RCA   RCB   RCC   RCD
#>    <chr>     <dbl> <dbl> <dbl> <dbl>
#>  1 0002          1     1     1     1
#>  2 0004          1     1     1     1
#>  3 0005          0     1     1     1
#>  4 0006          1     1     1     1
#>  5 0009          1     0     0     1
#>  6 0010          1     0     0     1
#>  7 0018          0     0     0     0
#>  8 0019          0     0     0     0
#>  9 0020          0     0     0     0
#> 10 0027          0     0     1     0
#> # ... with 11 more rows

^{Created on 2021-06-21 by the reprex package (v2.0.0)}

Upvotes: 1

Roman

Reputation: 17678

You can try a tidyverse approach:

library(tidyverse)
# Add RCD: copy RCA where RCC is 0 and RCB where RCC is 1.
# Rows matching neither condition get NA (case_when's default).
df %>%
  mutate(
    RCD = case_when(
      RCC == 0 ~ RCA,
      RCC == 1 ~ RCB
    )
  )
   PatientID RCA RCB RCC RCD
1       0002   1   1   1   1
2       0004   1   1   1   1
3       0005   0   1   1   1
4       0006   1   1   1   1
5       0009   1   0   0   1
6       0010   1   0   0   1
7       0018   0   0   0   0
8       0019   0   0   0   0
9       0020   0   0   0   0
10      0027   0   0   1   0
11      0039   0   0   1   0
12      0041   0   0   1   0
13      0042   0   0   0   0
14      0043   0   0   0   0
15      0044   1   1   1   1
16      0045   1   0   1   0
17      0046   1   1   1   1
18      0047   0   0   1   0
19      0048   1   1   1   1
20      0049   1   1   1   1
21      0055   1   1   1   1

In base R, you only need ifelse:

# Vectorised choice evaluated inside df: RCB where RCC is 1, RCA otherwise.
df[["RCD"]] <- with(df, ifelse(RCC == 1, RCB, RCA))

Upvotes: 1

yogevmh

Reputation: 346

library(dplyr)
# Add RCD: take RCB where RCC is 1 and RCA where it is not.
# (The original snippet ended with an unmatched ")" and did not parse.)
df %>%
  mutate(
    RCD = ifelse(test = RCC == 1, yes = RCB, no = RCA)
  )

Upvotes: 1

Create a new column with values from other columns in the dataset

Answers (3)

Related Questions