GaB
GaB

Reputation: 1132

how to bring one column string into the first one, with library tidyverse?

I am trying to get one column string from med_name_two attached to column name med_name_one based. But on a condition that in first column the string Piperacilin is followed by Tazobactam in the second one. An example of dataset is bellow. As one can see, the last row in med_name_one is Pipperalicin followed by Tazobactam in med_name_two. I want to attach 'Tazobactam' to 'Piperacilin' in this format 'Piperacilin-tazobactam'. Want to achieve this only with tidyverse library, in R

structure(list(id = c(1, 1, 2, 2, 3, 4, 5, 6, 7), med_name_one = c("Co-amoxiclav", 
"doxycycline", "Gentamicin", "Co-trimoxazole", "Gentamicin", 
"Co-trimoxazole", "Sodium Chloride", "Piperacillin", "Piperacillin"
), med_name_two = c(NA, "Gentamicin", "Co-trimoxazole", NA, NA, 
NA, NA, NA, "Tazobactam"), mg_one = c("411 mg", "120 mg", "11280 mg", 
"8 mg", "11280 mg", "8 mg", "411 mg", "120 mg", NA), mg_two = c(NA, 
"11280 mg", "8 mg", NA, NA, NA, NA, NA, NA), administration_datetime = c("2020-01-03 10:08", 
"2020-01-01 11:08", "2020-01-02 19:08", "2020-01-08 20:08", "2020-01-02 19:08", 
"2020-01-08 20:08", "2019-01-30 08:08", "2020-01-03 09:08", "2020-01-03 09:08"
)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"
))

The output I want Piperacilin-tazobactam. See table below.

 id med_name_one            med_name_two   mg_one   mg_two   administration_datetime
<dbl> <chr>                   <chr>          <chr>    <chr>    <chr>                  
1     1 Co-amoxiclav            NA             411 mg   NA       2020-01-03 10:08       
2     1 doxycycline             Gentamicin     120 mg   11280 mg 2020-01-01 11:08       
3     2 Gentamicin              Co-trimoxazole 11280 mg 8 mg     2020-01-02 19:08       
4     2 Co-trimoxazole          NA             8 mg     NA       2020-01-08 20:08       
5     3 Gentamicin              NA             11280 mg NA       2020-01-02 19:08       
6     4 Co-trimoxazole          NA             8 mg     NA       2020-01-08 20:08       
7     5 Sodium Chloride         NA             411 mg   NA       2019-01-30 08:08       
8     6 Piperacillin            NA             120 mg   NA       2020-01-03 09:08       
9     7 Piperacillin-tazobactam NA             NA       NA       2020-01-03 09:08    

Upvotes: 1

Views: 60

Answers (2)

akrun
akrun

Reputation: 886948

We can use case_when to create a logical vector and paste the two columns

library(dplyr)
library(stringr)
df1 %>%
    mutate(i1 = med_name_one == 'Piperacillin' & 
          med_name_two %in% 'Tazobactam',  med_name_one = case_when(i1 ~ 
     str_c(med_name_one, tolower(med_name_two), sep='-'), TRUE ~ med_name_one),  
     med_name_two = replace(med_name_two, i1, NA_character_), i1 = NULL)

-output

# A tibble: 9 x 6
#     id med_name_one            med_name_two   mg_one   mg_two   administration_datetime
#  <dbl> <chr>                   <chr>          <chr>    <chr>    <chr>                  
#1     1 Co-amoxiclav            <NA>           411 mg   <NA>     2020-01-03 10:08       
#2     1 doxycycline             Gentamicin     120 mg   11280 mg 2020-01-01 11:08       
#3     2 Gentamicin              Co-trimoxazole 11280 mg 8 mg     2020-01-02 19:08       
#4     2 Co-trimoxazole          <NA>           8 mg     <NA>     2020-01-08 20:08       
#5     3 Gentamicin              <NA>           11280 mg <NA>     2020-01-02 19:08       
#6     4 Co-trimoxazole          <NA>           8 mg     <NA>     2020-01-08 20:08       
#7     5 Sodium Chloride         <NA>           411 mg   <NA>     2019-01-30 08:08       
#8     6 Piperacillin            <NA>           120 mg   <NA>     2020-01-03 09:08       
#9     7 Piperacillin-tazobactam <NA>           <NA>     <NA>     2020-01-03 09:08       

Or use base R

i1 <- with(df1, med_name_one == 'Piperacillin' &  
          med_name_two %in% 'Tazobactam')

df1$med_name_one[i1] <- paste(df1$med_name_one[i1], 
        tolower(df1$med_name_two[i1]), sep='-')
df1$med_name_two[i1] <- NA

Upvotes: 2

Onyambu
Onyambu

Reputation: 79198

Another base R approach:

a <- do.call(paste, c(df, sep=","))
b <- sub("Piperacillin,Tazobactam", "Piperacillin-tazobactam,NA", a)
read.csv(text = a, col.names = names(df), header = FALSE)

 id            med_name_one   med_name_two   mg_one   mg_two administration_datetime
1  1            Co-amoxiclav           <NA>   411 mg     <NA>        2020-01-03 10:08
2  1             doxycycline     Gentamicin   120 mg 11280 mg        2020-01-01 11:08
3  2              Gentamicin Co-trimoxazole 11280 mg     8 mg        2020-01-02 19:08
4  2          Co-trimoxazole           <NA>     8 mg     <NA>        2020-01-08 20:08
5  3              Gentamicin           <NA> 11280 mg     <NA>        2020-01-02 19:08
6  4          Co-trimoxazole           <NA>     8 mg     <NA>        2020-01-08 20:08
7  5         Sodium Chloride           <NA>   411 mg     <NA>        2019-01-30 08:08
8  6            Piperacillin           <NA>   120 mg     <NA>        2020-01-03 09:08
9  7 Piperacillin-tazobactam           <NA>     <NA>     <NA>        2020-01-03 09:08

Upvotes: 1

Related Questions