user16368421
user16368421

Reputation:

How to Append 2 columns within 2 different datasets by Row Name

I have two separate dataframes:

> head(data)

  Drug.Substance.Calcium Drug.Substance.Glycine Drug.Substance.Histidine
    1                             45.0                                120.0                                                              
    2                             43.0                                100.0                                                               
    3                             33.7                                 9.6                                                              

I want to add units to the column names. The units can be found in a different dataset (in the UOM column)

> head(ag_parameter_6[10:13,])
    X                PARAMETER DATA_TYPE PARAMETER_TYPE    UOM
10  9   Drug Substance.Calcium    double              D mmol/L
11 10 Drug Substance.Endotoxin    string              D       
12 11   Drug Substance.Glycine    double              D  mg/mL
13 12 Drug Substance.Histidine    double              D mmol/L

How can I modify the data dataframe so that each column has the correct units (found in UOM)?

Upvotes: 1

Views: 55

Answers (4)

Ronak Shah
Ronak Shah

Reputation: 389355

You can use rename_with -

library(dplyr)

data %>%
  rename_with(~paste(., ag_parameter_6$UOM[match(., ag_parameter_6$PARAMETER)]))

#  Drug.Substance.Calcium mmol/L Drug.Substance.Glycine mg/mL Drug.Substance.Histidine mmol/L
#1                             1                         45.0                           120.0
#2                             2                         43.0                           100.0
#3                             3                         33.7                             9.6

Upvotes: 0

Jon Spring
Jon Spring

Reputation: 67050

I bet there's a much more concise base R way to do this, but here's a bit of a long-winded tidyverse approach:

library(tidyverse)
data %>%
  mutate(row = row_number()) %>%  # allows us to pivot wider later

  # reshape long, join to other data, and rename headers
  pivot_longer(-row) %>%
  left_join(ag_parameter_6, by = c("name" = "PARAMETER")) %>%
  mutate(name = paste(name, UOM)) %>%

  # pivot back to original wide format
  select(row, name:value) %>%
  pivot_wider(names_from = name, values_from = value)

Result

# A tibble: 3 x 4
    row `Drug.Substance.Calcium mmol/L` `Drug.Substance.Glycine mg/mL` `Drug.Substance.Histidine mmol/L`
  <int>                           <dbl>                          <dbl>                             <dbl>
1     1                               1                           45                               120  
2     2                               2                           43                               100  
3     3                               3                           33.7                               9.6

Source data (Please include it in a reproducible fashion for future questions.)

data <- data.frame(
    Drug.Substance.Calcium = c(1L, 2L, 3L),
    Drug.Substance.Glycine = c(45, 43, 33.7),
  Drug.Substance.Histidine = c(120, 100, 9.6)
       )


ag_parameter_6 <- data.frame(
  stringsAsFactors = FALSE,
                           PARAMETER = c("Drug.Substance.Calcium",
                                         "Drug.Substance.Endotoxin",
                                         "Drug.Substance.Glycine","Drug.Substance.Histidine"),
         DATA_TYPE = c("double", "string", "double", "double"),
    PARAMETER_TYPE = c("D", "D", "D", "D"),
               UOM = c("mmol/L", NA, "mg/mL", "mmol/L")
                  )

Upvotes: 1

TarJae
TarJae

Reputation: 79271

Maybe we could do a left_join after tweaking the data:

We could remove name or PARAMETER

library(tidyverse)

data1 <- data %>% 
    pivot_longer(
        cols = everything()
    )

ag_parameter_6_1 <- ag_parameter_6 %>% 
    mutate(DATA_TYPE = str_replace(PARAMETER, " ", "."))

left_join(data1, ag_parameter_6_1, by=c("name"="DATA_TYPE"))

Output:

  name                     value     X PARAMETER                PARAMETER_TYPE UOM   
  <chr>                    <dbl> <dbl> <chr>                    <chr>          <chr> 
1 Drug.Substance.Calcium     1       9 Drug Substance.Calcium   D              mmol/L
2 Drug.Substance.Glycine    45      11 Drug Substance.Glycine   D              mg/mL 
3 Drug.Substance.Histidine 120      12 Drug Substance.Histidine D              mmol/L
4 Drug.Substance.Calcium     2       9 Drug Substance.Calcium   D              mmol/L
5 Drug.Substance.Glycine    43      11 Drug Substance.Glycine   D              mg/mL 
6 Drug.Substance.Histidine 100      12 Drug Substance.Histidine D              mmol/L
7 Drug.Substance.Calcium     3       9 Drug Substance.Calcium   D              mmol/L
8 Drug.Substance.Glycine    33.7    11 Drug Substance.Glycine   D              mg/mL 
9 Drug.Substance.Histidine   9.6    12 Drug Substance.Histidine D              mmol/L

data:

data <- structure(list(Drug.Substance.Calcium = 1:3, 
                       Drug.Substance.Glycine = c(45, 43, 33.7), 
                       Drug.Substance.Histidine = c(120, 100, 9.6)), 
                  class = "data.frame", row.names = c(NA, -3L))


ag_parameter_6 <-  structure(list(X = c(9, 10, 11, 12), PARAMETER = c("Drug Substance.Calcium", 
"Drug Substance.Endotoxin", "Drug Substance.Glycine", "Drug Substance.Histidine"
), DATA_TYPE = c("double", "string", "double", "double"), PARAMETER_TYPE = c("D", 
"D", "D", "D"), UOM = c("mmol/L", NA, "mg/mL", "mmol/L")), row.names = c(NA, 
-4L), class = c("tbl_df", "tbl", "data.frame"))

Upvotes: 1

akrun
akrun

Reputation: 887991

We can do this in base R

i1 <- match(names(data), ag_parameter_6$PARAMETER)
names(data) <- paste(names(data), ag_parameter_6$UOM[i1])

-output

> data
  Drug.Substance.Calcium mmol/L Drug.Substance.Glycine mg/mL Drug.Substance.Histidine mmol/L
1                             1                         45.0                           120.0
2                             2                         43.0                           100.0
3                             3                         33.7                             9.6

data

data <- data.frame(
    Drug.Substance.Calcium = c(1L, 2L, 3L),
    Drug.Substance.Glycine = c(45, 43, 33.7),
  Drug.Substance.Histidine = c(120, 100, 9.6)
       )


ag_parameter_6 <- data.frame(
  stringsAsFactors = FALSE,
                           PARAMETER = c("Drug.Substance.Calcium",
                                         "Drug.Substance.Endotoxin",
                                         "Drug.Substance.Glycine","Drug.Substance.Histidine"),
         DATA_TYPE = c("double", "string", "double", "double"),
    PARAMETER_TYPE = c("D", "D", "D", "D"),
               UOM = c("mmol/L", NA, "mg/mL", "mmol/L")
                  )

Upvotes: 2

Related Questions