Reputation:
I have two separate dataframes:
> head(data)
Drug.Substance.Calcium Drug.Substance.Glycine Drug.Substance.Histidine
1 45.0 120.0
2 43.0 100.0
3 33.7 9.6
I want to add units to the column names. The units can be found in a different dataset (in the UOM
column)
> head(ag_parameter_6[10:13,])
X PARAMETER DATA_TYPE PARAMETER_TYPE UOM
10 9 Drug Substance.Calcium double D mmol/L
11 10 Drug Substance.Endotoxin string D
12 11 Drug Substance.Glycine double D mg/mL
13 12 Drug Substance.Histidine double D mmol/L
How can I modify the data
dataframe so that each column has the correct units (found in UOM)?
Upvotes: 1
Views: 55
Reputation: 389355
You can use rename_with
-
library(dplyr)
data %>%
rename_with(~paste(., ag_parameter_6$UOM[match(., ag_parameter_6$PARAMETER)]))
# Drug.Substance.Calcium mmol/L Drug.Substance.Glycine mg/mL Drug.Substance.Histidine mmol/L
#1 1 45.0 120.0
#2 2 43.0 100.0
#3 3 33.7 9.6
Upvotes: 0
Reputation: 67050
I bet there's a much more concise base R way to do this, but here's a bit of a long-winded tidyverse approach:
library(tidyverse)
data %>%
mutate(row = row_number()) %>% # allows us to pivot wider later
# reshape long, join to other data, and rename headers
pivot_longer(-row) %>%
left_join(ag_parameter_6, by = c("name" = "PARAMETER")) %>%
mutate(name = paste(name, UOM)) %>%
# pivot back to original wide format
select(row, name:value) %>%
pivot_wider(names_from = name, values_from = value)
Result
# A tibble: 3 x 4
row `Drug.Substance.Calcium mmol/L` `Drug.Substance.Glycine mg/mL` `Drug.Substance.Histidine mmol/L`
<int> <dbl> <dbl> <dbl>
1 1 1 45 120
2 2 2 43 100
3 3 3 33.7 9.6
Source data (Please include it in a reproducible fashion for future questions.)
data <- data.frame(
Drug.Substance.Calcium = c(1L, 2L, 3L),
Drug.Substance.Glycine = c(45, 43, 33.7),
Drug.Substance.Histidine = c(120, 100, 9.6)
)
ag_parameter_6 <- data.frame(
stringsAsFactors = FALSE,
PARAMETER = c("Drug.Substance.Calcium",
"Drug.Substance.Endotoxin",
"Drug.Substance.Glycine","Drug.Substance.Histidine"),
DATA_TYPE = c("double", "string", "double", "double"),
PARAMETER_TYPE = c("D", "D", "D", "D"),
UOM = c("mmol/L", NA, "mg/mL", "mmol/L")
)
Upvotes: 1
Reputation: 79271
Maybe we could do a left_join
after tweaking the data:
We could remove name
or PARAMETER
library(tidyverse)
data1 <- data %>%
pivot_longer(
cols = everything()
)
ag_parameter_6_1 <- ag_parameter_6 %>%
mutate(DATA_TYPE = str_replace(PARAMETER, " ", "."))
left_join(data1, ag_parameter_6_1, by=c("name"="DATA_TYPE"))
Output:
name value X PARAMETER PARAMETER_TYPE UOM
<chr> <dbl> <dbl> <chr> <chr> <chr>
1 Drug.Substance.Calcium 1 9 Drug Substance.Calcium D mmol/L
2 Drug.Substance.Glycine 45 11 Drug Substance.Glycine D mg/mL
3 Drug.Substance.Histidine 120 12 Drug Substance.Histidine D mmol/L
4 Drug.Substance.Calcium 2 9 Drug Substance.Calcium D mmol/L
5 Drug.Substance.Glycine 43 11 Drug Substance.Glycine D mg/mL
6 Drug.Substance.Histidine 100 12 Drug Substance.Histidine D mmol/L
7 Drug.Substance.Calcium 3 9 Drug Substance.Calcium D mmol/L
8 Drug.Substance.Glycine 33.7 11 Drug Substance.Glycine D mg/mL
9 Drug.Substance.Histidine 9.6 12 Drug Substance.Histidine D mmol/L
data:
data <- structure(list(Drug.Substance.Calcium = 1:3,
Drug.Substance.Glycine = c(45, 43, 33.7),
Drug.Substance.Histidine = c(120, 100, 9.6)),
class = "data.frame", row.names = c(NA, -3L))
ag_parameter_6 <- structure(list(X = c(9, 10, 11, 12), PARAMETER = c("Drug Substance.Calcium",
"Drug Substance.Endotoxin", "Drug Substance.Glycine", "Drug Substance.Histidine"
), DATA_TYPE = c("double", "string", "double", "double"), PARAMETER_TYPE = c("D",
"D", "D", "D"), UOM = c("mmol/L", NA, "mg/mL", "mmol/L")), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
Upvotes: 1
Reputation: 887991
We can do this in base R
i1 <- match(names(data), ag_parameter_6$PARAMETER)
names(data) <- paste(names(data), ag_parameter_6$UOM[i1])
-output
> data
Drug.Substance.Calcium mmol/L Drug.Substance.Glycine mg/mL Drug.Substance.Histidine mmol/L
1 1 45.0 120.0
2 2 43.0 100.0
3 3 33.7 9.6
data <- data.frame(
Drug.Substance.Calcium = c(1L, 2L, 3L),
Drug.Substance.Glycine = c(45, 43, 33.7),
Drug.Substance.Histidine = c(120, 100, 9.6)
)
ag_parameter_6 <- data.frame(
stringsAsFactors = FALSE,
PARAMETER = c("Drug.Substance.Calcium",
"Drug.Substance.Endotoxin",
"Drug.Substance.Glycine","Drug.Substance.Histidine"),
DATA_TYPE = c("double", "string", "double", "double"),
PARAMETER_TYPE = c("D", "D", "D", "D"),
UOM = c("mmol/L", NA, "mg/mL", "mmol/L")
)
Upvotes: 2