Reputation: 1382
I want to create a new column that take the value of one of two similarly named columns, depending on a third column. There are many such columns to create. Here's my data.
dt <- structure(list(malvol_left_1_w1 = c("1", "1", "4", "3", "4",
"4", "1", "4", "4", "3", "1", "4", "4", "3", "4", "4", "5", "2",
"4", "2"), malvol_left_2_w1 = c("1", "1", "4", "3", "4", "4",
"1", "3", "4", "2", "2", "2", "4", "1", "5", "4", "5", "2", "4",
"2"), malvol_right_1_w1 = c("1", "1", "4", "3", "4", "4", "1",
"3", "4", "2", "1", "4", "4", "5", "5", "4", "2", "6", "4", "1"
), malvol_right_2_w1 = c("1", "1", "4", "3", "4", "4", "1", "3",
"4", "2", "1", "2", "4", "5", "5", "4", "5", "5", "4", "5"),
malvol_left_1_w2 = c("1", "1", "3", "3", "4", "4", "1", "5",
"4", "4", "4", "2", "1", "4", "5", "4", "3", "2", "4", "4"
), malvol_left_2_w2 = c("1", "1", "3", "3", "4", "4", "7",
"5", "4", "2", "3", "1", "1", "4", "4", "4", "3", "4", "4",
"4"), malvol_right_1_w2 = c("1", "3", "3", "3", "4", "4",
"1", "4", "4", "3", "2", "2", "4", "1", "4", "4", "5", "5",
"4", "4"), malvol_right_2_w2 = c("1", "2", "3", "3", "4",
"4", "1", "2", "4", "2", "3", "2", "4", "1", "4", "4", "5",
"4", "4", "3"), leftright_w1 = c("right", "right", "left",
"right", "right", "right", "left", "right", "right", "left",
"left", "left", "left", "right", "left", "left", "right",
"right", "right", "left"), leftright_w2 = c("right", "right",
"left", "left", "right", "left", "left", "right", "right",
"left", "left", "left", "left", "right", "left", "left",
"right", "right", "left", "left")), class = "data.frame", row.names = c("12",
"15", "69", "77", "95", "96", "112", "122", "150", "163", "184",
"216", "221", "226", "240", "298", "305", "354", "370", "379"
))
Now I can do this in dplyr like:
dt <- dt %>%
mutate(
malvol_1_w1 = case_when(
leftright_w1 == "left" ~ malvol_right_1_w1,
leftright_w1 == "right" ~ malvol_left_1_w1),
malvol_2_w1 = case_when(
leftright_w1 == "left" ~ malvol_right_2_w1,
leftright_w1 == "right" ~ malvol_left_2_w1),
malvol_1_w2 = case_when(
leftright_w2 == "left" ~ malvol_right_1_w2,
leftright_w2 == "right" ~ malvol_left_1_w2),
malvol_2_w2 = case_when(
leftright_w2 == "left" ~ malvol_right_2_w2,
leftright_w2 == "right" ~ malvol_left_2_w2))
However, it's not really a feasible solution, because there will be more of both numbers defining a variable (e.g. both malvol_3_w1
and malvol_1_w3
will need to be created).
One solution is to this with a loop:
for (wave in 1:2) {
for (var in 1:2) {
dt[, paste0("malvol_", var, "_w", wave)] <- dt[, paste0("malvol_right_", var, "_w", wave)]
dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_", var, "_w", wave)] <-
dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_left_", var, "_w", wave)]
}
}
However, what is a tidyverse solution?
UPDATE:
I came up with a tidyverse
solution myself, however, not every elegant. Still looking for more canonical solutions.
dt <- dt %>%
mutate(
malvol_1_w1 = NA, malvol_2_w1 = NA,
malvol_1_w2 = NA, malvol_2_w2 = NA) %>%
mutate(
across(matches("malvol_\\d"),
~ case_when(
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "left" ~
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_right", str_split(cur_column(), "malvol", simplify = T)[2]))),
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "right" ~
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_left", str_split(cur_column(), "malvol", simplify = T)[2]))))))
Upvotes: 2
Views: 150
Reputation: 707
What makes your problem difficult is that a lot of information is hidden in variable names rather than data cells. Hence, you need some steps to transform your data into "tidy" format. In the code below, the crucial part is (1) to turn the variables [malvol]_[lr]_[num]_[w]
into four separate columns malvol
, lr
, num
, w
(all prefixed with m_
), and (2) from the variables leftright_[w]
extract variable w
(prefixed with l_
) using the functions pivot_longer
and than separate
.
# Just adding a row_id to your data, for later joining
dt <- dt %>% mutate(id = row_number())
df <- dt %>%
# Tidy the column "malvol"
pivot_longer(cols = starts_with('malvol'), names_to = "m_var", values_to = "m_val") %>%
separate(m_var, into = c("m_malvol", "m_lr", "m_num", "m_w")) %>%
# They the column "leftright"
pivot_longer(cols = starts_with('leftright'), names_to = 'l_var', values_to = 'l_lr') %>%
separate(l_var, into = c(NA, "l_w")) %>%
# Implement the logic
filter(l_w == m_w) %>%
filter(l_lr != m_lr) %>%
# Pivot into original wide format
select(-c(l_w, l_lr, m_lr)) %>%
pivot_wider(names_from = c(m_malvol, m_num, m_w), values_from = m_val)
# Merging back results to original data
dt <- dt %>% mutate(id = row_number()) %>% inner_join(df, by="id")
Although I pivoted the data back into your desired format in the end (to check whether results are in line with your desired results), I would suggest you leave the data in the long format, which is "tidy" and more easy to work with, compared to your "wide" format. So maybe skip the last pivot_wider
operation.
Upvotes: 3