broti
broti

Reputation: 1382

Create (many) columns conditional on similarly named columns

I want to create a new column that take the value of one of two similarly named columns, depending on a third column. There are many such columns to create. Here's my data.

dt <- structure(list(malvol_left_1_w1 = c("1", "1", "4", "3", "4", 
"4", "1", "4", "4", "3", "1", "4", "4", "3", "4", "4", "5", "2", 
"4", "2"), malvol_left_2_w1 = c("1", "1", "4", "3", "4", "4", 
"1", "3", "4", "2", "2", "2", "4", "1", "5", "4", "5", "2", "4", 
"2"), malvol_right_1_w1 = c("1", "1", "4", "3", "4", "4", "1", 
"3", "4", "2", "1", "4", "4", "5", "5", "4", "2", "6", "4", "1"
), malvol_right_2_w1 = c("1", "1", "4", "3", "4", "4", "1", "3", 
"4", "2", "1", "2", "4", "5", "5", "4", "5", "5", "4", "5"), 
    malvol_left_1_w2 = c("1", "1", "3", "3", "4", "4", "1", "5", 
    "4", "4", "4", "2", "1", "4", "5", "4", "3", "2", "4", "4"
    ), malvol_left_2_w2 = c("1", "1", "3", "3", "4", "4", "7", 
    "5", "4", "2", "3", "1", "1", "4", "4", "4", "3", "4", "4", 
    "4"), malvol_right_1_w2 = c("1", "3", "3", "3", "4", "4", 
    "1", "4", "4", "3", "2", "2", "4", "1", "4", "4", "5", "5", 
    "4", "4"), malvol_right_2_w2 = c("1", "2", "3", "3", "4", 
    "4", "1", "2", "4", "2", "3", "2", "4", "1", "4", "4", "5", 
    "4", "4", "3"), leftright_w1 = c("right", "right", "left", 
    "right", "right", "right", "left", "right", "right", "left", 
    "left", "left", "left", "right", "left", "left", "right", 
    "right", "right", "left"), leftright_w2 = c("right", "right", 
    "left", "left", "right", "left", "left", "right", "right", 
    "left", "left", "left", "left", "right", "left", "left", 
    "right", "right", "left", "left")), class = "data.frame", row.names = c("12", 
"15", "69", "77", "95", "96", "112", "122", "150", "163", "184", 
"216", "221", "226", "240", "298", "305", "354", "370", "379"
))

Now I can do this in dplyr like:

dt <- dt %>% 
  mutate(
    malvol_1_w1 = case_when(
      leftright_w1 == "left" ~ malvol_right_1_w1, 
      leftright_w1 == "right" ~ malvol_left_1_w1),
    malvol_2_w1 = case_when(
      leftright_w1 == "left" ~ malvol_right_2_w1, 
      leftright_w1 == "right" ~ malvol_left_2_w1),
    malvol_1_w2 = case_when(
      leftright_w2 == "left" ~ malvol_right_1_w2, 
      leftright_w2 == "right" ~ malvol_left_1_w2),
    malvol_2_w2 = case_when(
      leftright_w2 == "left" ~ malvol_right_2_w2, 
      leftright_w2 == "right" ~ malvol_left_2_w2))

However, it's not really a feasible solution, because there will be more of both numbers defining a variable (e.g. both malvol_3_w1 and malvol_1_w3 will need to be created).

One solution is to this with a loop:

for (wave in 1:2) {
  for (var in 1:2) {
    dt[, paste0("malvol_", var, "_w", wave)] <- dt[, paste0("malvol_right_", var, "_w", wave)]
    dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_", var, "_w", wave)] <- 
      dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_left_", var, "_w", wave)]
  }
}

However, what is a tidyverse solution?


UPDATE:

I came up with a tidyverse solution myself, however, not every elegant. Still looking for more canonical solutions.

dt <- dt %>% 
  mutate(
    malvol_1_w1 = NA, malvol_2_w1 = NA, 
    malvol_1_w2 = NA, malvol_2_w2 = NA) %>%
  mutate(
    across(matches("malvol_\\d"), 
           ~ case_when(
             eval(parse(text = paste0("leftright_",  str_extract(cur_column(), "w.")))) == "left" ~ 
               eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
                                        "_right", str_split(cur_column(), "malvol", simplify = T)[2]))),
             eval(parse(text = paste0("leftright_",  str_extract(cur_column(), "w.")))) == "right" ~ 
               eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
                                        "_left", str_split(cur_column(), "malvol", simplify = T)[2]))))))

Upvotes: 2

Views: 150

Answers (1)

Till
Till

Reputation: 707

What makes your problem difficult is that a lot of information is hidden in variable names rather than data cells. Hence, you need some steps to transform your data into "tidy" format. In the code below, the crucial part is (1) to turn the variables [malvol]_[lr]_[num]_[w] into four separate columns malvol, lr, num, w (all prefixed with m_), and (2) from the variables leftright_[w] extract variable w (prefixed with l_) using the functions pivot_longer and than separate.

# Just adding a row_id to your data, for later joining
dt <- dt %>% mutate(id = row_number())

df <- dt %>% 
  # Tidy the column "malvol" 
  pivot_longer(cols = starts_with('malvol'), names_to = "m_var", values_to = "m_val") %>%
  separate(m_var, into = c("m_malvol", "m_lr", "m_num", "m_w")) %>%
  
  # They the column "leftright"
  pivot_longer(cols = starts_with('leftright'), names_to = 'l_var', values_to = 'l_lr') %>%
  separate(l_var, into = c(NA, "l_w")) %>%

  # Implement the logic
  filter(l_w == m_w) %>%
  filter(l_lr != m_lr) %>%
  
  # Pivot into original wide format
  select(-c(l_w, l_lr, m_lr)) %>%
  pivot_wider(names_from = c(m_malvol, m_num, m_w), values_from = m_val)

# Merging back results to original data   
dt <- dt %>% mutate(id = row_number()) %>% inner_join(df, by="id")

Although I pivoted the data back into your desired format in the end (to check whether results are in line with your desired results), I would suggest you leave the data in the long format, which is "tidy" and more easy to work with, compared to your "wide" format. So maybe skip the last pivot_wider operation.

Upvotes: 3

Related Questions