esperaporque
esperaporque

Reputation: 55

Find matching values across columns

This is for a guessing game, and these values will change. I am trying to find or write a function that will replace all of those if_else statements, so that I don't have to copy and paste each one into the mutate call.

library(dplyr)

# Sample guessing-game data, one row per player. `who_did_you_get` is compared
# against the per-person columns (barbara ... shane) by the code further down.
df <- tribble(
  ~your_name, ~who_did_you_get, ~barbara, ~brent, ~emma,   ~joe,      ~liis,     ~shane,
  "liis",     "shane",          "brent",  "emma", "joe",   "shane",   "barbara", "barbara",
  "barbara",  "brent",          "shane",  "liis", "joe",   "emma",    "brent",   "barbara",
  "brent",    "emma",           "shane",  "liis", "joe",   "emma",    "brent",   "barbara",
  "emma",     "joe",            "brent",  "emma", "shane", "barbara", "barbara", "emma",
  "joe",      "liis",           "shane",  "liis", "joe",   "emma",    "brent",   "barbara",
  "shane",    "barbara",        "emma",   "emma", "liis",  "shane",   "barbara", "joe"
)

# Collapse the answer key (each player's entry in `who_did_you_get`, in
# `your_name` row order) into one comma-separated string. The data has no
# `key` column, so the key is read from `who_did_you_get`.
paste(df$who_did_you_get, collapse = ", ") # paste this output to make the following

# Flag each guess column against its hard-coded answer (1 = match,
# 0 = no match), total the flags per row, and list the best scorers first.
df %>%
  mutate(
    barbara_q = as.numeric(barbara == "brent"),
    brent_q   = as.numeric(brent == "emma"),
    emma_q    = as.numeric(emma == "joe"),
    joe_q     = as.numeric(joe == "liis"),
    liis_q    = as.numeric(liis == "shane"),
    shane_q   = as.numeric(shane == "barbara")
  ) %>%
  mutate(score = rowSums(select(., barbara_q:shane_q))) %>%
  arrange(desc(score))

Upvotes: 2

Views: 381

Answers (3)

www
www

Reputation: 39154

Here is another tidyverse solution. We can use map2_dfc to loop through the your_name and who_did_you_get columns in parallel and use transmute to apply your original if_else operation to each pair.

library(tidyverse)

# For each (your_name, who_did_you_get) pair, flag the rows whose value in the
# column named after that player equals the player's who_did_you_get entry.
# map2_dfc() column-binds the resulting one-column tibbles.
df2 <- map2_dfc(df$your_name, df$who_did_you_get, function(x, y, dat = df) {
  transmute(dat, "{x}_q" := if_else(.data[[x]] == y, 1, 0))
}) %>%
  bind_cols(df, .) %>%
  # The flag columns follow `your_name` order (liis_q comes first), so a
  # positional range such as barbara_q:shane_q would leave liis_q out of the
  # total. Select every flag column by its suffix instead.
  mutate(score = rowSums(select(., ends_with("_q"))))
df2
# # A tibble: 6 x 15
#   your_name who_did_you_get barbara brent emma  joe     liis    shane   liis_q barbara_q brent_q emma_q joe_q shane_q score
#   <chr>     <chr>           <chr>   <chr> <chr> <chr>   <chr>   <chr>    <dbl>     <dbl>   <dbl>  <dbl> <dbl>   <dbl> <dbl>
# 1 liis      shane           brent   emma  joe   shane   barbara barbara      0         1       1      1     0       1     4
# 2 barbara   brent           shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1     2
# 3 brent     emma            shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1     2
# 4 emma      joe             brent   emma  shane barbara barbara emma         0         1       1      0     0       0     2
# 5 joe       liis            shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1     2
# 6 shane     barbara         emma    emma  liis  shane   barbara joe          0         0       1      0     0       0     1

Upvotes: 1

akrun
akrun

Reputation: 886938

A base R approach is to subset the data based on 'your_name', do a comparison with the corresponding 'who_did_you_get', convert the logical to binary (+), create new columns in the data, and get the rowSums of those columns

# One 0/1 integer vector per player: does each row's value in that player's
# column equal the player's `who_did_you_get` entry? Map() names the list
# elements after the `your_name` values.
lst1 <- Map(
  function(player, target) as.integer(df[[player]] == target),
  df$your_name,
  df$who_did_you_get
)
nm1 <- paste0(names(lst1), "_q")
# Attach the flags as new `<name>_q` columns and total them per row.
df[nm1] <- lst1
df$score <- rowSums(df[nm1])
# Highest score first; reset the row names after reordering.
df <- df[order(-df[["score"]]), ]
row.names(df) <- NULL

Or using tidyverse with map2

library(dplyr)
library(purrr)
library(stringr)
# Build one integer 0/1 column per player (named `<player>_q`) marking the
# rows whose value in that player's column equals the player's
# `who_did_you_get` entry, then append those columns to the original data.
map2_dfc(
  df$your_name,
  df$who_did_you_get,
  function(player, target) {
    transmute(
      df,
      !!str_c(player, "_q") := as.integer(!!rlang::sym(player) == target)
    )
  }
) %>%
  bind_cols(df, .)

-output

# A tibble: 6 x 14
#  your_name who_did_you_get barbara brent emma  joe     liis    shane   liis_q barbara_q brent_q emma_q joe_q shane_q
#  <chr>     <chr>           <chr>   <chr> <chr> <chr>   <chr>   <chr>    <int>     <int>   <int>  <int> <int>   <int>
#1 liis      shane           brent   emma  joe   shane   barbara barbara      0         1       1      1     0       1
#2 barbara   brent           shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1
#3 brent     emma            shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1
#4 emma      joe             brent   emma  shane barbara barbara emma         0         1       1      0     0       0
#5 joe       liis            shane   liis  joe   emma    brent   barbara      0         0       0      1     0       1
#6 shane     barbara         emma    emma  liis  shane   barbara joe          0         0       1      0     0       0

Upvotes: 2

tmfmnk
tmfmnk

Reputation: 39858

One dplyr option could be:

# For every column after the first two, look up the `who_did_you_get` entry of
# the player the column is named after and flag matching rows as 1/0 in a new
# `<column>_q` column; then total the flags per row and sort by descending
# score.
df %>%
  mutate(
    across(
      -c(1:2),
      ~ as.integer(.x == who_did_you_get[match(cur_column(), your_name)]),
      .names = "{.col}_q"
    )
  ) %>%
  mutate(score = rowSums(select(., ends_with("_q")))) %>%
  arrange(desc(score))

  your_name who_did_you_get barbara brent emma  joe     liis    shane   barbara_q brent_q emma_q joe_q liis_q shane_q score
  <chr>     <chr>           <chr>   <chr> <chr> <chr>   <chr>   <chr>       <int>   <int>  <int> <int>  <int>   <int> <dbl>
1 liis      shane           brent   emma  joe   shane   barbara barbara         1       1      1     0      0       1     4
2 barbara   brent           shane   liis  joe   emma    brent   barbara         0       0      1     0      0       1     2
3 brent     emma            shane   liis  joe   emma    brent   barbara         0       0      1     0      0       1     2
4 emma      joe             brent   emma  shane barbara barbara emma            1       1      0     0      0       0     2
5 joe       liis            shane   liis  joe   emma    brent   barbara         0       0      1     0      0       1     2
6 shane     barbara         emma    emma  liis  shane   barbara joe             0       1      0     0      0       0     1

Upvotes: 2

Related Questions