Splitting/Separating a Character Column into Multiple Columns with specified names & formats

Question

I've been searching for examples of how to do this using either stringr::str_split or tidyr::separate, but I'm not sure of the most efficient approach. Basically, I want to take the 2 columns in data (in my example below) and create the 5 columns shown in output_df below.

For output_df: The first 4 columns (outs_when_up, on_1b, on_2b, on_3b) come from the base_out_state column, and the 5th column is avg_re from data. All of the columns in output_df should be numeric. The column names for the 3 base columns have the prefix on_

I'm guessing the solution will also use purrr, but I haven't had any success.

# Example input: 24 rows, 2 columns, listed in descending order of `avg_re`.
#   base_out_state - character, formatted as "<n>  outs,  <1b|_> <2b|_> <3b|_>";
#                    the fields are separated by runs of one or more spaces and
#                    `_` stands in for a base token that is absent.
#   avg_re         - numeric value carried through unchanged to the output.
data <- tibble::tribble(
       ~base_out_state,           ~avg_re,
  "0  outs,  1b 2b 3b",  2.53237410071942,
   "0  outs,  _ 2b 3b",  1.95045045045045,
  "1  outs,  1b 2b 3b",  1.73913043478261,
   "0  outs,  1b 2b _",  1.60282021151586,
   "0  outs,  1b _ 3b",  1.59868421052632,
   "1  outs,  _ 2b 3b",  1.47916666666667,
    "0  outs,  _ _ 3b",  1.42028985507246,
   "1  outs,  1b _ 3b",  1.27450980392157,
    "0  outs,  _ 2b _",  1.11675126903553,
    "1  outs,  _ _ 3b", 0.960416666666667,
   "1  outs,  1b 2b _", 0.939353099730458,
    "0  outs,  1b _ _", 0.925538103548575,
  "2  outs,  1b 2b 3b", 0.740189445196211,
    "1  outs,  _ 2b _", 0.708523096942095,
    "1  outs,  1b _ _", 0.568587968789328,
   "2  outs,  _ 2b 3b",  0.55668358714044,
     "0  outs,  _ _ _", 0.534048257372654,
   "2  outs,  1b _ 3b",           0.53125,
   "2  outs,  1b 2b _", 0.463123644251627,
    "2  outs,  _ _ 3b",              0.39,
    "2  outs,  _ 2b _", 0.324457593688363,
     "1  outs,  _ _ _", 0.286259541984733,
    "2  outs,  1b _ _", 0.230750721847931,
     "2  outs,  _ _ _", 0.104665825977301
  )



# Desired result: one row per row of `data`, in the same order, all columns
# numeric.
#   outs_when_up          - the leading number from `base_out_state`.
#   on_1b / on_2b / on_3b - 1 when the matching base token ("1b", "2b", "3b")
#                           is present in `base_out_state`, 0 when it is `_`.
#   avg_re                - copied unchanged from `data`.
output_df<- tibble::tribble(
               ~outs_when_up, ~on_1b, ~on_2b, ~on_3b,           ~avg_re,
                           0,      1,      1,      1,  2.53237410071942,
                           0,      0,      1,      1,  1.95045045045045,
                           1,      1,      1,      1,  1.73913043478261,
                           0,      1,      1,      0,  1.60282021151586,
                           0,      1,      0,      1,  1.59868421052632,
                           1,      0,      1,      1,  1.47916666666667,
                           0,      0,      0,      1,  1.42028985507246,
                           1,      1,      0,      1,  1.27450980392157,
                           0,      0,      1,      0,  1.11675126903553,
                           1,      0,      0,      1, 0.960416666666667,
                           1,      1,      1,      0, 0.939353099730458,
                           0,      1,      0,      0, 0.925538103548575,
                           2,      1,      1,      1, 0.740189445196211,
                           1,      0,      1,      0, 0.708523096942095,
                           1,      1,      0,      0, 0.568587968789328,
                           2,      0,      1,      1,  0.55668358714044,
                           0,      0,      0,      0, 0.534048257372654,
                           2,      1,      0,      1,           0.53125,
                           2,      1,      1,      0, 0.463123644251627,
                           2,      0,      0,      1,              0.39,
                           2,      0,      1,      0, 0.324457593688363,
                           1,      0,      0,      0, 0.286259541984733,
                           2,      1,      0,      0, 0.230750721847931,
                           2,      0,      0,      0, 0.104665825977301
               )

JBGruber · Accepted Answer

How about:

library(tidyverse)

# Turn the single `base_out_state` string into four integer columns
# (`outs_when_up`, `on_1b`, `on_2b`, `on_3b`), leaving `avg_re` untouched.
data %>%
  separate(
    col = base_out_state,
    # Split on runs of whitespace. The backslash must be doubled inside an R
    # string literal: "\s+" is a parse error ('\s' is an unrecognized escape).
    sep = "\\s+",
    # Five pieces per row: the out count, the literal word "outs,", and one
    # token per base.
    into = c("outs_when_up", "outs", "on_1b", "on_2b", "on_3b")
  ) %>%
  # The "outs," piece carries no information, so drop it.
  select(-outs) %>%
  # `_` marks an absent base token -> 0L; any other token -> 1L.
  mutate(across(starts_with("on"), .fns = ~ ifelse(.x == "_", 0L, 1L))) %>%
  # separate() yields character columns; convert the out count to integer.
  mutate(outs_when_up = as.integer(outs_when_up))
#> # A tibble: 24 x 5
#>    outs_when_up on_1b on_2b on_3b avg_re
#>           <int> <int> <int> <int>  <dbl>
#>  1            0     1     1     1  2.53 
#>  2            0     0     1     1  1.95 
#>  3            1     1     1     1  1.74 
#>  4            0     1     1     0  1.60 
#>  5            0     1     0     1  1.60 
#>  6            1     0     1     1  1.48 
#>  7            0     0     0     1  1.42 
#>  8            1     1     0     1  1.27 
#>  9            0     0     1     0  1.12 
#> 10            1     0     0     1  0.960
#> # … with 14 more rows

Created on 2021-02-24 by the reprex package (v1.0.0)

Splitting/Separating a Character Column into Multiple Columns with specified names & formats

Answers (2)

Related Questions

Splitting/Separating a Character Column into Multiple Columns with specified names & formats

Answers (2)

Related Questions

Splitting/Separating a Character Column into Multiple Columns with specified names & formats