Reputation: 645
I've been looking for examples of how to do this using either `stringr::str_split` or `tidyr::separate`, but I'm not sure of the most efficient approach. I am basically looking to take the 2 columns in `data` (in my example below) and create the 5 columns shown in `output_df` below.
For `output_df`: the first 4 columns (`outs_when_up`, `on_1b`, `on_2b`, `on_3b`) come from the `base_out_state` column, and the 5th column is `avg_re` from `data`. All of the columns in `output_df` should be numeric. The names of the three base-runner columns have the prefix `on_`.
I'm guessing the solution will also use `purrr`, but I haven't had any success.
# Example input: one row per base/out state.
#   base_out_state - "<n> outs, <1b|_> <2b|_> <3b|_>"; "_" marks an empty base.
#   avg_re         - numeric value attached to that state.
data <- tibble::tribble(
  ~base_out_state,    ~avg_re,
  "0 outs, 1b 2b 3b", 2.53237410071942,
  "0 outs, _ 2b 3b",  1.95045045045045,
  "1 outs, 1b 2b 3b", 1.73913043478261,
  "0 outs, 1b 2b _",  1.60282021151586,
  "0 outs, 1b _ 3b",  1.59868421052632,
  "1 outs, _ 2b 3b",  1.47916666666667,
  "0 outs, _ _ 3b",   1.42028985507246,
  "1 outs, 1b _ 3b",  1.27450980392157,
  "0 outs, _ 2b _",   1.11675126903553,
  "1 outs, _ _ 3b",   0.960416666666667,
  "1 outs, 1b 2b _",  0.939353099730458,
  "0 outs, 1b _ _",   0.925538103548575,
  "2 outs, 1b 2b 3b", 0.740189445196211,
  "1 outs, _ 2b _",   0.708523096942095,
  "1 outs, 1b _ _",   0.568587968789328,
  "2 outs, _ 2b 3b",  0.55668358714044,
  "0 outs, _ _ _",    0.534048257372654,
  "2 outs, 1b _ 3b",  0.53125,
  "2 outs, 1b 2b _",  0.463123644251627,
  "2 outs, _ _ 3b",   0.39,
  "2 outs, _ 2b _",   0.324457593688363,
  "1 outs, _ _ _",    0.286259541984733,
  "2 outs, 1b _ _",   0.230750721847931,
  "2 outs, _ _ _",    0.104665825977301
)
# Desired result: `base_out_state` unpacked into the out count plus one 0/1
# flag per base, with `avg_re` carried over unchanged. All columns numeric.
output_df <- tibble::tribble(
  ~outs_when_up, ~on_1b, ~on_2b, ~on_3b, ~avg_re,
  0, 1, 1, 1, 2.53237410071942,
  0, 0, 1, 1, 1.95045045045045,
  1, 1, 1, 1, 1.73913043478261,
  0, 1, 1, 0, 1.60282021151586,
  0, 1, 0, 1, 1.59868421052632,
  1, 0, 1, 1, 1.47916666666667,
  0, 0, 0, 1, 1.42028985507246,
  1, 1, 0, 1, 1.27450980392157,
  0, 0, 1, 0, 1.11675126903553,
  1, 0, 0, 1, 0.960416666666667,
  1, 1, 1, 0, 0.939353099730458,
  0, 1, 0, 0, 0.925538103548575,
  2, 1, 1, 1, 0.740189445196211,
  1, 0, 1, 0, 0.708523096942095,
  1, 1, 0, 0, 0.568587968789328,
  2, 0, 1, 1, 0.55668358714044,
  0, 0, 0, 0, 0.534048257372654,
  2, 1, 0, 1, 0.53125,
  2, 1, 1, 0, 0.463123644251627,
  2, 0, 0, 1, 0.39,
  2, 0, 1, 0, 0.324457593688363,
  1, 0, 0, 0, 0.286259541984733,
  2, 1, 0, 0, 0.230750721847931,
  2, 0, 0, 0, 0.104665825977301
)
Upvotes: 1
Views: 51
Reputation: 4708
`splitstackshape` could work too.
library(splitstackshape)
library(tidyverse)

# Split `base_out_state` on single spaces into base_out_state_1..5:
#   _1 = out count, _2 = the literal "outs," token, _3.._5 = one token per
#   base ("1b"/"2b"/"3b" when occupied, "_" when empty).
cSplit(
  data,
  splitCols = "base_out_state",
  sep = " ",
  direction = "wide",
  drop = FALSE
) %>%
  # Drop the original string and the "outs," filler piece.
  dplyr::select(-base_out_state, -base_out_state_2) %>%
  # Name each piece explicitly rather than via the superseded rename_at() with
  # a constant name vector, which silently relied on matched-column order.
  rename(
    outs_when_up = base_out_state_1,
    on_1b = base_out_state_3,
    on_2b = base_out_state_4,
    on_3b = base_out_state_5
  ) %>%
  # "_" means the base is empty (0); any other token means occupied (1).
  mutate(across(starts_with("on_"), ~ ifelse(.x == "_", 0, 1)))
# avg_re outs_when_up on_1b on_2b on_3b
# 1: 2.5323741 0 1 1 1
# 2: 1.9504505 0 0 1 1
# 3: 1.7391304 1 1 1 1
# 4: 1.6028202 0 1 1 0
# 5: 1.5986842 0 1 0 1
# 6: 1.4791667 1 0 1 1
# 7: 1.4202899 0 0 0 1
# 8: 1.2745098 1 1 0 1
# 9: 1.1167513 0 0 1 0
# 10: 0.9604167 1 0 0 1
# 11: 0.9393531 1 1 1 0
# 12: 0.9255381 0 1 0 0
# 13: 0.7401894 2 1 1 1
# 14: 0.7085231 1 0 1 0
# 15: 0.5685880 1 1 0 0
# 16: 0.5566836 2 0 1 1
# 17: 0.5340483 0 0 0 0
# 18: 0.5312500 2 1 0 1
# 19: 0.4631236 2 1 1 0
# 20: 0.3900000 2 0 0 1
# 21: 0.3244576 2 0 1 0
# 22: 0.2862595 1 0 0 0
# 23: 0.2307507 2 1 0 0
# 24: 0.1046658 2 0 0 0
Upvotes: 0
Reputation: 12470
How about:
library(tidyverse)

# Break `base_out_state` apart on whitespace. The second piece is the literal
# "outs," token; giving it the name NA makes separate() discard it.
data %>%
  separate(
    col = base_out_state,
    into = c("outs_when_up", NA, "on_1b", "on_2b", "on_3b"),
    sep = "\\s+"
  ) %>%
  mutate(
    outs_when_up = as.integer(outs_when_up),
    # "_" marks an empty base (0L); anything else is an occupied base (1L).
    across(starts_with("on"), function(base) ifelse(base == "_", 0L, 1L))
  )
#> # A tibble: 24 x 5
#> outs_when_up on_1b on_2b on_3b avg_re
#> <int> <int> <int> <int> <dbl>
#> 1 0 1 1 1 2.53
#> 2 0 0 1 1 1.95
#> 3 1 1 1 1 1.74
#> 4 0 1 1 0 1.60
#> 5 0 1 0 1 1.60
#> 6 1 0 1 1 1.48
#> 7 0 0 0 1 1.42
#> 8 1 1 0 1 1.27
#> 9 0 0 1 0 1.12
#> 10 1 0 0 1 0.960
#> # … with 14 more rows
Created on 2021-02-24 by the reprex package (v1.0.0)
Upvotes: 2