Reputation: 115
The Problem:
I would like to count the number of unique 5-player combinations, `n`, that meet the criteria described below, for each team, using the following data.
The Data:
# Sample data: two teams ("A" and "B") with one row per player/position
# listing, so a player who can fill two positions appears twice.
TEAM <- rep(c("A", "B"), each = 8)
PLAYER <- c(
  "Will", "Will", "Roy", "Roy", "Jaylon", "Dean", "Yosef", "Devan",
  "Quincy", "Quincy", "Luis", "Xzavier", "Seth", "Layne", "Layne", "Antwan"
)
LP <- c(
  1, 1, 2, 2, 3, 4, 5, 6,
  1, 1, 2, 3, 4, 5, 5, 6
)
POS <- c(
  "3B", "OF", "1B", "OF", "SS", "OF", "C", "OF",
  "2B", "OF", "OF", "C", "3B", "1B", "OF", "SS"
)
df <- data.frame(TEAM = TEAM, PLAYER = PLAYER, LP = LP, POS = POS)
df:
TEAM PLAYER LP POS
1 A Will 1 3B
2 A Will 1 OF
3 A Roy 2 1B
4 A Roy 2 OF
5 A Jaylon 3 SS
6 A Dean 4 OF
7 A Yosef 5 C
8 A Devan 6 OF
9 B Quincy 1 2B
10 B Quincy 1 OF
11 B Luis 2 OF
12 B Xzavier 3 C
13 B Seth 4 3B
14 B Layne 5 1B
15 B Layne 5 OF
16 B Antwan 6 SS
Edit: The `LP` column is irrelevant to the output. That wasn't as clear as I would have liked it to be in the original post.
The Criteria:
1. Five distinct `PLAYER` values must be used (one will always be left out, as there are six players in the pool available for each team).
2. Each `POS` may only be used once, with the exception of `OF`, which may be used up to three times (`OF <= 3`).
3. A combination may not use `PLAYER` from multiple teams (`TEAM`).

For Example:
These are just a few of the many possible combinations I am looking to create/count:
TEAM 1 2 3 4 5
1 A Will-OF Roy-1B Jaylon-SS Dean-OF Devan-OF
2 A Roy-OF Jaylon-SS Dean-OF Yosef-C Devan-OF
3 A Will-3B Roy-OF Jaylon-SS Dean-OF Yosef-C
...
n A Will-3B Roy-1B Jaylon-SS Dean-OF Yosef-C
TEAM 1 2 3 4 5
1 B Quincy-2B Luis-OF Xzavier-C Seth-3B Layne-1B
2 B Quincy-2B Luis-OF Seth-3B Layne-1B Antwan-SS
3 B Quincy-OF Luis-OF Xzavier-C Seth-3B Layne-OF
...
n B Quincy-2B Luis-OF Xzavier-C Seth-3B Layne-OF
Desired Result:
TEAM UNIQUE
A n
B n
What I've Tried:
I know how to get all possible 5-player combinations for each team and summarise that. I'm just not sure how to get the combinations I'm looking for using the specific criteria as defined for their positions.
I wish I knew where to begin with this one. I could really use your help. Thank you!
Upvotes: 2
Views: 99
Reputation: 123
I worked on this through the morning and just got to my solution (only to see there is a more elegant one posted). But I offer this up to you anyway to share my thought process on how I arrived at the solution.
library(tidyverse)
# Rebuild the question's sample data: one row per player/position listing.
TEAM <- rep(c("A", "B"), each = 8)
PLAYER <- c(
  "Will", "Will", "Roy", "Roy", "Jaylon", "Dean", "Yosef", "Devan",
  "Quincy", "Quincy", "Luis", "Xzavier", "Seth", "Layne", "Layne", "Antwan"
)
LP <- c(
  1, 1, 2, 2, 3, 4, 5, 6,
  1, 1, 2, 3, 4, 5, 5, 6
)
POS <- c(
  "3B", "OF", "1B", "OF", "SS", "OF", "C", "OF",
  "2B", "OF", "OF", "C", "3B", "1B", "OF", "SS"
)
df <- data.frame(TEAM = TEAM, PLAYER = PLAYER, LP = LP, POS = POS)
# Only `df` is used from here on, so drop the temporary column vectors.
rm(list = c("TEAM", "PLAYER", "LP", "POS"))
# Each team has 6 players and I want to find the groups of 5 that are possible.
# Each column of the resulting tibble holds one group of five LP values.
# combn() returns a matrix without column names, so the names V1..V6 are
# supplied explicitly; leaving `.name_repair` out makes as_tibble() emit a
# name-repair warning on tibble >= 2.0.0.
posible_player_combinations <- combn(1:6, 5) %>%
  as_tibble(.name_repair = ~ paste0("V", seq_along(.x)))
# NOTE(review): `team` appears unused by the functions shown below, which
# take the team as an argument; kept so the script's globals are unchanged.
team <- "A"
# Pair every slot-1 listing with every listing of the player in slot 2.
#
# first_stage:   listings (columns LP, POS) of the player in slot 1.
# mydata_byteam: listings (columns LP, POS) of the players being combined.
# pcomp:         LP values of the players; pcomp[2] identifies slot 2.
#
# Returns a data frame with columns col1LP, col1POS, col2LP, col2POS that
# keeps only the pairings whose two positions differ.
make_2nd_column <- function(first_stage, mydata_byteam, pcomp){
  candidates <- filter(mydata_byteam, LP == pcomp[2])
  n_new <- nrow(candidates)
  n_old <- nrow(first_stage)

  # Repeat each existing row once per candidate ...
  carried <- tibble(
    col1LP = rep(first_stage$LP, each = n_new),
    col1POS = rep(first_stage$POS, each = n_new)
  )
  # ... and cycle through the candidates alongside, giving a cross join.
  added <- tibble(
    col2LP = rep(candidates$LP, n_old),
    col2POS = rep(candidates$POS, n_old)
  )

  paired <- cbind(carried, added)
  filter(paired, col1POS != col2POS)
}
# Extend every two-slot pairing with every listing of the player in slot 3.
#
# second_stage:  data frame with columns col1LP, col1POS, col2LP, col2POS.
# mydata_byteam: listings (columns LP, POS) of the players being combined.
# pcomp:         LP values of the players; pcomp[3] identifies slot 3.
#
# Returns the input columns plus col3LP and col3POS, keeping only rows in
# which the three positions are pairwise different.
make_3rd_column <- function(second_stage, mydata_byteam, pcomp){
  candidates <- filter(mydata_byteam, LP == pcomp[3])
  n_new <- nrow(candidates)
  n_old <- nrow(second_stage)

  # Repeat each existing row once per candidate ...
  carried <- tibble(
    col1LP = rep(second_stage$col1LP, each = n_new),
    col1POS = rep(second_stage$col1POS, each = n_new),
    col2LP = rep(second_stage$col2LP, each = n_new),
    col2POS = rep(second_stage$col2POS, each = n_new)
  )
  # ... and cycle through the candidates alongside, giving a cross join.
  added <- tibble(
    col3LP = rep(candidates$LP, n_old),
    col3POS = rep(candidates$POS, n_old)
  )

  widened <- cbind(carried, added)
  filter(
    widened,
    col1POS != col2POS,
    col2POS != col3POS,
    col3POS != col1POS
  )
}
# Extend every three-slot row with every listing of the player in slot 4.
#
# third_stage:   data frame with columns col1LP .. col3POS.
# mydata_byteam: listings (columns LP, POS) of the players being combined.
# pcomp:         LP values of the players; pcomp[4] identifies slot 4.
#
# Returns the input columns plus col4LP and col4POS, keeping only rows in
# which the four positions are pairwise different.
make_4th_column <- function(third_stage, mydata_byteam, pcomp){
  candidates <- filter(mydata_byteam, LP == pcomp[4])
  n_new <- nrow(candidates)
  n_old <- nrow(third_stage)

  # Repeat each existing row once per candidate ...
  carried <- tibble(
    col1LP = rep(third_stage$col1LP, each = n_new),
    col1POS = rep(third_stage$col1POS, each = n_new),
    col2LP = rep(third_stage$col2LP, each = n_new),
    col2POS = rep(third_stage$col2POS, each = n_new),
    col3LP = rep(third_stage$col3LP, each = n_new),
    col3POS = rep(third_stage$col3POS, each = n_new)
  )
  # ... and cycle through the candidates alongside, giving a cross join.
  added <- tibble(
    col4LP = rep(candidates$LP, n_old),
    col4POS = rep(candidates$POS, n_old)
  )

  widened <- cbind(carried, added)
  filter(
    widened,
    col1POS != col2POS,
    col1POS != col3POS,
    col1POS != col4POS,
    col2POS != col3POS,
    col2POS != col4POS,
    col3POS != col4POS
  )
}
# Extend every four-slot row with every listing of the player in slot 5.
#
# fourth_stage:  data frame with columns col1LP .. col4POS.
# mydata_byteam: listings (columns LP, POS) of the players being combined.
# pcomp:         LP values of the players; pcomp[5] identifies slot 5.
#
# Returns the input columns plus col5LP and col5POS, keeping only rows in
# which all five positions are pairwise different.
make_5th_column <- function(fourth_stage, mydata_byteam, pcomp){
  candidates <- filter(mydata_byteam, LP == pcomp[5])
  n_new <- nrow(candidates)
  n_old <- nrow(fourth_stage)

  # Repeat each existing row once per candidate ...
  carried <- tibble(
    col1LP = rep(fourth_stage$col1LP, each = n_new),
    col1POS = rep(fourth_stage$col1POS, each = n_new),
    col2LP = rep(fourth_stage$col2LP, each = n_new),
    col2POS = rep(fourth_stage$col2POS, each = n_new),
    col3LP = rep(fourth_stage$col3LP, each = n_new),
    col3POS = rep(fourth_stage$col3POS, each = n_new),
    col4LP = rep(fourth_stage$col4LP, each = n_new),
    col4POS = rep(fourth_stage$col4POS, each = n_new)
  )
  # ... and cycle through the candidates alongside, giving a cross join.
  added <- tibble(
    col5LP = rep(candidates$LP, n_old),
    col5POS = rep(candidates$POS, n_old)
  )

  widened <- cbind(carried, added)
  filter(
    widened,
    col1POS != col2POS,
    col1POS != col3POS,
    col1POS != col4POS,
    col1POS != col5POS,
    col2POS != col3POS,
    col2POS != col4POS,
    col2POS != col5POS,
    col3POS != col4POS,
    col3POS != col5POS,
    col4POS != col5POS
  )
}
# Turn the five-slot table into one "LP POS" label per slot and drop
# duplicate line-ups.
#
# final_stage_prefilter: data frame with columns col1LP, col1POS, ...,
#   col5LP, col5POS.
#
# Returns a data frame with columns Player1 .. Player5 and one row per
# distinct line-up.
make_final <- function(final_stage_prefilter){
  final_stage_prefilter %>%
    mutate(
      # Strip everything from the first "-" on, so labels such as "OF-a" and
      # "OF-b" both collapse to "OF" and become duplicates.
      Player1 = paste(col1LP, str_remove_all(col1POS, "-.*")),
      Player2 = paste(col2LP, str_remove_all(col2POS, "-.*")),
      Player3 = paste(col3LP, str_remove_all(col3POS, "-.*")),
      Player4 = paste(col4LP, str_remove_all(col4POS, "-.*")),
      Player5 = paste(col5LP, str_remove_all(col5POS, "-.*"))
    ) %>%
    # Select the labels by name instead of by position (11:15) so the result
    # does not depend on the input having exactly ten columns.
    select(Player1, Player2, Player3, Player4, Player5) %>%
    distinct()
}
# Build every valid line-up for the k-th group of five players.
#
# posible_player_combinations: table whose k-th column holds the LP values
#   of one group of players.
# mydata: listings (columns LP, POS) for a single team.
# k:      index of the column (group) to process.
#
# Returns the de-duplicated line-ups produced by make_final().
make_teams <- function(posible_player_combinations, mydata, k){
  lp_group <- unname(as_vector(posible_player_combinations[k]))
  roster <- filter(mydata, LP %in% lp_group)

  # Start with the first player's listings, then add one slot at a time.
  stage <- filter(roster, LP == lp_group[1])
  stage <- make_2nd_column(stage, roster, lp_group)
  stage <- make_3rd_column(stage, roster, lp_group)
  stage <- make_4th_column(stage, roster, lp_group)
  stage <- make_5th_column(stage, roster, lp_group)

  make_final(stage)
}
# Enumerate every valid line-up for one team.
#
# df:     data frame with columns TEAM, LP and POS (one row per listing).
# team:   value of TEAM to process.
# posible_player_combinations: table whose columns each hold the LP values
#   of one group of players to combine.
# max_of: maximum number of "OF" slots allowed in a line-up (default 3,
#   the original hard-coded limit); must be between 0 and 26.
#
# Returns the row-bound results of make_teams() for every group, visibly.
make_all_combinations <- function(df, team, posible_player_combinations,
                                  max_of = 3L) {
  stopifnot(
    length(max_of) == 1L,
    !is.na(max_of),
    max_of >= 0,
    max_of <= length(letters)
  )

  mydata <- df %>% filter(TEAM == team) %>% select(LP, POS)
  of_p <- mydata$LP[mydata$POS %in% "OF"]

  # Treat the allowed "OF" slots as separate positions ("OF-a", "OF-b", ...)
  # so that the later all-positions-differ restriction still permits up to
  # `max_of` outfielders. The suffix is stripped again in make_final(), which
  # then removes the resulting duplicate line-ups.
  of_labels <- paste0("OF-", letters[seq_len(max_of)])
  of_df <- tibble(
    LP = rep(of_p, each = length(of_labels)),
    POS = rep(of_labels, times = length(of_p))
  )
  mydata <- rbind(mydata %>% filter(POS != "OF"), of_df)

  all_combinations <- bind_rows(lapply(
    seq_along(posible_player_combinations),
    function(k) make_teams(posible_player_combinations, mydata, k)
  ))
  # Return explicitly: ending on the assignment would return invisibly.
  all_combinations
}
# Enumerate the valid line-ups for each team (one row per line-up).
mydata_a <- make_all_combinations(df, "A", posible_player_combinations)
mydata_b <- make_all_combinations(df, "B", posible_player_combinations)
# Show the last rows of each result; the output is reproduced below. The last
# row numbers shown there (18 and 20) are the per-team line-up counts.
tail(mydata_a)
tail(mydata_b)
# > tail(mydata_a)
# Player1 Player2 Player3 Player4 Player5
# 13 1 3B 2 OF 4 OF 5 C 6 OF
# 14 1 OF 2 1B 4 OF 5 C 6 OF
# 15 1 3B 3 SS 4 OF 5 C 6 OF
# 16 1 OF 3 SS 4 OF 5 C 6 OF
# 17 2 1B 3 SS 4 OF 5 C 6 OF
# 18 2 OF 3 SS 4 OF 5 C 6 OF
# > tail(mydata_b)
# Player1 Player2 Player3 Player4 Player5
# 15 1 2B 3 C 4 3B 5 1B 6 SS
# 16 1 2B 3 C 4 3B 5 OF 6 SS
# 17 1 OF 3 C 4 3B 5 1B 6 SS
# 18 1 OF 3 C 4 3B 5 OF 6 SS
# 19 2 OF 3 C 4 3B 5 1B 6 SS
# 20 2 OF 3 C 4 3B 5 OF 6 SS
Upvotes: 0
Reputation: 107567
Consider several wrangling steps:
1. Create a helper column concatenating `PLAYER` and `POS`.
2. Use `by` to split the data frame by teams and run operations on the splits (Rule #3).
3. Run `combn` on `PLAYER_POS` to choose 5 listings.
4. Use `ave` for a running count of similar `PLAYER`.
5. Use `Filter` to keep data frames of 5 rows and 5 unique players that adhere to the positions criteria (Rule #1 and #2).

Base R code
# HELPER COLUMN: one label per player/position listing, e.g. "Will_3B"
df$PLAYER_POS <- paste(df$PLAYER, df$POS, sep = "_")

# BUILD LIST OF DFs BY TEAM
# Within each team, every choice of 5 listings becomes its own data frame;
# PLAYER_NUM numbers the listings of the same player inside that choice.
df_list <- by(df, df$TEAM, function(sub) {
  number_repeats <- function(p) {
    chosen <- subset(sub, PLAYER_POS %in% p)
    transform(chosen, PLAYER_NUM = ave(LP, PLAYER, FUN = seq_along))
  }
  combn(sub$PLAYER_POS, 5, FUN = number_repeats, simplify = FALSE)
})
# FILTER LIST OF DFs BY TEAM
# Keep a candidate only if it has 5 rows, no player listed twice, at most
# three "OF" rows, and no repeated position among the non-"OF" rows.
df_list <- lapply(df_list, function(dfs) {
  is_valid_lineup <- function(lineup) {
    is_of <- lineup$POS == "OF"
    # Scalar conditions, so use short-circuit `&&`; this also avoids calling
    # max() on an empty data frame.
    nrow(lineup) == 5 &&
      max(lineup$PLAYER_NUM) == 1 &&
      sum(is_of) <= 3 &&
      anyDuplicated(lineup$POS[!is_of]) == 0
  }
  Filter(is_valid_lineup, dfs)
})
# COUNT REMAINING DFs BY TEAM FOR UNIQUE n
# Each team's list length is its number of valid line-ups.
vapply(df_list, length, integer(1))
# A B
# 18 20
data.frame(
  TEAMS = names(df_list),
  UNIQUE = vapply(df_list, length, integer(1)),
  row.names = NULL
)
# TEAMS UNIQUE
# 1 A 18
# 2 B 20
Output (list of subsetted data frames)
df_list$A[[1]]
# TEAM PLAYER LP POS PLAYER_POS PLAYER_NUM
# 1 A Will 1 3B Will_3B 1
# 3 A Roy 2 1B Roy_1B 1
# 5 A Jaylon 3 SS Jaylon_SS 1
# 6 A Dean 4 OF Dean_OF 1
# 7 A Yosef 5 C Yosef_C 1
df_list$A[[2]]
df_list$A[[3]]
...
df_list$A[[18]]
df_list$B[[1]]
# TEAM PLAYER LP POS PLAYER_POS PLAYER_NUM
# 9 B Quincy 1 2B Quincy_2B 1
# 11 B Luis 2 OF Luis_OF 1
# 12 B Xzavier 3 C Xzavier_C 1
# 13 B Seth 4 3B Seth_3B 1
# 14 B Layne 5 1B Layne_1B 1
df_list$B[[2]]
df_list$B[[3]]
...
df_list$B[[20]]
Upvotes: 3