Eric_Alan
Eric_Alan

Reputation: 115

Count Unique Combinations Meeting Specific Criteria

The Problem:

I would like to count the number of unique 5-player combinations n, which meet the criteria described below, for each team using the following data.

The Data:

TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)

df:

    TEAM  PLAYER   LP  POS
 1  A     Will     1   3B
 2  A     Will     1   OF
 3  A     Roy      2   1B
 4  A     Roy      3   OF
 5  A     Jaylon   3   SS
 6  A     Dean     4   OF
 7  A     Yosef    5   C
 8  A     Devan    6   OF
 9  B     Quincy   1   2B
10  B     Quincy   1   OF
11  B     Luis     2   OF
12  B     Xzavier  3   C
13  B     Seth     4   3B
14  B     Layne    5   1B
15  B     Layne    5   OF
16  B     Antwan   6   SS

Edit: The LP column is irrelevant to the output. That wasn't as clear as I would have liked it to be in the original post.

The Criteria:

  1. Five unique players PLAYER must be used (one will always be left out, as there are six players in the pool available for each team).
  2. Each position POS may only be used once with the exception of OF, which may be used up to three times OF <= 3.
  3. Combinations may not use players PLAYER from multiple teams TEAM.

For Example:

These are just a few of the many possible combinations I am looking to create/count:

   TEAM  1          2          3          4         5
1  A     Will-OF    Roy-1B     Jaylon-SS  Dean-OF   Devan-OF
2  A     Roy-OF     Jaylon-SS  Dean-OF    Yosef-C   Devan-OF
3  A     Will-3B    Roy-OF     Jaylon-SS  Dean-OF   Yosef-C
...
n  A     Will-3B    Roy-1B     Jaylon-SS  Dean-OF   Yosef-C       

   TEAM  1          2          3          4         5
1  B     Quincy-2B  Luis-OF    Xzavier-C  Seth-3B   Layne-1B
2  B     Quincy-2B  Luis-OF    Seth-3B    Layne-1B  Antwan-SS
3  B     Quincy-OF  Luis-OF    Xzavier-C  Seth-3B   Layne-OF
...
n  B     Quincy-2B  Luis-OF    Xzavier-C  Seth-3B   Layne-OF  

Desired Result:

TEAM  UNIQUE
A     n
B     n

What I've Tried:

I know how to get all possible 5-player combinations for each team and summarise that. I'm just not sure how to get the combinations I'm looking for using the specific criteria as defined for their positions.

I wish I knew where to begin with this one. I could really use your help. Thank you!

Upvotes: 2

Views: 99

Answers (2)

Bryan Wammack
Bryan Wammack

Reputation: 123

A messier solution,

I worked on this through the morning and just got to my solution (only to see there is a more elegant one posted. But I offer this up to you anyways to share my thought process on how I arrived at the solution.

        library(tidyverse)
    
    TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
    PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
    LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
    POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
    df <- data.frame(TEAM,PLAYER,LP,POS)
    rm(TEAM, PLAYER, LP, POS)
    
    # Each team has 6 players and I want to find the groups of 5 that are possible.
    posible_player_combinations <- combn(1:6, 5) %>% as_tibble() 
    team = "A"
    
    make_2nd_column <- function(first_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[2])
      col2_filter <- tibble(
        col1LP =  rep(first_stage$LP, each = nrow(mydf)),
        col1POS = rep(first_stage$POS, each = nrow(mydf)))
      helper <- tibble(
        col2LP = rep(mydf$LP, nrow(first_stage)),
        col2POS = rep(mydf$POS, nrow(first_stage))
      )
      col2_filter <- cbind(col2_filter, helper)
      second_stage <- col2_filter %>% filter(col1POS != col2POS)
      return(second_stage)
    }
    make_3rd_column <- function(second_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[3])
      col3_filter <- tibble(
        col1LP =  rep(second_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(second_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(second_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(second_stage$col2POS, each = nrow(mydf)))
      helper <- tibble(
        col3LP = rep(mydf$LP, nrow(second_stage)),
        col3POS = rep(mydf$POS, nrow(second_stage))
      )
      col3_filter <- cbind(col3_filter, helper)
      third_stage <- col3_filter %>% filter(col1POS != col2POS,
                                            col2POS != col3POS,
                                            col3POS != col1POS)
      return(third_stage)
    }
    make_4th_column <- function(third_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[4])
      col4_filter <- tibble(
        col1LP =  rep(third_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(third_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(third_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(third_stage$col2POS, each = nrow(mydf)),
        col3LP =  rep(third_stage$col3LP, each = nrow(mydf)),
        col3POS = rep(third_stage$col3POS, each = nrow(mydf)))
      helper <- tibble(
        col4LP = rep(mydf$LP, nrow(third_stage)),
        col4POS = rep(mydf$POS, nrow(third_stage))
      )
      col4_filter <- cbind(col4_filter, helper)
      fourth_stage <- col4_filter %>% filter(col1POS != col2POS,
                                             col1POS != col3POS,
                                             col1POS != col4POS,
                                             col2POS != col3POS,
                                             col2POS != col4POS,
                                             col3POS != col4POS)
      return(fourth_stage)
    }
    make_5th_column <- function(fourth_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[5])
      col5_filter <- tibble(
        col1LP =  rep(fourth_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(fourth_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(fourth_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(fourth_stage$col2POS, each = nrow(mydf)),
        col3LP =  rep(fourth_stage$col3LP, each = nrow(mydf)),
        col3POS = rep(fourth_stage$col3POS, each = nrow(mydf)),
        col4LP =  rep(fourth_stage$col4LP, each = nrow(mydf)),
        col4POS = rep(fourth_stage$col4POS, each = nrow(mydf)))
      helper <- tibble(
        col5LP = rep(mydf$LP, nrow(fourth_stage)),
        col5POS = rep(mydf$POS, nrow(fourth_stage))
      )
      col5_filter <- cbind(col5_filter, helper)
      final_stage_prefilter <- col5_filter %>% filter(
        col1POS != col2POS,
        col1POS != col3POS,
        col1POS != col4POS,
        col1POS != col5POS,
        col2POS != col3POS,
        col2POS != col4POS,
        col2POS != col5POS,
        col3POS != col4POS,
        col3POS != col5POS,
        col4POS != col5POS)
      return(final_stage_prefilter)
    }
    make_final <- function(final_stage_prefilter){
      final_stage_prefilter %>% mutate(
        Player1 = paste(col1LP, str_remove_all(col1POS, "-.*")),
        Player2 = paste(col2LP, str_remove_all(col2POS, "-.*")),
        Player3 = paste(col3LP, str_remove_all(col3POS, "-.*")),
        Player4 = paste(col4LP, str_remove_all(col4POS, "-.*")),
        Player5 = paste(col5LP, str_remove_all(col5POS, "-.*"))
      ) %>% select(
        11:15
      ) %>% distinct()
    }
    
    make_teams <- function(posible_player_combinations, mydata, k){
      pcomp  <- posible_player_combinations[k] %>% as_vector() %>% unname()
      mydata_byteam <- mydata %>% filter(LP %in% pcomp)
      first_stage            <- mydata_byteam %>% filter(LP == pcomp[1])
      second_stage           <- make_2nd_column(first_stage, mydata_byteam, pcomp)
      third_stage            <- make_3rd_column(second_stage, mydata_byteam, pcomp)
      fourth_stage           <- make_4th_column(third_stage, mydata_byteam, pcomp)
      final_stage_prefilter  <- make_5th_column(fourth_stage, mydata_byteam, pcomp)
      final_stage            <- make_final(final_stage_prefilter)
      return(final_stage)
    }
    
    
    make_all_combinations <- function(df, team, posible_player_combinations) {
      mydata <- df %>% filter(TEAM == team) %>% select(LP, POS)
      of_p <- mydata %>% filter(POS == "OF") %>% select(LP) %>% as_vector()
      # I want to treat 3 possible "OF"s as separate positions
      # so that that a later restirction on POS can occur.
      # Later I will need to filter out non-unique results
      # by separating the strings with "-" and dropping the letter.
      of_df <- bind_rows(lapply(
        seq_along(of_p),
        function(x, k){
          of_df <- tibble(
            LP = rep(of_p[k], 3),
            POS = c("OF-a", "OF-b", "OF-c")
          )
        },
        x = of_p
      ))
      mydata <- rbind(mydata %>% filter(POS != "OF"), of_df)
      all_combinations <- bind_rows(lapply(
        X = seq_along(posible_player_combinations),
        FUN = make_teams,
        posible_player_combinations = posible_player_combinations,
        mydata = mydata
      ))
    }
mydata_a <- make_all_combinations(df, "A", posible_player_combinations)
mydata_b <- make_all_combinations(df, "B", posible_player_combinations)

tail(mydata_a)
tail(mydata_b)

# > tail(mydata_a)
#      Player1 Player2 Player3 Player4 Player5
# 13    1 3B    2 OF    4 OF     5 C    6 OF
# 14    1 OF    2 1B    4 OF     5 C    6 OF
# 15    1 3B    3 SS    4 OF     5 C    6 OF
# 16    1 OF    3 SS    4 OF     5 C    6 OF
# 17    2 1B    3 SS    4 OF     5 C    6 OF
# 18    2 OF    3 SS    4 OF     5 C    6 OF
# > tail(mydata_b)
#      Player1 Player2 Player3 Player4 Player5
# 15    1 2B     3 C    4 3B    5 1B    6 SS
# 16    1 2B     3 C    4 3B    5 OF    6 SS
# 17    1 OF     3 C    4 3B    5 1B    6 SS
# 18    1 OF     3 C    4 3B    5 OF    6 SS
# 19    2 OF     3 C    4 3B    5 1B    6 SS
# 20    2 OF     3 C    4 3B    5 OF    6 SS

Upvotes: 0

Parfait
Parfait

Reputation: 107567

Consider several wrangling steps:

  1. Assign new column as concatenation of PLAYER and POS.
  2. Run by to split data frame by teams and run operations on splits (Rule #3).
  3. Run combn on PLAYER_POS to choose 5 listings.
  4. Run ave for running count of similar PLAYER.
  5. Run Filter to keep data frames of 5 rows, 5 unique players, and adheres to positions criteria (Rule #1 and #2).

Base R code

# HELPER COLUMN
df$PLAYER_POS <- with(df, paste(PLAYER, POS, sep="_"))

# BUILD LIST OF DFs BY TEAM
df_list <- by(df, df$TEAM, function(sub){
  combn(sub$PLAYER_POS, 5, FUN = function(p) 
    transform(subset(sub, PLAYER_POS %in% p),
              PLAYER_NUM = ave(LP, PLAYER, FUN=seq_along)), 
    simplify = FALSE)
})
  
# FILTER LIST OF DFs BY TEAM
df_list <- lapply(df_list, function(dfs) 
  Filter(function(df) 
           nrow(df) == 5 & 
           max(df$PLAYER_NUM)==1 &
           length(df$POS[df$POS == "OF"]) <= 3 &
           length(df$POS[df$POS != "OF"]) == length(unique(df$POS[df$POS != "OF"])), 
         dfs)
)

# COUNT REMAINING DFs BY TEAM FOR UNIQUE n
lengths(df_list)
#  A  B 
# 18 20 

data.frame(TEAMS=names(df_list), UNIQUE=lengths(df_list), row.names=NULL)
#   TEAMS UNIQUE
# 1     A     18
# 2     B     20

Output (list of subsetted data frames)

df_list$A[[1]]
#   TEAM PLAYER LP POS PLAYER_POS PLAYER_NUM
# 1    A   Will  1  3B    Will_3B          1
# 3    A    Roy  2  1B     Roy_1B          1
# 5    A Jaylon  3  SS  Jaylon_SS          1
# 6    A   Dean  4  OF    Dean_OF          1
# 7    A  Yosef  5   C    Yosef_C          1
df_list$A[[2]]
df_list$A[[3]]
...
df_list$A[[18]]


df_list$B[[1]]
#    TEAM  PLAYER LP POS PLAYER_POS PLAYER_NUM
# 9     B  Quincy  1  2B  Quincy_2B          1
# 11    B    Luis  2  OF    Luis_OF          1
# 12    B Xzavier  3   C  Xzavier_C          1
# 13    B    Seth  4  3B    Seth_3B          1
# 14    B   Layne  5  1B   Layne_1B          1
df_list$B[[2]]
df_list$B[[3]]
...
df_list$B[[20]]

Upvotes: 3

Related Questions