HeyCool08
HeyCool08

Reputation: 1

Create all combinations satisfying a sum condition without expand.grid

I am using R to find the set of values that maximise a certain function (the function is given here by obj_fun). The problem is that for a large number of the parameter pD, I get an integer overflow error. I know this is caused by expand.grid, since for any value of pD, I will have pD parameters which can take values $0, i+1, \dots, p_D$, where $i$ is just the "index" of each parameter (e.g. the 3rd parameter can take the values 0, 4, ... , $p_D$). I am basically storing all possible combinations of all the $p_D$ parameters in a list (that is ais_list in the code) and I am using expand.grid to get all their combinations and then I remove all the rows with a sum greater than $p_D$. However, this filtering process cannot be done for large $p_D$ values, as expand.grid throws an integer overflow error due to the fact that the number of combinations is extremely large.

I am looking for a way to avoid the use of expand.grid - something along the lines of looking whether each combination that could possibly be generated satisfies the condition that the sum of all parameters does not exceed $p_D$ and then appending it to the data frame of all combinations, denoted here by combs_mat. This way I could avoid integer overflow issues for some larger $p_D$ values (just for the record, at the moment I am getting this issue when $p_D = 13$.

library(pracma)

# Function of interest
obj_fun <- function(pD, ai_coeffs){
  fun_val <- ai_coeffs[1]
  for (i in 2:pD){
    ifelse(ai_coeffs[i]==0, add_val <- 0, add_val <- nchoosek(ai_coeffs[i], i)/(i^2))
    fun_val <- fun_val + add_val
  }
  return(fun_val)
}

for (pD in c(3:25)){
  
  # Generate a list of all possible coefficients ai's
  # First element list is the one corresponding to unit length itemsets
  ais_list <- list()
  ais_list[[1]] <- c(0:pD)
  for (i in 2:pD){
    ais_list[[i-1]] <- c(0, c(i:pD))
  }
  
  # Get all possible combinations and make sure they do not exceed pD in sum
  combs_mat <- expand.grid(ais_list[1:length(ais_list)])
  combs_mat <- combs_mat[which(rowSums(combs_mat)<=pD),]
  combs_mat <- cbind(data.frame('Var0'=pD-rowSums(combs_mat)), combs_mat)
  
  # Apply the function to each row and store the results
  res_vec <- c()
  for (i in 1:nrow(combs_mat)){
    res_vec <- c(res_vec, obj_fun(pD, as.numeric(combs_mat[i,])))
  }
  cat('pD =',pD,':\n')
  print(combs_mat[which.max(res_vec),])
  cat('\n')
}

Upvotes: 0

Views: 95

Answers (1)

jblood94
jblood94

Reputation: 17001

A data.table option:

for (pD in c(3:25)){
  # Generate a list of all possible coefficients ai's
  # First element list is the one corresponding to unit length itemsets
  ais_list <- vector("list", pD - 1)
  # ais_list[[1]] <- c(0:pD)
  for (i in 2:pD){
    ais_list[[i-1]] <- c(0, i:pD)
  }
  
  dt <- data.table(var0 = ais_list[[1]], var1 = ais_list[[1]])
  
  for (i in 2:length(ais_list)) {
    dt <- dt[rep(1:.N, each = length(ais_list[[i]]))][
      , paste0("var", i) := rep(ais_list[[i]], length.out = .N)
    ]
    dt <- dt[,var0 := var0 + dt[[i + 1]]][var0 <= pD]
  }
  
  dt[,var0 := pD - var0]
  res_vec <- rowSums(choose(as.matrix(dt), col(dt))/col(dt)^2)
  dt[which.max(res_vec)]
  
  cat('pD =',pD,':\n')
  print(dt[which.max(res_vec)])
  cat('\n')
}
#> pD = 3 :
#>    var0 var1 var2
#> 1:    3    0    0
#> 
#> pD = 4 :
#>    var0 var1 var2 var3
#> 1:    4    0    0    0
#> 
#> pD = 5 :
#>    var0 var1 var2 var3 var4
#> 1:    5    0    0    0    0
#> 
#> pD = 6 :
#>    var0 var1 var2 var3 var4 var5
#> 1:    6    0    0    0    0    0
#> 
#> pD = 7 :
#>    var0 var1 var2 var3 var4 var5 var6
#> 1:    7    0    0    0    0    0    0
#> 
#> pD = 8 :
#>    var0 var1 var2 var3 var4 var5 var6 var7
#> 1:    8    0    0    0    0    0    0    0
#> 
#> pD = 9 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8
#> 1:    0    0    9    0    0    0    0    0    0
#> 
#> pD = 10 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9
#> 1:    0    0   10    0    0    0    0    0    0    0
#> 
#> pD = 11 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
#> 1:    0    0    0   11    0    0    0    0    0    0     0
#> 
#> pD = 12 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11
#> 1:    0    0    0    0   12    0    0    0    0    0     0     0
#> 
#> pD = 13 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12
#> 1:    0    0    0    0   13    0    0    0    0    0     0     0     0
#> 
#> pD = 14 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0   14    0    0    0    0     0     0     0     0
#> 
#> pD = 15 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0   15    0    0    0    0     0     0     0     0
#>    var14
#> 1:     0
#> 
#> pD = 16 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0   16    0    0    0     0     0     0     0
#>    var14 var15
#> 1:     0     0
#> 
#> pD = 17 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0   17    0    0    0     0     0     0     0
#>    var14 var15 var16
#> 1:     0     0     0
#> 
#> pD = 18 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0   18    0    0     0     0     0     0
#>    var14 var15 var16 var17
#> 1:     0     0     0     0
#> 
#> pD = 19 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0   19    0    0     0     0     0     0
#>    var14 var15 var16 var17 var18
#> 1:     0     0     0     0     0
#> 
#> pD = 20 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0   20    0     0     0     0     0
#>    var14 var15 var16 var17 var18 var19
#> 1:     0     0     0     0     0     0
#> 
#> pD = 21 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0   21    0     0     0     0     0
#>    var14 var15 var16 var17 var18 var19 var20
#> 1:     0     0     0     0     0     0     0
#> 
#> pD = 22 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0    0   22     0     0     0     0
#>    var14 var15 var16 var17 var18 var19 var20 var21
#> 1:     0     0     0     0     0     0     0     0
#> 
#> pD = 23 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0    0   23     0     0     0     0
#>    var14 var15 var16 var17 var18 var19 var20 var21 var22
#> 1:     0     0     0     0     0     0     0     0     0
#> 
#> pD = 24 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0    0    0    24     0     0     0
#>    var14 var15 var16 var17 var18 var19 var20 var21 var22 var23
#> 1:     0     0     0     0     0     0     0     0     0     0
#> 
#> pD = 25 :
#>    var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1:    0    0    0    0    0    0    0    0    0    0    25     0     0     0
#>    var14 var15 var16 var17 var18 var19 var20 var21 var22 var23 var24
#> 1:     0     0     0     0     0     0     0     0     0     0     0

There appears to be a pattern. Compare to the original (up to pD = 12).

library(pracma)

obj_fun <- function(pD, ai_coeffs){
  fun_val <- ai_coeffs[1]
  for (i in 2:pD){
    ifelse(ai_coeffs[i]==0, add_val <- 0, add_val <- nchoosek(ai_coeffs[i], i)/(i^2))
    fun_val <- fun_val + add_val
  }
  return(fun_val)
}

for (pD in c(3:12)){
  
  # Generate a list of all possible coefficients ai's
  # First element list is the one corresponding to unit length itemsets
  ais_list <- list()
  ais_list[[1]] <- c(0:pD)
  for (i in 2:pD){
    ais_list[[i-1]] <- c(0, c(i:pD))
  }
  
  # Get all possible combinations and make sure they do not exceed pD in sum
  combs_mat <- expand.grid(ais_list[1:length(ais_list)])
  combs_mat <- combs_mat[which(rowSums(combs_mat)<=pD),]
  combs_mat <- cbind(data.frame('Var0'=pD-rowSums(combs_mat)), combs_mat)
  
  # Apply the function to each row and store the results
  res_vec <- c()
  for (i in 1:nrow(combs_mat)){
    res_vec <- c(res_vec, obj_fun(pD, as.numeric(combs_mat[i,])))
  }
  cat('pD =',pD,':\n')
  print(combs_mat[which.max(res_vec),])
  cat('\n')
}
#> pD = 3 :
#>   Var0 Var1 Var2
#> 1    3    0    0
#> 
#> pD = 4 :
#>   Var0 Var1 Var2 Var3
#> 1    4    0    0    0
#> 
#> pD = 5 :
#>   Var0 Var1 Var2 Var3 Var4
#> 1    5    0    0    0    0
#> 
#> pD = 6 :
#>   Var0 Var1 Var2 Var3 Var4 Var5
#> 1    6    0    0    0    0    0
#> 
#> pD = 7 :
#>   Var0 Var1 Var2 Var3 Var4 Var5 Var6
#> 1    7    0    0    0    0    0    0
#> 
#> pD = 8 :
#>   Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7
#> 1    8    0    0    0    0    0    0    0
#> 
#> pD = 9 :
#>    Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8
#> 64    0    0    9    0    0    0    0    0    0
#> 
#> pD = 10 :
#>    Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9
#> 81    0    0   10    0    0    0    0    0    0    0
#> 
#> pD = 11 :
#>     Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10
#> 881    0    0    0   11    0    0    0    0    0    0     0
#> Error: cannot allocate vector of size 39.3 Gb

Upvotes: 0

Related Questions