Reputation: 1
I am using R to find the set of values that maximise a certain function (the function is given here by obj_fun
). The problem is that for a large number of the parameter pD
, I get an integer overflow error. I know this is caused by expand.grid
, since for any value of pD
, I will have pD
parameters which can take values $0, i+1, \dots, p_D$, where $i$ is just the "index" of each parameter (e.g. the 3rd parameter can take the values 0, 4, ... , $p_D$). I am basically storing all possible combinations of all the $p_D$ parameters in a list (that is ais_list
in the code) and I am using expand.grid
to get all their combinations and then I remove all the rows with a sum greater than $p_D$. However, this filtering process cannot be done for large $p_D$ values, as expand.grid
throws an integer overflow error due to the fact that the number of combinations is extremely large.
I am looking for a way to avoid the use of expand.grid
- something along the lines of looking whether each combination that could possibly be generated satisfies the condition that the sum of all parameters does not exceed $p_D$ and then appending it to the data frame of all combinations, denoted here by combs_mat
. This way I could avoid integer overflow issues for some larger $p_D$ values (just for the record, at the moment I am getting this issue when $p_D = 13$.
library(pracma)
# Function of interest
obj_fun <- function(pD, ai_coeffs){
fun_val <- ai_coeffs[1]
for (i in 2:pD){
ifelse(ai_coeffs[i]==0, add_val <- 0, add_val <- nchoosek(ai_coeffs[i], i)/(i^2))
fun_val <- fun_val + add_val
}
return(fun_val)
}
for (pD in c(3:25)){
# Generate a list of all possible coefficients ai's
# First element list is the one corresponding to unit length itemsets
ais_list <- list()
ais_list[[1]] <- c(0:pD)
for (i in 2:pD){
ais_list[[i-1]] <- c(0, c(i:pD))
}
# Get all possible combinations and make sure they do not exceed pD in sum
combs_mat <- expand.grid(ais_list[1:length(ais_list)])
combs_mat <- combs_mat[which(rowSums(combs_mat)<=pD),]
combs_mat <- cbind(data.frame('Var0'=pD-rowSums(combs_mat)), combs_mat)
# Apply the function to each row and store the results
res_vec <- c()
for (i in 1:nrow(combs_mat)){
res_vec <- c(res_vec, obj_fun(pD, as.numeric(combs_mat[i,])))
}
cat('pD =',pD,':\n')
print(combs_mat[which.max(res_vec),])
cat('\n')
}
Upvotes: 0
Views: 95
Reputation: 17001
A data.table
option:
for (pD in c(3:25)){
# Generate a list of all possible coefficients ai's
# First element list is the one corresponding to unit length itemsets
ais_list <- vector("list", pD - 1)
# ais_list[[1]] <- c(0:pD)
for (i in 2:pD){
ais_list[[i-1]] <- c(0, i:pD)
}
dt <- data.table(var0 = ais_list[[1]], var1 = ais_list[[1]])
for (i in 2:length(ais_list)) {
dt <- dt[rep(1:.N, each = length(ais_list[[i]]))][
, paste0("var", i) := rep(ais_list[[i]], length.out = .N)
]
dt <- dt[,var0 := var0 + dt[[i + 1]]][var0 <= pD]
}
dt[,var0 := pD - var0]
res_vec <- rowSums(choose(as.matrix(dt), col(dt))/col(dt)^2)
dt[which.max(res_vec)]
cat('pD =',pD,':\n')
print(dt[which.max(res_vec)])
cat('\n')
}
#> pD = 3 :
#> var0 var1 var2
#> 1: 3 0 0
#>
#> pD = 4 :
#> var0 var1 var2 var3
#> 1: 4 0 0 0
#>
#> pD = 5 :
#> var0 var1 var2 var3 var4
#> 1: 5 0 0 0 0
#>
#> pD = 6 :
#> var0 var1 var2 var3 var4 var5
#> 1: 6 0 0 0 0 0
#>
#> pD = 7 :
#> var0 var1 var2 var3 var4 var5 var6
#> 1: 7 0 0 0 0 0 0
#>
#> pD = 8 :
#> var0 var1 var2 var3 var4 var5 var6 var7
#> 1: 8 0 0 0 0 0 0 0
#>
#> pD = 9 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8
#> 1: 0 0 9 0 0 0 0 0 0
#>
#> pD = 10 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9
#> 1: 0 0 10 0 0 0 0 0 0 0
#>
#> pD = 11 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
#> 1: 0 0 0 11 0 0 0 0 0 0 0
#>
#> pD = 12 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11
#> 1: 0 0 0 0 12 0 0 0 0 0 0 0
#>
#> pD = 13 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12
#> 1: 0 0 0 0 13 0 0 0 0 0 0 0 0
#>
#> pD = 14 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 14 0 0 0 0 0 0 0 0
#>
#> pD = 15 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 15 0 0 0 0 0 0 0 0
#> var14
#> 1: 0
#>
#> pD = 16 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 16 0 0 0 0 0 0 0
#> var14 var15
#> 1: 0 0
#>
#> pD = 17 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 17 0 0 0 0 0 0 0
#> var14 var15 var16
#> 1: 0 0 0
#>
#> pD = 18 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 18 0 0 0 0 0 0
#> var14 var15 var16 var17
#> 1: 0 0 0 0
#>
#> pD = 19 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 19 0 0 0 0 0 0
#> var14 var15 var16 var17 var18
#> 1: 0 0 0 0 0
#>
#> pD = 20 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 20 0 0 0 0 0
#> var14 var15 var16 var17 var18 var19
#> 1: 0 0 0 0 0 0
#>
#> pD = 21 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 21 0 0 0 0 0
#> var14 var15 var16 var17 var18 var19 var20
#> 1: 0 0 0 0 0 0 0
#>
#> pD = 22 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 0 22 0 0 0 0
#> var14 var15 var16 var17 var18 var19 var20 var21
#> 1: 0 0 0 0 0 0 0 0
#>
#> pD = 23 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 0 23 0 0 0 0
#> var14 var15 var16 var17 var18 var19 var20 var21 var22
#> 1: 0 0 0 0 0 0 0 0 0
#>
#> pD = 24 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 0 0 24 0 0 0
#> var14 var15 var16 var17 var18 var19 var20 var21 var22 var23
#> 1: 0 0 0 0 0 0 0 0 0 0
#>
#> pD = 25 :
#> var0 var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13
#> 1: 0 0 0 0 0 0 0 0 0 0 25 0 0 0
#> var14 var15 var16 var17 var18 var19 var20 var21 var22 var23 var24
#> 1: 0 0 0 0 0 0 0 0 0 0 0
There appears to be a pattern. Compare to the original (up to pD = 12
).
library(pracma)
obj_fun <- function(pD, ai_coeffs){
fun_val <- ai_coeffs[1]
for (i in 2:pD){
ifelse(ai_coeffs[i]==0, add_val <- 0, add_val <- nchoosek(ai_coeffs[i], i)/(i^2))
fun_val <- fun_val + add_val
}
return(fun_val)
}
for (pD in c(3:12)){
# Generate a list of all possible coefficients ai's
# First element list is the one corresponding to unit length itemsets
ais_list <- list()
ais_list[[1]] <- c(0:pD)
for (i in 2:pD){
ais_list[[i-1]] <- c(0, c(i:pD))
}
# Get all possible combinations and make sure they do not exceed pD in sum
combs_mat <- expand.grid(ais_list[1:length(ais_list)])
combs_mat <- combs_mat[which(rowSums(combs_mat)<=pD),]
combs_mat <- cbind(data.frame('Var0'=pD-rowSums(combs_mat)), combs_mat)
# Apply the function to each row and store the results
res_vec <- c()
for (i in 1:nrow(combs_mat)){
res_vec <- c(res_vec, obj_fun(pD, as.numeric(combs_mat[i,])))
}
cat('pD =',pD,':\n')
print(combs_mat[which.max(res_vec),])
cat('\n')
}
#> pD = 3 :
#> Var0 Var1 Var2
#> 1 3 0 0
#>
#> pD = 4 :
#> Var0 Var1 Var2 Var3
#> 1 4 0 0 0
#>
#> pD = 5 :
#> Var0 Var1 Var2 Var3 Var4
#> 1 5 0 0 0 0
#>
#> pD = 6 :
#> Var0 Var1 Var2 Var3 Var4 Var5
#> 1 6 0 0 0 0 0
#>
#> pD = 7 :
#> Var0 Var1 Var2 Var3 Var4 Var5 Var6
#> 1 7 0 0 0 0 0 0
#>
#> pD = 8 :
#> Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7
#> 1 8 0 0 0 0 0 0 0
#>
#> pD = 9 :
#> Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8
#> 64 0 0 9 0 0 0 0 0 0
#>
#> pD = 10 :
#> Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9
#> 81 0 0 10 0 0 0 0 0 0 0
#>
#> pD = 11 :
#> Var0 Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10
#> 881 0 0 0 11 0 0 0 0 0 0 0
#> Error: cannot allocate vector of size 39.3 Gb
Upvotes: 0