Reputation: 33
I want to find all possible combinations (without replacement) of a big sparse matrix. Every combination can choose at most one time from each row and column. My goal is to find the combination that maximizes the sum of the chosen entries.
Say I have the following matrix:
6 8 . .
. 5 7 .
. 6 . 9
There are 4 possible combinations (in terms of i and j): [(1,1),(2,2),(3,4)],[(1,1),(2,3),(3,2)],[(1,2),(2,3),(3,2)],[(1,2),(2,3),(3,4)]
My outcome should be the sum of entries for each possible combination, where my final goal is to find the combination that maximizes this outcome ([(1,2),(2,3),(3,4)] = 8 + 7 + 9 = 24 in this example).
Edit: here is the full code that generates the sparse matrix of which I want to find the optimal combination:
library(data.table)
library(ggplot2)
library(haven)
library(Matrix)
library(evd)
set.seed(12345)
N1 <- 100
M <- 100
I1 <- 10
I2 <- 2
I <- I1 * I2
N <- N1 * I2
J <- 5
p_c_A = 0.02
p_c_B = 0.01
p_0 = 0.05
p_1 = 0.2
dt_workers<- data.table(worker_id = 1:N,
firm_id = sample.int(M, N, replace = TRUE),
worker_type = sample.int(I1, N, replace = TRUE))
dt_workers[, worker_ethnicity := 1 * (worker_id > N1)]
dt_firms <- data.table(firm_id = 1:M,
firm_type = sample(J) )
sys_util <- matrix(NA, nrow=I1, ncol=J)
for(i in 1:dim(sys_util)[1]){
for(j in 1:dim(sys_util)[2]){
sys_util[i,j] <- i * j}
}
joint_surplus
con_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
con_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
con_A <- 1 * (con_A < p_c_A)
con_B <- 1 * (con_B < p_c_B)
p_meet_A <- con_A * p_1 + (1 - con_A) * p_0
p_meet_B <- con_B * p_1 + (1 - con_B) * p_0
meet_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
meet_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
meet_A <- 1* ( meet_A < p_meet_A )
meet_B <- 1* ( meet_B < p_meet_B )
meet <- rbind(meet_A,meet_B)
meet_sparse <- Matrix(meet, sparse = TRUE)
util <- which (meet_sparse>0, arr.ind=T)
n_draws <- dim(util)[1]
mu = 0
sigma = 10
idio = rgumbel(n=n_draws, loc=mu, scale=sigma)
util <- cbind(util,idio)
sys <- vector()
for(k in 1:dim(util)[1]){
g <- util[k,1]
f <- util[k,2]
i <- dt_workers[g, worker_type]
j <- dt_firms[f, firm_type]
sys[k] = sys_util[i,j]
}
util <- cbind(util,sys)
total_util = util[,3] + util[,4]
M <- sparseMatrix(
i = util[,1],
j = util[,2],
x = total_util
)
dat <- as.data.frame(summary(M))
dat <-dat[order(dat$i, dat$j),]
rownames(dat) <- NULL
Upvotes: 1
Views: 226
Reputation: 33
I found a solution using linear programming with the help of Aurèle:
f.con <- matrix(,nrow = dim(dat)[1],ncol=0)
for(k in 1: N){
vec <- 1 * (dat[,1] == k)
f.con <- cbind(f.con , vec )
}
for(k in 1: M){
vec <- 1 * (dat[,2] == k)
f.con <- cbind(f.con , vec )
}
f.con <- t(f.con)
f.obj <- dat[,3]
f.dir <- rep ("<=", dim(f.con)[1])
f.rhs <- rep (1, dim(f.con)[1])
res = lp (direction = "max", f.obj, f.con, f.dir, f.rhs , all.int=TRUE)$solution
Upvotes: 0
Reputation: 12819
library(Matrix)
M <- sparseMatrix(
i = c(1, 1, 2, 2, 3, 3),
j = c(1, 2, 2, 3, 2, 4),
x = c(6, 8, 5, 7, 6, 9)
)
#> 3 x 4 sparse Matrix of class "dgCMatrix"
#>
#> [1,] 6 8 . .
#> [2,] . 5 7 .
#> [3,] . 6 . 9
dat <- as.data.frame(summary(M))
#> i j x
#> 1 1 1 6
#> 2 1 2 8
#> 3 2 2 5
#> 4 3 2 6
#> 5 2 3 7
#> 6 3 4 9
row_indices <- unique(dat$i)
col_indices <- split(dat$j, dat$i)
#> $`1`
#> [1] 1 2
#>
#> $`2`
#> [1] 2 3
#>
#> $`3`
#> [1] 2 4
all_combinations_with_atmost_one_per_row <- do.call(expand.grid, col_indices)
#> 1 2 3
#> 1 1 2 2
#> 2 2 2 2
#> 3 1 3 2
#> 4 2 3 2
#> 5 1 2 4
#> 6 2 2 4
#> 7 1 3 4
#> 8 2 3 4
more_than_one_per_col <- apply(all_combinations_with_atmost_one_per_row, MARGIN = 1, anyDuplicated)
#> [1] 3 2 0 3 0 2 0 0
combinations <- all_combinations_with_atmost_one_per_row[!more_than_one_per_col, , drop = FALSE]
#> 1 2 3
#> 3 1 3 2
#> 5 1 2 4
#> 7 1 3 4
#> 8 2 3 4
lapply(
split(combinations, 1:nrow(combinations)),
function(cols) {
elements <- data.frame(i = row_indices, j = unlist(cols))
elements$value <- M[as.matrix(elements)]
list(elements = elements, sum = sum(elements$value))
}
)
#> $`1`
#> $`1`$elements
#> i j value
#> 1 1 1 6
#> 2 2 3 7
#> 3 3 2 6
#>
#> $`1`$sum
#> [1] 19
#>
#>
#> $`2`
#> $`2`$elements
#> i j value
#> 1 1 1 6
#> 2 2 2 5
#> 3 3 4 9
#>
#> $`2`$sum
#> [1] 20
#>
#>
#> $`3`
#> $`3`$elements
#> i j value
#> 1 1 1 6
#> 2 2 3 7
#> 3 3 4 9
#>
#> $`3`$sum
#> [1] 22
#>
#>
#> $`4`
#> $`4`$elements
#> i j value
#> 1 1 2 8
#> 2 2 3 7
#> 3 3 4 9
#>
#> $`4`$sum
#> [1] 24
Created on 2019-04-10 by the reprex package (v0.2.1)
And the best combination is found with res[[which.max(sapply(res, `[[`, "sum"))]]
$elements
i j value
1 1 2 8
2 2 3 7
3 3 4 9
$sum
[1] 24
Upvotes: 0