Muly
Muly

Reputation: 33

R: Find all combinations without replacement of a sparse matrix

I want to find all possible combinations (without replacement) of a big sparse matrix. Every combination can choose at most one time from each row and column. My goal is to find the combination that maximizes the sum of the chosen entries.

Say I have the following matrix:

6 8 . .
. 5 7 .
. 6 . 9

There are 4 possible combinations (in terms of i and j): [(1,1),(2,2),(3,4)],[(1,1),(2,3),(3,2)],[(1,2),(2,3),(3,2)],[(1,2),(2,3),(3,4)]

My outcome should be the sum of entries for each possible combination, where my final goal is to find the combination that maximizes this outcome ([(1,2),(2,3),(3,4)] = 8 + 7 + 9 = 24 in this example).

Edit: here is the full code that generates the sparse matrix of which I want to find the optimal combination:

library(data.table)
library(ggplot2)
library(haven)
library(Matrix)
library(evd) 

set.seed(12345)

N1 <- 100
M <- 100
I1 <- 10
I2 <- 2
I <- I1 * I2
N <- N1 * I2
J <- 5
p_c_A = 0.02
p_c_B = 0.01
p_0 = 0.05
p_1 = 0.2

dt_workers<- data.table(worker_id = 1:N, 
                           firm_id = sample.int(M, N, replace = TRUE),
                           worker_type = sample.int(I1, N, replace = TRUE)) 

dt_workers[, worker_ethnicity := 1 * (worker_id > N1)]

dt_firms <- data.table(firm_id = 1:M, 
                         firm_type = sample(J) )


sys_util <- matrix(NA, nrow=I1, ncol=J)
for(i in 1:dim(sys_util)[1]){
  for(j in 1:dim(sys_util)[2]){
    sys_util[i,j] <- i * j}
}


joint_surplus

con_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
con_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)

con_A <- 1 * (con_A < p_c_A)
con_B <- 1 * (con_B < p_c_B)

p_meet_A <- con_A * p_1 + (1 - con_A) * p_0
p_meet_B <- con_B * p_1 + (1 - con_B) * p_0

meet_A <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)
meet_B <- matrix(data = runif(N1 * M), nrow = N1, ncol = M)

meet_A <- 1* ( meet_A < p_meet_A )
meet_B <- 1* ( meet_B < p_meet_B )

meet <- rbind(meet_A,meet_B)

meet_sparse <- Matrix(meet, sparse = TRUE)
util <- which (meet_sparse>0, arr.ind=T)

n_draws <- dim(util)[1]


mu = 0
sigma = 10
idio = rgumbel(n=n_draws, loc=mu, scale=sigma)

util <- cbind(util,idio)
sys <- vector()
for(k in 1:dim(util)[1]){
  g <- util[k,1]
  f <- util[k,2]
  i <- dt_workers[g, worker_type]
  j <- dt_firms[f, firm_type]
  sys[k] = sys_util[i,j]
}
util <- cbind(util,sys)

total_util = util[,3] + util[,4]

M <- sparseMatrix(
  i = util[,1],
  j = util[,2],
  x = total_util
)
dat <- as.data.frame(summary(M))
dat <-dat[order(dat$i, dat$j),]
rownames(dat) <- NULL

Upvotes: 1

Views: 226

Answers (2)

Muly
Muly

Reputation: 33

I found a solution using linear programming with the help of Aurèle:

f.con <- matrix(,nrow = dim(dat)[1],ncol=0)
for(k in 1: N){   
    vec <- 1 * (dat[,1] == k)
    f.con <- cbind(f.con , vec )
}

for(k in 1: M){   
  vec <- 1 * (dat[,2] == k)
  f.con <- cbind(f.con , vec )
}
f.con <- t(f.con)
f.obj <- dat[,3]
f.dir <- rep ("<=", dim(f.con)[1])
f.rhs <- rep (1, dim(f.con)[1])
res = lp (direction = "max", f.obj, f.con, f.dir, f.rhs ,  all.int=TRUE)$solution

Upvotes: 0

Aur&#232;le
Aur&#232;le

Reputation: 12819

library(Matrix)

M <- sparseMatrix(
  i = c(1, 1, 2, 2, 3, 3),
  j = c(1, 2, 2, 3, 2, 4),
  x = c(6, 8, 5, 7, 6, 9)
)

#> 3 x 4 sparse Matrix of class "dgCMatrix"
#>             
#> [1,] 6 8 . .
#> [2,] . 5 7 .
#> [3,] . 6 . 9

dat <- as.data.frame(summary(M))

#>   i j x
#> 1 1 1 6
#> 2 1 2 8
#> 3 2 2 5
#> 4 3 2 6
#> 5 2 3 7
#> 6 3 4 9

row_indices <- unique(dat$i)
col_indices <- split(dat$j, dat$i)

#> $`1`
#> [1] 1 2
#> 
#> $`2`
#> [1] 2 3
#> 
#> $`3`
#> [1] 2 4

all_combinations_with_atmost_one_per_row <- do.call(expand.grid, col_indices) 

#>   1 2 3
#> 1 1 2 2
#> 2 2 2 2
#> 3 1 3 2
#> 4 2 3 2
#> 5 1 2 4
#> 6 2 2 4
#> 7 1 3 4
#> 8 2 3 4

more_than_one_per_col <- apply(all_combinations_with_atmost_one_per_row, MARGIN = 1, anyDuplicated)

#> [1] 3 2 0 3 0 2 0 0

combinations <- all_combinations_with_atmost_one_per_row[!more_than_one_per_col, , drop = FALSE]

#>   1 2 3
#> 3 1 3 2
#> 5 1 2 4
#> 7 1 3 4
#> 8 2 3 4

lapply(
  split(combinations, 1:nrow(combinations)),
  function(cols) {
    elements <- data.frame(i = row_indices, j = unlist(cols))
    elements$value <- M[as.matrix(elements)]
    list(elements = elements, sum = sum(elements$value))
  }
)
#> $`1`
#> $`1`$elements
#>   i j value
#> 1 1 1     6
#> 2 2 3     7
#> 3 3 2     6
#> 
#> $`1`$sum
#> [1] 19
#> 
#> 
#> $`2`
#> $`2`$elements
#>   i j value
#> 1 1 1     6
#> 2 2 2     5
#> 3 3 4     9
#> 
#> $`2`$sum
#> [1] 20
#> 
#> 
#> $`3`
#> $`3`$elements
#>   i j value
#> 1 1 1     6
#> 2 2 3     7
#> 3 3 4     9
#> 
#> $`3`$sum
#> [1] 22
#> 
#> 
#> $`4`
#> $`4`$elements
#>   i j value
#> 1 1 2     8
#> 2 2 3     7
#> 3 3 4     9
#> 
#> $`4`$sum
#> [1] 24

Created on 2019-04-10 by the reprex package (v0.2.1)

And the best combination is found with res[[which.max(sapply(res, `[[`, "sum"))]]

$elements
  i j value
1 1 2     8
2 2 3     7
3 3 4     9

$sum
[1] 24

Upvotes: 0

Related Questions