Wagner Jorge
Wagner Jorge

Reputation: 430

Optimization of big lists

I have this code and I don't know how to optimize it; I think this is my best performance so far. Do you know a faster way to work with lists like this?

require(dplyr); require(rgeos); require(sp)
# Simulate a list of random "polygons".
#
# objects: number of polygons to generate.
# vertex:  number of vertices per polygon.
#
# Returns a list of length `objects`; each element is a `vertex` x 2 matrix
# of coordinates drawn uniformly from [0, 1].
sim.polygons <- function(objects, vertex) {
  # lapply preallocates the result list, avoiding the O(n^2) cost of
  # growing a list element-by-element inside a for loop; seq_len() is
  # safe when objects == 0 (1:objects would yield c(1, 0)).
  lapply(seq_len(objects), function(i) matrix(runif(vertex * 2), ncol = 2))
}

# Compute all pairwise intersection areas between two lists of polygons.
#
# lista1, lista2: lists of 2-column coordinate matrices; each is converted
#                 to a "gpc.poly" object (rgeos) before intersecting.
# progress:       if TRUE, print the outer-loop index after each column.
#
# Returns a length(lista2) x length(lista1) matrix where entry [l, k] is
# the area of the intersection of lista1[[k]] and lista2[[l]].
test <- function(lista1, lista2, progress = FALSE) {
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  res <- matrix(0, nrow = length(lista2), ncol = length(lista1))
  # seq_along() is safe for empty lists, unlike 1:length(x).
  for (k in seq_along(lista1)) {
    for (l in seq_along(lista2)) {
      # area.poly(intersect(...)) dominates the run time of this function.
      res[l, k] <- area.poly(intersect(lista1[[k]], lista2[[l]]))
    }
    # isTRUE() guards against non-scalar/NA values of progress; TRUE/FALSE
    # literals avoid relying on the reassignable T/F bindings.
    if (isTRUE(progress)) print(k)
  }
  res
}
# Example: small inputs keep this demo fast; the real problem is larger.
a = sim.polygons(50, 3) # in the real problem, objects = 144 and vertex = 3
b = sim.polygons(100, 3) # NOTE(review): comment said objects = 114^2, but the answer below uses 114 — confirm which is intended

test(a, b, T)

Upvotes: 1

Views: 54

Answers (1)

Bryan Goggin
Bryan Goggin

Reputation: 2489

This problem is pretty easy to implement in parallel with foreach and doParallel. In this example I compare your for loops against foreach loops, using the problem sizes you describe in your code comments.

require(dplyr); require(rgeos); require(sp)
# Simulate a list of random "polygons".
#
# objects: number of polygons to generate.
# vertex:  number of vertices per polygon.
#
# Returns a list of length `objects`; each element is a `vertex` x 2 matrix
# of coordinates drawn uniformly from [0, 1].
sim.polygons <- function(objects, vertex) {
  # lapply preallocates the result list, avoiding the O(n^2) cost of
  # growing a list element-by-element inside a for loop; seq_len() is
  # safe when objects == 0 (1:objects would yield c(1, 0)).
  lapply(seq_len(objects), function(i) matrix(runif(vertex * 2), ncol = 2))
}

# Serial baseline: all pairwise intersection areas between two polygon lists.
#
# lista1, lista2: lists of 2-column coordinate matrices; each is converted
#                 to a "gpc.poly" object (rgeos) before intersecting.
# progress:       if TRUE, print the outer-loop index after each column.
#
# Returns a length(lista2) x length(lista1) matrix where entry [l, k] is
# the area of the intersection of lista1[[k]] and lista2[[l]].
test <- function(lista1, lista2, progress = FALSE) {
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  res <- matrix(0, nrow = length(lista2), ncol = length(lista1))
  # seq_along() is safe for empty lists, unlike 1:length(x).
  for (k in seq_along(lista1)) {
    for (l in seq_along(lista2)) {
      # area.poly(intersect(...)) dominates the run time of this function.
      res[l, k] <- area.poly(intersect(lista1[[k]], lista2[[l]]))
    }
    if (isTRUE(progress)) print(k)
  }
  res
}

# Benchmark inputs at the sizes from the question's comments.
a = sim.polygons(144, 3) # in the real problem, objects = 144 and vertex = 3
b = sim.polygons(114, 3) # NOTE(review): question's comment said 114^2, but 114 is used here — confirm
# Time the serial version and keep its result for comparison below.
system.time(res<-test(a, b, T))
user  system elapsed 
34.66    0.02   34.67 

library(foreach)
library(doParallel)
# NOTE(review): 6 is hard-coded; parallel::detectCores() would adapt to the
# machine, and the cluster should eventually be released with stopCluster(cl).
cl<-makeCluster(6)
registerDoParallel(cl)
getDoParWorkers() #6
# Load rgeos on each of the 6 workers so intersect()/area.poly() resolve there.
foreach(i=1:6) %dopar% library(rgeos)

# Parallel version of test(): all pairwise intersection areas via foreach.
#
# lista1, lista2: lists of 2-column coordinate matrices; each is converted
#                 to a "gpc.poly" object (rgeos) before intersecting.
# progress:       accepted only for interface compatibility with test();
#                 per-column progress cannot be printed from %dopar% workers.
#
# Returns a length(lista2) x length(lista1) matrix matching test(): the
# outer foreach builds one column per element of lista1 (.combine = "cbind"),
# and the inner foreach concatenates areas down that column (.combine = "c"),
# reproducing res[l, k].
test.par <- function(lista1, lista2, progress = FALSE) {
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  # The original preallocated a zero matrix here and then discarded it;
  # foreach builds the result itself, so no preallocation is needed.
  res <- foreach(k = seq_along(lista1), .combine = "cbind") %:%
    foreach(l = seq_along(lista2), .combine = "c") %dopar%
      area.poly(intersect(lista1[[k]], lista2[[l]]))
  # Return res as the last expression: a bare assignment returns its value
  # invisibly, which would make test.par(a, b) print nothing at the console.
  res
}


system.time(res.par<-test.par(a, b, T))
user  system elapsed 
7.97    0.46   15.51 

dim(res)
[1] 114 144

dim(res.par)
[1] 114 144

sum(rowSums(res-res.par))
[1] 0

This implementation effectively cuts the computation time in half running on 6 cores. Your results may vary with more or less cores. There is still likely more to gain from clever programming within the loops.

Upvotes: 2

Related Questions