jpcgandre
jpcgandre

Reputation: 1505

Substituting for loop to run in parallel in R

I have this for loop:

library(EnvStats)
mvfyfueu <- matrix(, nrow = 0, ncol = 3)
for (i in 1:2000 ) {

  # fy (S355, yield strength N/mm2)
  meanmean = 419.38 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 400 #(**)
  meanmeanuv = 440 #(**)
  meanfy <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 20.25 #(*)
  sdsd = 5 #(**)
  meansdlv = 15 #(**)
  meansduv = 25 #(**)
  sdfy <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fylv = 355 #(*)
  fyuv = 500 #(**)  
  lsupfy <- 1 - plnormTruncAlt(fyuv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10
  linffy <- plnormTruncAlt(fylv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10

  # fu (S355, tensile strength N/mm2)
  meanmean = 533.44 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 500 #(**)
  meanmeanuv = 550 #(**)
  meanfu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 16.53 #(*)
  sdsd = 5 #(**)
  meansdlv = 10 #(**)
  meansduv = 25 #(**)
  sdfu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fulv = 470 #(*)
  fuuv = 630 #(*)  
  lsupfu <- 1 - plnormTruncAlt(fuuv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10
  linffu <- plnormTruncAlt(fulv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10

  # eu (S355, strain at maximum strength mm/mm)
  meanmean = 0.2645 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 0.2 #(**)
  meanmeanuv = 0.3 #(**)
  meaneu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 0.0613 #(*)
  sdsd = 0.02 #(**)
  meansdlv = 0.02 #(**)
  meansduv = 0.1 #(**)
  sdeu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  eulv = 0.1 #(*)
  euuv = 0.3 #(*)  
  lsupeu <- 1 - plnormTruncAlt(euuv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10
  linfeu <- plnormTruncAlt(eulv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10

  #Generate samples
  mat.fyfueu <- simulateMvMatrix(2000,
                              distributions = c(fy = "lnormAlt",fu = "lnormAlt",eu = "lnormAlt"),
                              param.list = list(fy = list(mean=meanfy[1], cv=sdfy[1]/meanfy[1]),
                                                fu = list(mean=meanfu[1], cv=sdfu[1]/meanfu[1]),
                                                eu = list(mean=meaneu[1], cv=sdeu[1]/meaneu[1])),
                              left.tail.cutoff = c(fy = ifelse(linffy <= 1e-5, 0, linffy),
                                                      fu = ifelse(linffu <= 1e-5, 0, linffu),
                                                      eu = ifelse(linfeu <= 1e-5, 0, linfeu)),
                              right.tail.cutoff = c(fy = ifelse(lsupfy <= 0, .Machine$double.eps, lsupfy),
                                                       fu = ifelse(lsupfu <= 0, .Machine$double.eps, lsupfu),
                                                       eu = ifelse(lsupeu <= 0, .Machine$double.eps, lsupeu)),
                              cor.mat = matrix(c(1, .75, -0.45, .75, 1, -0.6, -0.45, -0.6, 1), 3, 3),
                              sample.method = "LHS", max.iter = 100) #, seed = i

  mvfyfueu <- rbind2(mvfyfueu, mat.fyfueu)
}

dist function:

dist <- function(meanv, sdv, lv, uv, dist) {
  library(EnvStats)
  lsup <- 1 - pnorm(uv, mean = meanv, sd = sdv)
  linf <- pnorm(lv, mean = meanv, sd = sdv)
  value <- simulateVector(2, distribution = dist,
                       param.list = list(mean = meanv, sd = sdv), #seed = i,
                       sort = FALSE, left.tail.cutoff = ifelse(linf == 0, .Machine$double.eps, linf),
                       right.tail.cutoff = ifelse(lsup == 0, .Machine$double.eps, lsup), sample.method = "LHS")
  return(value)
}

Now I want to run it in parallel so I change it to:

library(doParallel)
library(foreach)
#setup parallel backend to use 7 processors
cl<-makeCluster(7)
registerDoParallel(cl)
library(EnvStats)
mvfyfueu <- matrix(, nrow = 0, ncol = 3)
iters <- 100
foreach(icount(iters)) %dopar% {

  # fy (S355, yield strength N/mm2)
  meanmean = 419.38 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 400 #(**)
  meanmeanuv = 440 #(**)
  meanfy <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 20.25 #(*)
  sdsd = 5 #(**)
  meansdlv = 15 #(**)
  meansduv = 25 #(**)
  sdfy <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fylv = 355 #(*)
  fyuv = 500 #(**)  
  lsupfy <- 1 - plnormTruncAlt(fyuv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10
  linffy <- plnormTruncAlt(fylv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10

  # fu (S355, tensile strength N/mm2)
  meanmean = 533.44 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 500 #(**)
  meanmeanuv = 550 #(**)
  meanfu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 16.53 #(*)
  sdsd = 5 #(**)
  meansdlv = 10 #(**)
  meansduv = 25 #(**)
  sdfu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fulv = 470 #(*)
  fuuv = 630 #(*)  
  lsupfu <- 1 - plnormTruncAlt(fuuv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10
  linffu <- plnormTruncAlt(fulv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10

  # eu (S355, strain at maximum strength mm/mm)
  meanmean = 0.2645 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 0.2 #(**)
  meanmeanuv = 0.3 #(**)
  meaneu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 0.0613 #(*)
  sdsd = 0.02 #(**)
  meansdlv = 0.02 #(**)
  meansduv = 0.1 #(**)
  sdeu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  eulv = 0.1 #(*)
  euuv = 0.3 #(*)  
  lsupeu <- 1 - plnormTruncAlt(euuv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10
  linfeu <- plnormTruncAlt(eulv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10

  #Generate samples
  mat.fyfueu <- simulateMvMatrix(2000,
                                 distributions = c(fy = "lnormAlt",fu = "lnormAlt",eu = "lnormAlt"),
                                 param.list = list(fy = list(mean=meanfy[1], cv=sdfy[1]/meanfy[1]),
                                                   fu = list(mean=meanfu[1], cv=sdfu[1]/meanfu[1]),
                                                   eu = list(mean=meaneu[1], cv=sdeu[1]/meaneu[1])),
                                 left.tail.cutoff = c(fy = ifelse(linffy <= 1e-5, 0, linffy),
                                                      fu = ifelse(linffu <= 1e-5, 0, linffu),
                                                      eu = ifelse(linfeu <= 1e-5, 0, linfeu)),
                                 right.tail.cutoff = c(fy = ifelse(lsupfy <= 0, .Machine$double.eps, lsupfy),
                                                       fu = ifelse(lsupfu <= 0, .Machine$double.eps, lsupfu),
                                                       eu = ifelse(lsupeu <= 0, .Machine$double.eps, lsupeu)),
                                 cor.mat = matrix(c(1, .75, -0.45, .75, 1, -0.6, -0.45, -0.6, 1), 3, 3),
                                 sample.method = "LHS", max.iter = 100) #, seed = i

  mvfyfueu <- rbind2(mvfyfueu, mat.fyfueu)
}

But at the end of the parallel run I get an empty mvfyfueu matrix:

> mvfyfueu
     [,1] [,2] [,3]

Which is quite different result than in the serial run. What should I correct? Thanks

Upvotes: 1

Views: 100

Answers (1)

nrussell
nrussell

Reputation: 18612

Since I don't have an appropriate set of objects to test your functions, or the time to create a comparable version of your script, I'm going to just use some toy data and a trivial process to demonstrate -

library(iterators)
library(foreach)
## your parallel backend setup may 
## be different, but that shouldn't
## affect anything
library(doSNOW)
library(parallel)
##
mvfyfueu <- matrix( , nrow = 0, ncol = 3)
iters <- 100
v1 <- v2 <- v3 <- 1:100
##
cl <- parallel::makeCluster("SOCK",3)
registerDoSNOW(cl)
##
mvfyfueu <- foreach(
  icount(iters),
  .combine=rbind) %dopar% {

    mat_row <- matrix(
      c(sample(v1),
        sample(v2),
        sample(v3)),
      nrow=1,
      ncol=3,
      byrow=TRUE)
    mat_row
  }
##
stopCluster(cl)
##
> dim(mvfyfueu)
[1] 100   3
> head(mvfyfueu)
     [,1] [,2] [,3]
[1,]   80   95   77
[2,]   75   24   57
[3,]   33   89   67
[4,]   29   91   75
[5,]   18   75   20
[6,]   54   44   25

When you use foreach you should most definitely take advantage of the .combine argument, which determines how your data is combined (in my experience, this is usually rbind). When you do this you don't need to explicitly rbind objects from each iteration with a global object in your foreach body, the .combine argument takes care of this. And as I noted in my comment, I believe it is necessary to assign the foreach call to an object. Let me know if this helps, and if not, feel free to post some sample data for testing your code.

Upvotes: 2

Related Questions