Reputation: 81

How can I improve the speed of this for loop code?

y<-data.frame(f1=round(runif(100, 1,5)),
              f2=round(runif(100, 1,5)),
              f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y) 
response <- matrix(NA, m, m, dimnames=list(nr, nr))
for(a in 1:m) for(b in 1:m) 
  response[a, b] <- all(y[a,]<=y[b,])
response

Upvotes: 0

Answers (3)

d.b

Reputation: 32548

Convert y to matrix and you can use apply functions to improve speed.

dat = as.matrix(y)
response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
dimnames(response2) = list(nr, nr)

Benchmarking

y<-data.frame(f1=round(runif(20, 1,5)),
              f2=round(runif(20, 1,5)),
              f3=round(runif(20, 1,5)))
m <- nrow(y)
nr <- rownames(y) 

microbenchmark(f1 = {
    response <- matrix(NA, m, m, dimnames=list(nr, nr))
    for(a in 1:m) for(b in 1:m) 
        response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
    dat = as.matrix(y)
    response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
    dimnames(response2) = list(nr, nr)
})
# Unit: milliseconds
#  expr     min       lq      mean   median      uq      max neval
#    f1 73.3571 77.43730 84.701259 79.64565 85.7293 135.9448   100
#    f2  1.9453  2.16485  2.544873  2.27505  2.5295   5.9237   100
identical(response, response2)
# [1] TRUE

Upvotes: 2

ThomasIsCoding

Reputation: 101403

You can try outer like below

lst <- asplit(y, 1)
outer(lst, lst, FUN = Vectorize(function(x, y) all(x <= y)))

Upvotes: 1

Brian Montgomery

Reputation: 2414

I agree with @d.b that converting the data to a matrix is the best first step, but using the matrixStats package you can also avoid the nested loops.

y<-data.frame(f1=round(runif(100, 1,5)),
              f2=round(runif(100, 1,5)),
              f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y) 

microbenchmark::microbenchmark(f1 = {
  response <- matrix(NA, m, m, dimnames=list(nr, nr))
  for(a in 1:m) for(b in 1:m) 
    response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
  dat = as.matrix(y)
  response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
},
f3 = {
  response3 <- matrix(NA, m, m, dimnames=list(nr, nr))
  z <- as.matrix(y)
  for (i in 1:m) {
    response3[i,] <- matrixStats::rowAlls(matrix(rep(z[i, ], m), nrow = m, ncol = 3, byrow = TRUE) <= z)  
  }
  
})
Unit: milliseconds
 expr       min         lq        mean     median         uq       max neval
   f1 1154.2246 1164.34475 1178.059824 1170.31180 1177.58840 1272.3578   100
   f2   24.4325   25.09795   27.147773   26.13300   29.82185   32.7160   100
   f3    3.6844    3.94145    4.104324    4.04175    4.13685    9.7601   100

identical(response, response3)
[1] TRUE

Upvotes: 1

How can I improve the speed of this for loop code?

Answers (3)

Related Questions