Reputation: 81
How can I improve the speed of this for loop code?
y<-data.frame(f1=round(runif(100, 1,5)),
f2=round(runif(100, 1,5)),
f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y)
response <- matrix(NA, m, m, dimnames=list(nr, nr))
for(a in 1:m) for(b in 1:m)
response[a, b] <- all(y[a,]<=y[b,])
response
Upvotes: 0
Views: 75
Reputation: 32548
Convert y
to matrix and you can use apply
functions to improve speed.
dat = as.matrix(y)
response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
dimnames(response2) = list(nr, nr)
Benchmarking
y<-data.frame(f1=round(runif(20, 1,5)),
f2=round(runif(20, 1,5)),
f3=round(runif(20, 1,5)))
m <- nrow(y)
nr <- rownames(y)
microbenchmark(f1 = {
response <- matrix(NA, m, m, dimnames=list(nr, nr))
for(a in 1:m) for(b in 1:m)
response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
dat = as.matrix(y)
response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
dimnames(response2) = list(nr, nr)
})
# Unit: milliseconds
# expr min lq mean median uq max neval
# f1 73.3571 77.43730 84.701259 79.64565 85.7293 135.9448 100
# f2 1.9453 2.16485 2.544873 2.27505 2.5295 5.9237 100
identical(response, response2)
# [1] TRUE
Upvotes: 2
Reputation: 101403
You can try outer
like below
lst <- asplit(y, 1)
outer(lst, lst, FUN = Vectorize(function(x, y) all(x <= y)))
Upvotes: 1
Reputation: 2414
I agree with @d.b that converting the data to a matrix is the best first step, but using the matrixStats package you can also avoid the nested loops.
y<-data.frame(f1=round(runif(100, 1,5)),
f2=round(runif(100, 1,5)),
f3=round(runif(100, 1,5)))
m <- nrow(y)
nr <- rownames(y)
microbenchmark::microbenchmark(f1 = {
response <- matrix(NA, m, m, dimnames=list(nr, nr))
for(a in 1:m) for(b in 1:m)
response[a, b] <- all(y[a,]<=y[b,])
},
f2 = {
dat = as.matrix(y)
response2 = sapply(1:m, function(i) apply(dat, 1, function(x) all(x <= dat[i,])))
},
f3 = {
response3 <- matrix(NA, m, m, dimnames=list(nr, nr))
z <- as.matrix(y)
for (i in 1:m) {
response3[i,] <- matrixStats::rowAlls(matrix(rep(z[i, ], m), nrow = m, ncol = 3, byrow = TRUE) <= z)
}
})
Unit: milliseconds
expr min lq mean median uq max neval
f1 1154.2246 1164.34475 1178.059824 1170.31180 1177.58840 1272.3578 100
f2 24.4325 25.09795 27.147773 26.13300 29.82185 32.7160 100
f3 3.6844 3.94145 4.104324 4.04175 4.13685 9.7601 100
identical(response, response3)
[1] TRUE
Upvotes: 1