Pork Chop
Pork Chop

Reputation: 29387

How could I speed up the following for loop?

rm(list = ls())
a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)

two <- rep(NA, length(a))
three <- rep(NA, length(a))

system.time(
  for (i in seq_along(a))
  {
    if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0)
    {    
      two[i] <- tail(which(a[i] > b),1)
      three[i] <- tail(which(a[i] > c),1)
    }
    else    
    {    
      two[i] <- NA
      three[i] <- NA
    }
  } 
)
build_b <- b[two]
build_c <- c[three]

What I am trying to do is find what b and c looked like at the time of a. So I am prelocating memory in vectors two and three in attempt to save some time and so I can keep track of indexing of those occurrences. After the loop is completed I build the new vectors according to the indexing I just computed. Currently the operation takes about 10 sec to compute. My question is how can I speed up this operation?

Thank you!

Upvotes: 0

Views: 135

Answers (2)

sgibb
sgibb

Reputation: 25736

Here is another solution using findInterval:

## assume a, b and c are sorted
two <- findInterval(a-1L, b)
three <- findInterval(a-1L, c)

two[two==0] <- NA
three[three==0] <- NA

build_b <- b[two]
build_c <- c[three]

And here a little benchmark:

a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)

pops <- function(a, b, c) {

  two <- rep(NA, length(a))
  three <- rep(NA, length(a))

  for (i in seq_along(a))
  {
    if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0)
    {    
      two[i] <- tail(which(a[i] > b),1)
      three[i] <- tail(which(a[i] > c),1)
    }
    else    
    {    
      two[i] <- NA
      three[i] <- NA
    }
  } 
  return(list(b=b[two], c=c[three]))
}

droopy <- function(a, b, c) {

  two <- rep(NA, length(a))
  three <- rep(NA, length(a))

  for (i in seq_along(a))
  {
    if (any(u <- (a[i] > b)) & any(v <- (a[i] > c)))
    {    
      two[i] <- sum(u)
      three[i] <- sum(v)
    }
    else    
    {    
      two[i] <- NA
      three[i] <- NA
    }
  }
  return(list(b=b[two], c=c[three]))
}

sgibb <- function(a, b, c) {
  ## assume a, b and c are sorted
  two <- findInterval(a-1L, b)
  three <- findInterval(a-1L, c)

  two[two==0] <- NA
  three[three==0] <- NA

  return(list(b=b[two], c=c[three]))
}

The benchmark:

library("rbenchmark")
benchmark(pops(a, b, c), droopy(a, b, c), sgibb(a, b, c), order="relative", replications=2)
#             test replications elapsed relative user.self sys.self user.child sys.child
#3  sgibb(a, b, c)            2   0.010      1.0     0.008    0.004          0         0
#2 droopy(a, b, c)            2   8.639    863.9     8.613    0.000          0         0
#1   pops(a, b, c)            2  26.838   2683.8    26.753    0.004          0         0

identical(pops(a, b, c), sgibb(a, b, c))
## TRUE
identical(droopy(a, b, c), sgibb(a, b, c))
## TRUE

Upvotes: 4

droopy
droopy

Reputation: 2818

a possibility :

a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)

two <- integer(length(a))
three <- integer(length(a))

system.time(
{
  for (i in seq_along(a))
  {
    if (any(u <- (a[i] > b)) & any(v <- (a[i] > c)))
    {    
      two[i] <- sum(u)
      three[i] <- sum(v)
    }
    else    
    {    
      two[i] <- NA
      three[i] <- NA
    }
  }
})

build_b <- b[two]
build_c <- c[three]

Upvotes: 1

Related Questions