Reputation: 29387
rm(list = ls())
a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)
two <- rep(NA, length(a))
three <- rep(NA, length(a))
system.time(
for (i in seq_along(a))
{
if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0)
{
two[i] <- tail(which(a[i] > b),1)
three[i] <- tail(which(a[i] > c),1)
}
else
{
two[i] <- NA
three[i] <- NA
}
}
)
build_b <- b[two]
build_c <- c[three]
What I am trying to do is find what b
and c
looked like at the time of a
. So I am prelocating memory in vectors two
and three
in attempt to save some time and so I can keep track of indexing of those occurrences. After the loop is completed I build the new vectors according to the indexing I just computed. Currently the operation takes about 10 sec to compute. My question is how can I speed up this operation?
Thank you!
Upvotes: 0
Views: 135
Reputation: 25736
Here is another solution using findInterval
:
## assume a, b and c are sorted
two <- findInterval(a-1L, b)
three <- findInterval(a-1L, c)
two[two==0] <- NA
three[three==0] <- NA
build_b <- b[two]
build_c <- c[three]
And here a little benchmark:
a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)
pops <- function(a, b, c) {
two <- rep(NA, length(a))
three <- rep(NA, length(a))
for (i in seq_along(a))
{
if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0)
{
two[i] <- tail(which(a[i] > b),1)
three[i] <- tail(which(a[i] > c),1)
}
else
{
two[i] <- NA
three[i] <- NA
}
}
return(list(b=b[two], c=c[three]))
}
droopy <- function(a, b, c) {
two <- rep(NA, length(a))
three <- rep(NA, length(a))
for (i in seq_along(a))
{
if (any(u <- (a[i] > b)) & any(v <- (a[i] > c)))
{
two[i] <- sum(u)
three[i] <- sum(v)
}
else
{
two[i] <- NA
three[i] <- NA
}
}
return(list(b=b[two], c=c[three]))
}
sgibb <- function(a, b, c) {
## assume a, b and c are sorted
two <- findInterval(a-1L, b)
three <- findInterval(a-1L, c)
two[two==0] <- NA
three[three==0] <- NA
return(list(b=b[two], c=c[three]))
}
The benchmark:
library("rbenchmark")
benchmark(pops(a, b, c), droopy(a, b, c), sgibb(a, b, c), order="relative", replications=2)
# test replications elapsed relative user.self sys.self user.child sys.child
#3 sgibb(a, b, c) 2 0.010 1.0 0.008 0.004 0 0
#2 droopy(a, b, c) 2 8.639 863.9 8.613 0.000 0 0
#1 pops(a, b, c) 2 26.838 2683.8 26.753 0.004 0 0
identical(pops(a, b, c), sgibb(a, b, c))
## TRUE
identical(droopy(a, b, c), sgibb(a, b, c))
## TRUE
Upvotes: 4
Reputation: 2818
a possibility :
a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)
two <- integer(length(a))
three <- integer(length(a))
system.time(
{
for (i in seq_along(a))
{
if (any(u <- (a[i] > b)) & any(v <- (a[i] > c)))
{
two[i] <- sum(u)
three[i] <- sum(v)
}
else
{
two[i] <- NA
three[i] <- NA
}
}
})
build_b <- b[two]
build_c <- c[three]
Upvotes: 1