user3602239
user3602239

Reputation: 681

How to use Rcpp to speed up a for loop?

I have created a for loop and I would like to speed it up using the Rcpp library. I am not too familiar with C++. Can you please help me make my function faster? Thank you for your help!

I have included my algorithm, the code, along with the input and the output, with the sessionInfo.

Here is my algorithm:

if the current price is above the previous price, mark (+1) in the column named TR

if the current price is below the previous price, mark (-1) in the column named TR

if the current price is the same as the previous price, mark the same thing as in the previous price in the column named TR

Here is my code:

price <- c(71.91, 71.82, 71.81, 71.81, 71.81, 71.82, 71.81, 71.81, 71.81, 
           71.82, 71.81, 71.81, 71.8, 71.81, 71.8, 71.81, 71.8, 71.8, 71.8, 
           71.8, 71.81, 71.81, 71.81, 71.81, 71.81, 71.81, 71.81, 71.81, 
           71.81, 71.82, 71.81, 71.81, 71.81, 71.81, 71.81, 71.81, 71.8, 
           71.8, 71.81, 71.81, 71.81, 71.81, 71.82, 71.82, 71.81, 71.81, 
           71.81, 71.81, 71.81, 71.81, 71.81, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.81, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.81, 71.81, 71.81, 71.82, 71.82, 
           71.81, 71.82, 71.82, 71.82, 71.81, 71.82, 71.82, 71.82, 71.81, 
           71.81, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 71.81, 
           71.82, 71.82, 71.82, 71.82, 71.83, 71.82, 71.82, 71.82, 71.81, 
           71.81, 71.81, 71.81, 71.81, 71.81, 71.81, 71.82, 71.82, 71.82, 
           71.81, 71.81, 71.81, 71.82, 71.82, 71.82, 71.82, 71.82, 71.82, 
           71.82, 71.82, 71.82, 71.82, 71.82, 71.83, 71.83, 71.83, 71.83, 
           71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 
           71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 
           71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 71.83, 
           71.83)

TR <- numeric(length(price)-1)
TR <- c(NA,TR)

for (i in 1: (length(price)-1)){

  if (price[i] == price[i+1]) {TR[i+1] = TR[i]}

  if (price[i] < price[i+1]) {TR[i+1] = 1}

  if (price[i] > price[i+1]) {TR[i+1] = -1}

}

And here is my output: dput(TR) yields

c(NA, -1, -1, -1, -1, 1, -1, -1, -1, 1, -1, -1, -1, 1, -1, 1, 
  -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 
  -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, 
  1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 
  1, 1, -1, 1, 1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  1, 1, 1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
  -1, 1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1)

and here is my sessionInfo:

> sessionInfo()
R version 3.1.2 (2014-10-31)
Platform: x86_64-w64-mingw32/x64 (64-bit)

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] data.table_1.9.4

loaded via a namespace (and not attached):
[1] chron_2.3-45  plyr_1.8.1    Rcpp_0.11.1   reshape2_1.4  stringr_0.6.2 tools_3.1.2  

Upvotes: 18

Views: 7517

Answers (4)

josliber
josliber

Reputation: 44299

You can pretty directly translate the for loop:

library(Rcpp)
cppFunction(
"IntegerVector proc(NumericVector x) {
  const int n = x.size();
  IntegerVector y(n);
  y[0] = NA_INTEGER;
  for (int i=1; i < n; ++i) {
    if (x[i] == x[i-1]) y[i] = y[i-1];
    else if (x[i] > x[i-1]) y[i] = 1;
    else y[i] = -1;
  }
  return y;
}")

As is usual, you can get a pretty big speedup with Rcpp as compared to the for loop in base R:

proc.for <- function(price) {
  TR <- numeric(length(price)-1)
  TR <- c(NA,TR)
  for (i in 1: (length(price)-1)){
    if (price[i] == price[i+1]) {TR[i+1] = TR[i]}
    if (price[i] < price[i+1]) {TR[i+1] = 1}
    if (price[i] > price[i+1]) {TR[i+1] = -1}
  }
  return(TR)
}
proc.aaron <- function(price) {
  change <- sign(diff(price))
  good <- change != 0
  goodval <- change[good]
  c(NA, goodval[cumsum(good)])
}
proc.jbaums <- function(price) {
  TR <- sign(diff(price))
  TR[TR==0] <- TR[which(TR != 0)][findInterval(which(TR == 0), which(TR != 0))]
  TR
}

all.equal(proc(price), proc.for(price), proc.aaron(price), proc.jbaums(price))
# [1] TRUE
library(microbenchmark)
microbenchmark(proc(price), proc.for(price), proc.aaron(price), proc.jbaums(price))
# Unit: microseconds
#                expr     min       lq      mean   median       uq      max neval
#         proc(price)   1.871   2.5380   3.92111   3.1110   4.5880   15.318   100
#     proc.for(price) 408.200 448.2830 542.19766 484.1265 546.3255 1821.104   100
#   proc.aaron(price)  23.916  25.5770  33.53259  31.5420  35.8575  190.372   100
#  proc.jbaums(price)  33.536  38.8995  46.80109  43.4510  49.3555  112.306   100

We see a speedup of more than 100x compared to a for loop and 10x compared to vectorized alternatives for the provided vector.

The speedup is even more significant with a larger vector (length 1 million tested here):

price.big <- rep(price, times=5000)
all.equal(proc(price.big), proc.for(price.big), proc.aaron(price.big), proc.jbaums(price.big))
# [1] TRUE
microbenchmark(proc(price.big), proc.for(price.big), proc.aaron(price.big), proc.jbaums(price.big))
# Unit: milliseconds
#                    expr         min          lq        mean      median          uq        max neval
#         proc(price.big)    1.442119    1.818494    5.094274    2.020437    2.771903   56.54321   100
#     proc.for(price.big) 2639.819536 2699.493613 2949.962241 2781.636460 3062.277930 4472.35369   100
#   proc.aaron(price.big)   91.499940   99.859418  132.519296  140.521212  147.462259  207.72813   100
#  proc.jbaums(price.big)  117.242451  138.528214  170.989065  170.606048  180.337074  487.13615   100

Now we have a 1000x speedup compared to the for loop and a ~70x speedup compared to the vectorized R functions. Even at this size, it's not clear there's much advantage of Rcpp over vectorized R solutions if the function is only called once, since it certainly takes at least 100 ms to compile the Rcpp code. The speedup is pretty attractive if this is a piece of code that's called repeatedly in your analysis.

Upvotes: 24

Luke Tierney
Luke Tierney

Reputation: 1055

You can try byte compiling. It's also useful to look at an R loop that uses the same if-else-if-else logic as the Rcpp code. With R 3.1.2 I get

f1 <- function(price) {
    TR <- numeric(length(price)-1)
    TR <- c(NA,TR)
    for (i in 1: (length(price)-1)){
        if (price[i] == price[i+1]) {TR[i+1] = TR[i]}
        if (price[i] < price[i+1]) {TR[i+1] = 1}
        if (price[i] > price[i+1]) {TR[i+1] = -1}
    }
    return(TR)
}

f2 <- function(price) {
    TR <- numeric(length(price)-1)
    TR <- c(NA,TR)
    for (i in 1: (length(price)-1)){
        if (price[i] == price[i+1]) {TR[i+1] = TR[i]}
        else if (price[i] < price[i+1]) {TR[i+1] = 1}
        else {TR[i+1] = -1}
    }
    return(TR)
}

library(compiler)
f1c <- cmpfun(f1)
f2c <- cmpfun(f2)

library(microbenchmark)
microbenchmark(f1(price), f2(price), f1c(price), f2c(price), times = 1000)
## Unit: microseconds
##       expr     min       lq     mean   median       uq       max neval  cld
##  f1(price) 536.619 570.3715 667.3520 586.2465 609.9280 45046.462  1000    d
##  f2(price) 328.592 351.2070 386.5895 365.0245 381.4850  1302.497  1000   c 
## f1c(price) 167.570 182.4645 218.9537 192.4780 204.7810  7843.291  1000  b  
## f2c(price)  96.644 107.4465 124.1324 113.5470 121.5365  1019.389  1000 a   

R-devel, to be released as R 3.2.0 in April, has a number of improvements in the byte code engine for scalar computations like this; there I get

microbenchmark(f1(price), f2(price), f1c(price), f2c(price), times = 1000)
## Unit: microseconds
##        expr     min       lq      mean   median       uq      max neval  cld
##   f1(price) 490.300 520.3845 559.19539 533.2050 548.6850 1330.219  1000    d
##   f2(price) 298.375 319.7475 348.71384 330.4535 342.6405 1813.113  1000   c 
##  f1c(price)  61.947  66.3255  68.01555  67.7270  69.5470  138.308  1000  b  
##  f2c(price)  36.334  38.9500  40.45085  40.1830  41.8610   55.909  1000 a   

This gets you into the same general ballpark as the vectorized solutions in this example. There is still room for further improvements in the byte code engine that should make it into future releases.

All solutions differ in how they handle NA/NaN values, which may or may not matter to you.

Upvotes: 21

Aaron - mostly inactive
Aaron - mostly inactive

Reputation: 37734

Maybe try vectorization first. Though that probably won't be quite as fast as Rcpp, it's more straightforward.

f2 <- function(price) {
    change <- sign(diff(price))
    good <- change != 0
    goodval <- change[good]
    c(NA, goodval[cumsum(good)])
}

And it's still a substantial speed up over an R for loop.

f1 <- function(price) {
    TR <- numeric(length(price)-1)
    TR <- c(NA,TR)
    for (i in 1: (length(price)-1)){
        if (price[i] == price[i+1]) {TR[i+1] = TR[i]}
        if (price[i] < price[i+1]) {TR[i+1] = 1}
        if (price[i] > price[i+1]) {TR[i+1] = -1}
    }
    TR
}

microbenchmark(f1(price), f2(price), times=100)
## Unit: microseconds
##       expr     min       lq      mean   median       uq      max neval cld
##  f1(price) 550.037 592.9830 756.20095 618.7910 703.8335 3042.530   100   b
##  f2(price)  36.915  39.3285  56.45267  45.5225  60.1965  184.536   100  a 

Upvotes: 13

jbaums
jbaums

Reputation: 27388

This can be easily vectorised in R.

For example with diff and findInterval:

TR <- sign(diff(price))
TR[TR==0] <- TR[which(TR != 0)][findInterval(which(TR == 0), which(TR != 0))]

Upvotes: 4

Related Questions