Reputation: 177
Assume the following dataframe and functions:
cond var_x var_y
cond1 2 3
cond1 3 0.1
cond2 2 2
cond2 1 0.2
my_fun1 <- function(x) {
act_vec <- rnorm(500, x, x/4)
RT = 0
for (i in 1:length(act_vec)) {
my_act = act_vec[i]
my_rt = my_act
RT = RT + my_rt
}
RT = RT/500
return(RT)
}
my_fun2 <- function(x,y) {
act_vec1 <- rnorm(500, x, x/4)
act_vec2 <- rnorm(500, y, y/4)
RT = 0
for (i in 1:length(act_vec1)) {
my_act1 = act_vec1[i]
my_act2 = act_vec1[i]*act_vec2[i]
my_rt = min(my_act1, my_act2)
RT = RT + my_rt
}
RT = RT/500
return(RT)
}
The following code applies the function my_fun1
to every row in DF
if DF$cond == 'cond1'
and otherwise my_fun2
.
my_test_vec = c()
for (i in (1:nrow(DF))) {
if (DF$cond[i] == 'cond1') {
my_test = my_fun1(DF$var_x[i])
} else {
my_test = my_fun2(DF$var_x[i], DF$var_y[i])
}
my_test_vec = c(my_test_vec, my_test)
}
This for-loop, however, is computational inefficient. Because of that, I would like to implement it in a way such that all the operations are applied in one go. My approach so far doesn't work and looks like this:
DF$results <- ifelse(DF$cond == 'cond1',
my_fun1(DF$var_x),
my_fun2(DF$var_x, DF$var_y))
Any suggestions?
Upvotes: 1
Views: 88
Reputation: 72683
You should improve your knowledge of the vectorization of R. Below are vectorized versions of your functions. First, the code applies vector arithmetic within the functions. Second, also the whole functions are vectorized; Vectorize()
makes sure that they can work with vectors as input as it is the case in anifelse()
.
my_fun1A <- Vectorize(function(x) {
act_vec <- rnorm(500, x, x/4)
RT <- sum(act_vec) / 500
return(RT)
})
my_fun2A <- Vectorize(function(x, y) {
act_vec1 <- rnorm(500, x, x/4)
act_vec2 <- rnorm(500, y, y/4)
RT <- sum(pmin(act_vec1, act_vec1*act_vec2)) / 500
return(RT)
})
set.seed(42)
resA <- ifelse(DF$cond == 'cond1', my_fun1A(DF$var_x), my_fun2A(DF$var_x, DF$var_y))
resA
# [1] 1.9849769 2.9837980 1.9772901 0.2028555
Vectorize
ing your original versions with the for
loops inside:
my_fun1v <- Vectorize(my_fun1)
my_fun2v <- Vectorize(my_fun2)
set.seed(42)
res <- ifelse(DF$cond == 'cond1', my_fun1v(DF$var_x), my_fun2v(DF$var_x, DF$var_y))
stopifnot(all.equal(res, resA)) ## same result
fun <- function() ifelse(DF$cond == 'cond1', my_fun1v(DF$var_x), my_fun2v(DF$var_x, DF$var_y))
funA <- function() ifelse(DF$cond == 'cond1', my_fun1A(DF$var_x), my_fun2A(DF$var_x, DF$var_y))
set.seed(42)
DF <- DF[sample(1:nrow(DF), 1e4, replace=T), ]
set.seed(42)
microbenchmark::microbenchmark(fun(), funA(), times=3)
# Unit: seconds
# expr min lq mean median uq max neval cld
# fun() 5.319736 5.352284 5.392068 5.384831 5.428234 5.471636 3 b
# funA() 1.793795 1.793863 1.835880 1.793931 1.856923 1.919914 3 a
We can clearly see an improvement of over 60%.
Data:
DF <- read.table(header=T, text="cond var_x var_y
cond1 2 3
cond1 3 0.1
cond2 2 2
cond2 1 0.2")
Upvotes: 2