Reputation:
I am trying to call R function optim()
in Rcpp. I saw an example in Calling R's optim function from within C++ using Rcpp, but I am unable to modify it correctly for my use case. Basically, the objective function depends on the x
and y
but I want to optimize it with respect to b
.
Here is the R code that does what I want:
example_r = function(b, x, y) {
phi = rnorm(length(x))
tar_val = (x ^ 2 + y ^ 2) * b * phi
objftn_r = function(beta, x, y) {
obj_val = (x ^ 2 + y ^ 2) * beta
return(obj_val)
}
b1 = optim(b, function(beta) {
sum((objftn_r(beta, x, y) - tar_val) ^ 2)
}, method = "BFGS")$par
result = (x ^ 2 + y ^ 2) * b1
return(b1)
}
Here's is my attempt to translate it to _RcppArmadillo:
#include <RcppArmadillo.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppArmadillo)]]
arma::vec example_rcpp(arma::vec b, arma::vec x, arma::vec y){
arma::vec tar_val = pow(x,2)%b-pow(y,2);
return tar_val;
}
// [[Rcpp::export]]
arma::vec optim_rcpp(const arma::vec& init_val, arma::vec& x, arma::vec& y){
Rcpp::Environment stats("package:stats");
Rcpp::Function optim = stats["optim"];
Rcpp::List opt_results = optim(Rcpp::_["par"] = init_val,
Rcpp::_["fn"] = Rcpp::InternalFunction(&example_rcpp),
Rcpp::_["method"] = "BFGS");
arma::vec out = Rcpp::as<arma::vec>(opt_results[0]);
return out;
}
However, this code is returning:
> optim_rcpp(1:3,2:4,3:5)
Error in optim_rcpp(1:3, 2:4, 3:5) : not compatible with requested type
I'm not sure what the error is here.
Upvotes: 6
Views: 2677
Reputation: 11
I happened to write a C++ version of stats::optimize()
function. It consists of optimize.h
, optimize.cpp
and optim_test.cpp
. The former two contain classes and functions ready to use, and the last is a script containing unit tests and demonstrate the use of the functions.
This is optimize.h
:
#ifndef OPTIMIZE_R
#define OPTIMIZE_R
#include <iostream>
#include <cmath>
#include <cfloat>
// Optim class: virtual class to find max/min of univariate function in R manner
// Usually a subclass is used to define a substantiated function
// Member function (public): value, evaluate function with double x
// Other parameters may be added into the subclass as members
class Optim
{
public:
virtual double value(double x) = 0;
virtual ~Optim() {}
};
// Define optimize function
double optimize(Optim* optim, double lower, double upper, bool maximum, double tol);
#endif
This is optimize.cpp
, where Brent_fmin()
is adapted from stats
C code with the same name:
#include "optimize.h"
using namespace std;
// This function is copied from R: stats/src/optimize.c
// Add argument maximum and its use in function pointer *f
// Define Brent optimization
double Brent_fmin(double ax, double bx, double (*f)(double, void *, bool),
void *info, bool maximum, double tol)
{
/* c is the squared inverse of the golden ratio */
const double c = (3. - sqrt(5.)) * .5;
/* Local variables */
double a, b, d, e, p, q, r, u, v, w, x;
double t2, fu, fv, fw, fx, xm, eps, tol1, tol3;
/* eps is approximately the square root of the relative machine precision. */
eps = DBL_EPSILON;
tol1 = eps + 1.;/* the smallest 1.000... > 1 */
eps = sqrt(eps);
a = ax;
b = bx;
v = a + c * (b - a);
w = v;
x = v;
d = 0.;/* -Wall */
e = 0.;
fx = (*f)(x, info, maximum);
fv = fx;
fw = fx;
tol3 = tol / 3.;
/* main loop starts here ----------------------------------- */
for(;;) {
xm = (a + b) * .5;
tol1 = eps * fabs(x) + tol3;
t2 = tol1 * 2.;
/* check stopping criterion */
if (fabs(x - xm) <= t2 - (b - a) * .5) break;
p = 0.;
q = 0.;
r = 0.;
if (fabs(e) > tol1) { /* fit parabola */
r = (x - w) * (fx - fv);
q = (x - v) * (fx - fw);
p = (x - v) * q - (x - w) * r;
q = (q - r) * 2.;
if (q > 0.) p = -p; else q = -q;
r = e;
e = d;
}
if (fabs(p) >= fabs(q * .5 * r) ||
p <= q * (a - x) || p >= q * (b - x)) { /* a golden-section step */
if (x < xm) e = b - x; else e = a - x;
d = c * e;
}
else { /* a parabolic-interpolation step */
d = p / q;
u = x + d;
/* f must not be evaluated too close to ax or bx */
if (u - a < t2 || b - u < t2) {
d = tol1;
if (x >= xm) d = -d;
}
}
/* f must not be evaluated too close to x */
if (fabs(d) >= tol1)
u = x + d;
else if (d > 0.)
u = x + tol1;
else
u = x - tol1;
fu = (*f)(u, info, maximum);
/* update a, b, v, w, and x */
if (fu <= fx) {
if (u < x) b = x; else a = x;
v = w; w = x; x = u;
fv = fw; fw = fx; fx = fu;
} else {
if (u < x) a = u; else b = u;
if (fu <= fw || w == x) {
v = w; fv = fw;
w = u; fw = fu;
} else if (fu <= fv || v == x || v == w) {
v = u; fv = fu;
}
}
}
/* end of main loop */
return x;
} // Brent_fmin()
// Optim's value function: wrapper around Optim::value
// This function is used in Brent_fmin as function pointer
double optim_value(double x, Optim* optim, bool maximum) {
double out = (*optim).value(x);
if (maximum == true) {
out = -out;
}
return out;
}
// Optimize function: finding minimun of class-based univariate function
// optim: an object inheriting from Optim, with double value(double) member function
// Other parameters can be stored in members of Optim class
double optimize(Optim* optim, double lower, double upper, bool maximum = false,
double tol = pow(DBL_EPSILON, 0.25)) {
return Brent_fmin(lower, upper, (double (*)(double, void*, bool)) optim_value, optim,
maximum, tol);
}
This is optim_test.cpp
containing unit tests:
// Unit test
#include "optimize.h"
using namespace std;
// Define a class of parabolic function: f(x) = a * x^2 + b * x + c
class Parabol: public Optim
{
private:
const double a;
const double b;
const double c;
public:
Parabol(double a_, double b_, double c_) : a(a_), b(b_), c(c_) {}
double value(double x) {
return a * pow(x, 2) + b * x + c;
}
};
// Main function to min/max a parabolic function
int main() {
Parabol parabol1(1, -5, 3);
double x_min = optimize(¶bol1, 0, 5, false, 1e-3);
cout << x_min << endl;
Parabol parabol2(-1, -5, 3);
double x_max = optimize(¶bol2, -5, 0, true, 1e-3);
cout << x_max << endl;
return 0;
}
These files have been compiled and run by Visual C++ 6.0. The output from the main()
function is:
2.5
-2.5
Press any key to continue
Upvotes: 0
Reputation: 20746
Before we begin, I have a few remarks:
optim
from R in C++ is very different than using in C++ the underlying C++ code for opt()
from nlopt
.I've cleaned up your question as a result... But, in the future, this likely will not happen.
The data generation process seems to be done in 2 steps: First, outside of the example_r
function, and, then inside the function.
This should be simplified so that it is done outside of the optimization function. For example:
generate_data = function(n, x_mu = 0, y_mu = 1, beta = 1.5) {
x = rnorm(n, x_mu)
y = rnorm(n, y_mu)
phi = rnorm(length(x))
tar_val = (x ^ 2 + y ^ 2) * beta * phi
simulated_data = list(x = x, y = y, beta = beta, tar_val = tar_val)
return(simulated_data)
}
optim
Objective functions must return a single value, e.g. a scalar, in R. Under the posted R code, there was effectively two functions designed to act as an objective function in sequence, e.g.
objftn_r = function(beta, x, y) {
obj_val = (x ^ 2 + y ^ 2) * beta
return(obj_val)
}
b1 = optim(b, function(beta) {
sum((objftn_r(beta, x, y) - tar_val) ^ 2)
}, method = "BFGS")$par
This objective function should therefore be re-written as:
objftn_r = function(beta_hat, x, y, tar_val) {
# The predictions generate will be a vector
est_val = (x ^ 2 + y ^ 2) * beta_hat
# Here we apply sum of squares which changes it
# from a vector into a single "objective" value
# that optim can work with.
obj_val = sum( ( est_val - tar_val) ^ 2)
return(obj_val)
}
From there, the calls should align as:
sim_data = generate_data(10, 1, 2, .3)
b1 = optim(sim_data$beta, fn = objftn_r, method = "BFGS",
x = sim_data$x, y = sim_data$y, tar_val = sim_data$tar_val)$par
Having fixed the scope and behavior of the R code, let's focus on translating it into RcppArmadillo.
In particular, notice that the objection function defined after the translation returns a vector and not a scalar into optim
, which is not a single value. Also of concern is the lack of a tar_val
parameter in the objective function. With this in mind, the objective function will translate to:
// changed function return type and
// the return type of first parameter
double obj_fun_rcpp(double& beta_hat,
arma::vec& x, arma::vec& y, arma::vec& tar_val){
// Changed from % to * as it is only appropriate if
// `beta_hat` is the same length as x and y.
// This is because it performs element-wise multiplication
// instead of a scalar multiplication on a vector
arma::vec est_val = (pow(x, 2) - pow(y, 2)) * beta_hat;
// Compute objective value
double obj_val = sum( pow( est_val - tar_val, 2) );
// Return a single value
return obj_val;
}
Now, with the objective function set, let's address the Rcpp call into R for optim()
from C++. In this function, the parameters of the
function must be explicitly supplied. So, x
, y
, and tar_val
must be present in the optim
call. Thus, we will end up with:
// [[Rcpp::export]]
arma::vec optim_rcpp(double& init_val,
arma::vec& x, arma::vec& y, arma::vec& tar_val){
// Extract R's optim function
Rcpp::Environment stats("package:stats");
Rcpp::Function optim = stats["optim"];
// Call the optim function from R in C++
Rcpp::List opt_results = optim(Rcpp::_["par"] = init_val,
// Make sure this function is not exported!
Rcpp::_["fn"] = Rcpp::InternalFunction(&obj_fun_rcpp),
Rcpp::_["method"] = "BFGS",
// Pass in the other parameters as everything
// is scoped environmentally
Rcpp::_["x"] = x,
Rcpp::_["y"] = y,
Rcpp::_["tar_val"] = tar_val);
// Extract out the estimated parameter values
arma::vec out = Rcpp::as<arma::vec>(opt_results[0]);
// Return estimated values
return out;
}
The full functioning code can be written in test_optim.cpp
and compiled via sourceCpp()
as:
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
// changed function return type and
// the return type of first parameter
// DO NOT EXPORT THIS FUNCTION VIA RCPP ATTRIBUTES
double obj_fun_rcpp(double& beta_hat,
arma::vec& x, arma::vec& y, arma::vec& tar_val){
// Changed from % to * as it is only appropriate if
// `beta_hat` is the same length as x and y.
// This is because it performs element-wise multiplication
// instead of a scalar multiplication on a vector
arma::vec est_val = (pow(x, 2) - pow(y, 2)) * beta_hat;
// Compute objective value
double obj_val = sum( pow( est_val - tar_val, 2) );
// Return a single value
return obj_val;
}
// [[Rcpp::export]]
arma::vec optim_rcpp(double& init_val,
arma::vec& x, arma::vec& y, arma::vec& tar_val){
// Extract R's optim function
Rcpp::Environment stats("package:stats");
Rcpp::Function optim = stats["optim"];
// Call the optim function from R in C++
Rcpp::List opt_results = optim(Rcpp::_["par"] = init_val,
// Make sure this function is not exported!
Rcpp::_["fn"] = Rcpp::InternalFunction(&obj_fun_rcpp),
Rcpp::_["method"] = "BFGS",
// Pass in the other parameters as everything
// is scoped environmentally
Rcpp::_["x"] = x,
Rcpp::_["y"] = y,
Rcpp::_["tar_val"] = tar_val);
// Extract out the estimated parameter values
arma::vec out = Rcpp::as<arma::vec>(opt_results[0]);
// Return estimated values
return out;
}
# Setup some values
beta = 2
x = 2:4
y = 3:5
# Set a seed for reproducibility
set.seed(111)
phi = rnorm(length(x))
tar_val = (x ^ 2 + y ^ 2) * beta * phi
optim_rcpp(beta, x, y, tar_val)
# [,1]
# [1,] 2.033273
Note: If you would like to avoid a matrix of size 1 x1 from being returned please use double
as the return parameter of optim_rcpp
and switch Rcpp::as<arma::vec>
to Rcpp::as<double>
Upvotes: 19