Reputation: 115
I wrote a function to numerically find the quantiles of the following finite mixture of normals
In order to do that I am minimizing . I am using the wrapper of
LBFGS
from RcppNumerical
for the optimization problem. My code is like the following
#include <RcppArmadillo.h>
#include <RcppGSL.h>
#include<omp.h>
#include<gsl/gsl_math.h>
// [[Rcpp::plugins(cpp17)]]
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppGSL)]]
// [[Rcpp::plugins(openmp)]]
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
#include <RcppNumerical.h>
using namespace Numer;
using namespace arma;
// [[Rcpp::export]]
double log_sum_exp(const arma::vec &x) {
double maxVal= x.max();
double sum_exp=sum(exp(x-maxVal));
return log(sum_exp)+maxVal ;
}
class mixture_quantile: public MFuncGrad
{
private:
const double a;
const arma::vec mu, sd, pi;
public:
mixture_quantile( const double &a_,const arma::vec & mu_, const arma::vec &sd_, const arma::vec &pi_) : a(a_), mu(mu_), sd(sd_), pi(pi_) {}
double f_grad(Constvec& x0, Refvec grad)
{
double x=x0[0];
arma::vec std_x=(x-mu)/sd, log_pi=log(pi);
arma::vec log_norm_cdf=std_x ;
log_norm_cdf.transform( [](double val) { return R::pnorm5(val,0,1,1,1); } );
log_norm_cdf+=log_pi;
double log_mixture_cdf=log_sum_exp(log_norm_cdf);
double root_f=log_mixture_cdf-log(a);
const double f= gsl_pow_2(root_f ); //function to return
//derivative
arma::vec wh= -.5*(square(std_x) +M_LNPI +M_LN2 ) -log(sd) + log_pi -log_mixture_cdf;
grad[0]= 2*root_f * exp(log_sum_exp(wh));
return f;
}
};
// [[Rcpp::export]]
arma::mat gauss_solve_mat(const arma::vec &mu,const arma::vec &sd,const arma::vec &pi,
unsigned d, unsigned nthr=1)
{
const unsigned nmix=mu.n_elem, n=100;
mat mumat(d,nmix), sdmat(d,nmix), pimat(d,nmix);
mumat.each_col()=mu, sdmat.each_col()=sd, pimat.each_col()=sd;
mat a(n,d,fill::randu );
mat result(size(a));
// #pragma omp parallel for num_threads(nthr)
for(unsigned j = 0; j < d; ++j){
vec tmpvec=a.col(j);
for(unsigned i=0;i<n;++i){
// Initial guess
double fopt;
mixture_quantile f(a(i,j),mumat.col(j),sdmat.col(j),pimat.col(j));
// mixture_quantile f(a(i,j),mu,sd,pi);
Eigen::VectorXd guess(1);
guess[0]=0.;
int res = optim_lbfgs(f, guess, fopt);
cout<<"i="<<i<<" j="<<j<<endl;
if(res < 0)
cout<<"fail to converge. fopt= "<<fopt<<endl;
// Rcpp::stop("fail to converge");
tmpvec(i)=guess[0];
}
result.col(j)=tmpvec;
}
return result;
}
I know that the above script can be simplified but using this in order to generate a minimally reproducible code. I need to repeatedly use the numerical solver over matrices.
I saved this in the file root_finding.cpp
. Now I prepared the following R
script
Rcpp::sourceCpp("root_finding.cpp")
mu=c(11.7669 , 11.2504 , 11.6544 , 11.3683 , 11.5435 , 13.1097 , 1.1653 , 11.6884 , 10.2279 , 14.3162)
sd =c(0.3040 , 0.2813 , 0.2953 , 0.3056 , 0.2825 , 0.2853 , 0.2775 , 0.3007 , 0.2980 ,0.2816)
pi =c(0.0451 , 0.1389 , 0.1883 , 0.1031 , 0.0666 , 0.1938 , 0.0011 , 0.2541 , 0.0042 , 0.0050)
gauss_solve_mat(mu,sd,pi,d=100, nthr=1)
Whenever I run the above R
script, I get the following error and the code crashes
Registered S3 methods overwritten by 'RcppGSL':
method from
predict.fastLm RcppArmadillo
print.fastLm RcppArmadillo
summary.fastLm RcppArmadillo
print.summary.fastLm RcppArmadillo
Registered S3 methods overwritten by 'RcppEigen':
method from
predict.fastLm RcppGSL
print.fastLm RcppGSL
summary.fastLm RcppGSL
print.summary.fastLm RcppGSL
using C++ compiler: ‘g++-13 (Homebrew GCC 13.1.0) 13.1.0’
using C++17
using SDK: ‘MacOSX13.3.sdk’
ld: warning: directory not found for option '-L/opt/gfortran/lib/gcc/aarch64-apple-darwin20.0/12.2.0'
ld: warning: directory not found for option '-L/opt/gfortran/lib'
ld: warning: directory not found for option '-L/opt/gfortran/lib/gcc/aarch64-apple-darwin20.0/12.2.0'
ld: warning: directory not found for option '-L/opt/gfortran/lib'
R(42451,0x1dc661e00) malloc: *** error for object 0x16d928e40: pointer being freed was not allocated
R(42451,0x1dc661e00) malloc: *** set a breakpoint in malloc_error_break to debug
zsh: abort Rscript gauss_solve_vec.R
However the same functor mixture_quantile
perfectly works in the following case when I use the minimizer function for a single alpha:
// [[Rcpp::export]]
Rcpp::List gauss_solve(const arma::vec &mu,const arma::vec &sd,const arma::vec &pi, const double a, double init)
{
mixture_quantile nll(a,mu,sd,pi);
double fopt;
mixture_quantile f(a,mu,sd,pi);
Eigen::VectorXd guess(1);
int res = optim_lbfgs(f, guess, fopt);
if(res < 0)
Rcpp::stop("fail to converge");
return Rcpp::List::create(
Rcpp::Named("xopt") = guess[0],
Rcpp::Named("fopt") = fopt,
Rcpp::Named("status") = res
);
}
Output
> mu=c(11.7669 , 11.2504 , 11.6544 , 11.3683 , 11.5435 , 13.1097 , 1.1653 , 11.6884 , 10.2279 , 14.3162)
> sd =c(0.3040 , 0.2813 , 0.2953 , 0.3056 , 0.2825 , 0.2853 , 0.2775 , 0.3007 , 0.2980 ,0.2816)
> pi =c(0.0451 , 0.1389 , 0.1883 , 0.1031 , 0.0666 , 0.1938 , 0.0011 , 0.2541 , 0.0042 , 0.0050)
>
> a=0.000229046
> gauss_solve(mu,sd,pi,a,8)
$xopt
[1] 0.9398029
$fopt
[1] 7.340254e-12
$status
[1] 0
j
does simply uncommenting // #pragma omp parallel for num_threads(nthr)
in gauss_solve_mat
function work?Upvotes: 0
Views: 118
Reputation: 368489
As I suspected (in comments above), no segfault here but a mere run-time error on mismatched dims:
> Rcpp::sourceCpp("~/git/stackoverflow/76593467/question.cpp")
Registered S3 methods overwritten by 'RcppGSL':
method from
predict.fastLm RcppArmadillo
print.fastLm RcppArmadillo
summary.fastLm RcppArmadillo
print.summary.fastLm RcppArmadillo
Registered S3 methods overwritten by 'RcppEigen':
method from
predict.fastLm RcppGSL
print.fastLm RcppGSL
summary.fastLm RcppGSL
print.summary.fastLm RcppGSL
> mu <- c(11.7669 , 11.2504 , 11.6544 , 11.3683 , 11.5435 , 13.1097 , 1.1653 , 11.6884 , 10.2279 , 14.3162)
> sd <- c(0.3040 , 0.2813 , 0.2953 , 0.3056 , 0.2825 , 0.2853 , 0.2775 , 0.3007 , 0.2980 ,0.2816)
> pi <- c(0.0451 , 0.1389 , 0.1883 , 0.1031 , 0.0666 , 0.1938 , 0.0011 , 0.2541 , 0.0042 , 0.0050)
> gauss_solve_mat(mu,sd,pi,d=100, nthr=1)
Error in eval(ei, envir) :
each_col(): incompatible size; expected 100x1, got 10x1
>
(I appended your test to the cpp file with the usual /*** R .... */
bracketing meaning that sourceCpp()
will run as R code following the compilation. See the Rcpp Attributes vignette, Section 1.5, for more.)
Edit: If I correct d=100
to d=10
in the call, and comment out the per-iteration print, then things improve to this (where I also wrap the call in str()
as the return is large:
> Rcpp::sourceCpp("~/git/stackoverflow/76593467/question.cpp")
> mu <- c(11.7669 , 11.2504 , 11.6544 , 11.3683 , 11.5435 , 13.1097 , 1.1653 , 11.6884 , 10.2279 , 14.3162)
> sd <- c(0.3040 , 0.2813 , 0.2953 , 0.3056 , 0.2825 , 0.2853 , 0.2775 , 0.3007 , 0.2980 ,0.2816)
> pi <- c(0.0451 , 0.1389 , 0.1883 , 0.1031 , 0.0666 , 0.1938 , 0.0011 , 0.2541 , 0.0042 , 0.0050)
> str(gauss_solve_mat(mu,sd,pi,d=10, nthr=1))
num [1:100, 1:10] 2.54 2.47 2.48 1.36 2.55 ...
>
Upvotes: 0