Reputation: 1263
R's poly()
function produces orthogonal polynomials for data fitting. However, I would like to use the results of the regression outside of R (say in C++), and there doesn't seem to be a way to get the coefficients for each orthogonal polynomial.
Note 1: I don't mean the regression coefficients, but the coefficients of the orthogonal polynomials themselves), e.g. the p_i(x)
in
y = a0 + a1*p_1(x) + a2*p_2(x) + ...
Note 2: I know poly(x, n, raw=T)
forces poly to return non-orthogonal polynomials, but I want to regress against orthogonal polynomials, so that's what I'm looking for.
Upvotes: 19
Views: 4443
Reputation: 4841
Note 1: I don't mean the regression coefficients, but the coefficients of the orthogonal polynomials themselves), e.g. the
p_i(x)
iny = a0 + a1*p_1(x) + a2*p_2(x) + ...
As josliber mentions, the functions are constructed recursively and the most compact way of representing them is with the norms and "alpha" coefficients used by R. A C++ version using (Rcpp)Armadillo can look something like
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
namespace polycpp {
class poly_basis {
void scale_basis(arma::mat &X) const {
if(X.n_cols > 0)
X.each_row() /=
arma::sqrt(norm2.subvec(1L, norm2.n_elem - 1L)).t();
}
public:
arma::vec const alpha, norm2;
poly_basis(arma::vec const &alpha, arma::vec const &norm2):
alpha(alpha), norm2(norm2) {
for(size_t i = 0; i < norm2.size(); ++i)
if(norm2[i] <= 0.)
throw std::invalid_argument("invalid norm2");
if(alpha.n_elem + 2L != norm2.n_elem)
throw std::invalid_argument("invalid alpha");
}
/**
behaves like poly(x, degree). The orthogonal polynomial is returned by
reference.
*/
static poly_basis get_poly_basis
(arma::vec x, size_t const degree, arma::mat &X){
size_t const n = x.n_elem,
nc = degree + 1L;
double const x_bar = arma::mean(x);
x -= x_bar;
arma::mat XX(n, nc);
XX.col(0).ones();
for(size_t d = 1L; d < nc; d++){
double * xx_new = XX.colptr(d);
double const * xx_old = XX.colptr(d - 1);
for(size_t i = 0; i < n; ++i, ++xx_new, ++xx_old)
*xx_new = *xx_old * x[i];
}
arma::mat R;
/* TODO: can be done smarter by calling LAPACK or LINPACK directly */
if(!arma::qr_econ(X, R, XX))
throw std::runtime_error("QR decomposition failed");
for(size_t c = 0; c < nc; ++c)
X.col(c) *= R.at(c, c);
arma::vec norm2(nc + 1L),
alpha(nc - 1L);
norm2[0] = 1.;
for(size_t c = 0; c < nc; ++c){
double z_sq(0),
x_z_sq(0);
double const *X_i = X.colptr(c);
for(size_t i = 0; i < n; ++i, ++X_i){
double const z_sq_i = *X_i * *X_i;
z_sq += z_sq_i;
if(c < degree)
x_z_sq += x[i] * z_sq_i;
}
norm2[c + 1] = z_sq;
if(c < degree)
alpha[c] = x_z_sq / z_sq + x_bar;
}
poly_basis out(alpha, norm2);
out.scale_basis(X);
return out;
}
/** behaves like predict(<poly>, newdata). */
arma::mat operator()(arma::vec const &x) const {
size_t const n = x.n_elem;
arma::mat out(n, alpha.n_elem + 1L);
out.col(0).ones();
if(alpha.n_elem > 0L){
out.col(1) = x;
out.col(1) -= alpha[0];
for(size_t c = 1; c < alpha.n_elem; c++){
double * x_new = out.colptr(c + 1L);
double const * x_prev = out.colptr(c),
* x_old = out.colptr(c - 1L),
* x_i = x.memptr();
double const fac = norm2[c + 1L] / norm2[c];
for(size_t i = 0; i < n; ++i, ++x_new, ++x_prev, ++x_old, ++x_i)
*x_new = (*x_i - alpha[c]) * *x_prev - fac * *x_old;
}
}
scale_basis(out);
return out;
}
};
} // namespace polycpp
// export the functions to R to show that we get the same
using namespace polycpp;
using namespace Rcpp;
// [[Rcpp::export(rng = false)]]
List my_poly(arma::vec const &x, unsigned const degree){
arma::mat out;
auto basis = poly_basis::get_poly_basis(x, degree, out);
return List::create(
Named("X") = out,
Named("norm2") = basis.norm2,
Named("alpha") = basis.alpha);
}
// [[Rcpp::export(rng = false)]]
arma::mat my_poly_predict(arma::vec const &x, arma::vec const &alpha,
arma::vec const &norm2){
poly_basis basis(alpha, norm2);
return basis(x);
}
We can easily get rid of the Armadillo dependence if needed. I verify below that we get the same as the R function
set.seed(1L)
x <- rnorm(100)
Rp <- poly(x, degree = 4L)
Cp <- my_poly(x, 4L)
all.equal(unclass(Rp), Cp$X[, -1L], check.attributes = FALSE)
#R> [1] TRUE
all.equal(attr(Rp, "coefs"),
lapply(Cp[c("alpha", "norm2")], drop))
#R> [1] TRUE
z <- rnorm(20)
Rpred <- predict(Rp, z)
Cpred <- my_poly_predict(z, Cp$alpha, Cp$norm2)
all.equal(Rpred, Cpred[, -1], check.attributes = FALSE)
#R> [1] TRUE
all.equal(Cp$X, my_poly_predict(x, Cp$alpha, Cp$norm2))
#R> [1] TRUE
A nice bonus, although it likely does not matter in practice, is that the new functions are faster
options(digits = 3)
microbenchmark::microbenchmark(
R = poly(x, degree = 4L), cpp = my_poly(x, 4L))
#R> Unit: microseconds
#R> expr min lq mean median uq max neval
#R> R 118.93 123.63 135.4 126.1 129.0 469.1 100
#R> cpp 7.22 7.97 11.3 10.9 11.7 89.4 100
microbenchmark::microbenchmark(
R = predict(Rp, z), cpp = my_poly_predict(z, Cp$alpha, Cp$norm2))
#R> Unit: microseconds
#R> expr min lq mean median uq max neval
#R> R 18.6 19.20 20.50 19.43 19.89 92.86 100
#R> cpp 1.2 1.39 1.92 1.98 2.23 8.85 100
Upvotes: 2
Reputation: 44309
The polynomials are defined recursively using the alpha
and norm2
coefficients of the poly
object you've created. Let's look at an example:
z <- poly(1:10, 3)
attributes(z)$coefs
# $alpha
# [1] 5.5 5.5 5.5
# $norm2
# [1] 1.0 10.0 82.5 528.0 3088.8
For notation, let's call a_d
the element in index d
of alpha
and let's call n_d
the element in index d
of norm2
. F_d(x)
will be the orthogonal polynomial of degree d
that is generated. For some base cases we have:
F_0(x) = 1 / sqrt(n_2)
F_1(x) = (x-a_1) / sqrt(n_3)
The rest of the polynomials are recursively defined:
F_d(x) = [(x-a_d) * sqrt(n_{d+1}) * F_{d-1}(x) - n_{d+1} / sqrt(n_d) * F_{d-2}(x)] / sqrt(n_{d+2})
To confirm with x=2.1
:
x <- 2.1
predict(z, newdata=x)
# 1 2 3
# [1,] -0.3743277 0.1440493 0.1890351
# ...
a <- attributes(z)$coefs$alpha
n <- attributes(z)$coefs$norm2
f0 <- 1 / sqrt(n[2])
(f1 <- (x-a[1]) / sqrt(n[3]))
# [1] -0.3743277
(f2 <- ((x-a[2]) * sqrt(n[3]) * f1 - n[3] / sqrt(n[2]) * f0) / sqrt(n[4]))
# [1] 0.1440493
(f3 <- ((x-a[3]) * sqrt(n[4]) * f2 - n[4] / sqrt(n[3]) * f1) / sqrt(n[5]))
# [1] 0.1890351
The most compact way to export your polynomials to your C++ code would probably be to export attributes(z)$coefs$alpha
and attributes(z)$coefs$norm2
and then use the recursive formula in C++ to evaluate your polynomials.
Upvotes: 25