Reputation: 786
Background
I'm working with numerical routines that require at least 15 decimal places of accuracy in the double-float domain. For the most part, this has never been a problem. However when implementing lanczos sums I am only getting 5/6 digits of accuracy when compared to Boost/Cephes (both of these use the same algorithm and coefficients)
My first thought was "Boost and Cephes use long-doubles, and I'm using double-floats", so I converted the coefficients to rationals. It's a simple algorithm with no floating point contagion that I can see, so in theory that should have solved any problem with the accuracy of the coefficients. It doesn't; whether I use double-float coefficients or rationals, the answers are the same.
The Code
The C code below comes from Boost's evaluate_rational function and the coefficients selected are n=13 and G=6.024680040776729583740234375L0. This is supposed to give the best accuracy for 64 bit floats (I could have selected other coefficients, but they're all long-double).
Here's the C code
template <class T>
static T lanczos_sum_expG_scaled(const T& z)
{
static const T num[13] = {
static_cast<T>(56906521.91347156388090791033559122686859L),
static_cast<T>(103794043.1163445451906271053616070238554L),
static_cast<T>(86363131.28813859145546927288977868422342L),
static_cast<T>(43338889.32467613834773723740590533316085L),
static_cast<T>(14605578.08768506808414169982791359218571L),
static_cast<T>(3481712.15498064590882071018964774556468L),
static_cast<T>(601859.6171681098786670226533699352302507L),
static_cast<T>(75999.29304014542649875303443598909137092L),
static_cast<T>(6955.999602515376140356310115515198987526L),
static_cast<T>(449.9445569063168119446858607650988409623L),
static_cast<T>(19.51992788247617482847860966235652136208L),
static_cast<T>(0.5098416655656676188125178644804694509993L),
static_cast<T>(0.006061842346248906525783753964555936883222L)
};
static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = {
static_cast<std::uint32_t>(0u),
static_cast<std::uint32_t>(39916800u),
static_cast<std::uint32_t>(120543840u),
static_cast<std::uint32_t>(150917976u),
static_cast<std::uint32_t>(105258076u),
static_cast<std::uint32_t>(45995730u),
static_cast<std::uint32_t>(13339535u),
static_cast<std::uint32_t>(2637558u),
static_cast<std::uint32_t>(357423u),
static_cast<std::uint32_t>(32670u),
static_cast<std::uint32_t>(1925u),
static_cast<std::uint32_t>(66u),
static_cast<std::uint32_t>(1u)
};
return boost::math::tools::evaluate_rational(num, denom, z);
template <class T, class U, class V>
V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V)
{
V z(z_);
V s1, s2;
if(z <= 1)
{
s1 = static_cast<V>(num[count-1]);
s2 = static_cast<V>(denom[count-1]);
for(int i = (int)count - 2; i >= 0; --i)
{
s1 *= z;
s2 *= z;
s1 += num[i];
s2 += denom[i];
}
}
else
{
z = 1 / z;
s1 = static_cast<V>(num[0]);
s2 = static_cast<V>(denom[0]);
for(unsigned i = 1; i < count; ++i)
{
s1 *= z;
s2 *= z;
s1 += num[i];
s2 += denom[i];
}
}
return s1 / s2;
}
and the corresponding Common Lisp translation. The rational coefficients were obtained by using CLISP and rationalize
.
(defparameter lanczos-13-numerator-scaled
(make-array 13
:initial-contents '(5690652191347156388090791033559122686859/100000000000000000000000000000000
518970215581722725953135526808035119277/5000000000000000000000000000000
4318156564406929572773463644488934211171/50000000000000000000000000000000
866777786493522766954744748118106663217/20000000000000000000000000000000
1460557808768506808414169982791359218571/100000000000000000000000000000000
87042803874516147720517754741193639117/25000000000000000000000000000000
6018596171681098786670226533699352302507/10000000000000000000000000000000000
1899982326003635662468825860899727284273/25000000000000000000000000000000000
3477999801257688070178155057757599493763/500000000000000000000000000000000000
4499445569063168119446858607650988409623/10000000000000000000000000000000000000
121999549265476092677991310389728258513/6250000000000000000000000000000000000
5098416655656676188125178644804694509993/10000000000000000000000000000000000000000
3030921173124453262891876982277968441611/500000000000000000000000000000000000000000)
:element-type 'rational))
(defparameter lanczos-13-denominator
(make-array 13
:initial-contents '(0
39916800
120543840
150917976
105258076
45995730
13339535
2637558
357423
32677
1925
66
1)
:element-type 'rational))
(defun evaluate-rational (numerator denominator z)
;; (declare (double-float z))
(assert (= (length numerator)
(length denominator)) () "Numerator and denominator must be of the same length")
(let (s1 s2)
(if (<= z 1)
(progn
(setf s1 (aref numerator (1- (length numerator)))
s2 (aref denominator (1- (length denominator))))
(loop for i from (- (length numerator) 2) downto 0
do (setf s1 (* s1 z)
s1 (+ s1 (aref numerator i))
s2 (* s2 z)
s2 (+ s2 (aref denominator i)))))
(progn
(setf z (/ z)
s1 (aref numerator 0)
s2 (aref denominator 0))
(loop for i from 1 below (length numerator)
do (setf s1 (* s1 z)
s1 (+ s1 (aref numerator i))
s2 (* s2 z)
s2 (+ s2 (aref denominator i)))
)))
(/ s1 s2)))
(defun lanczos-sum (x &key (scaled t))
"Return the Lanczos sum for x, exp(g), possibly normalised"
;; (declare (double-float x))
(if scaled
(evaluate-rational lanczos-13-numerator-scaled lanczos-13-denominator x)
(evaluate-rational lanczos-13-numerator lanczos-13-denominator x)))
The Lisp version will give 5/6 digits of accuracy in comparison to Cephes/Boost. For example:
LS-USER> (lanczos::lanczos-sum 79.50051116943359375d0)
0.007539495225797168d0
LS-USER> (cephes:lanczos-sum-scaled 79.50051116943359375d0)
0.007539542760993982d0
using a rational for x
doesn't help:
CL-USER> (lanczos::lanczos-sum (rationalize 79.50051116943359375d0))
13651753323161301968358397968419859170434718739538108351630041579990307212168430280955832885058333515240792977193277162131291/1810698583169123197442845358696532421045761520142499578707004305761239420788152112760500000000000000000000000000000000000000000
CL-USER> (float * 1d0)
0.0075394952257971685d0
Converting the Coefficients
Using CLISP:
(SETF (EXT:LONG-FLOAT-DIGITS) 3322)
(defparameter boost-numerator-scaled '(56906521.91347156388090791033559122686859L0
103794043.1163445451906271053616070238554L0
86363131.28813859145546927288977868422342L0
43338889.32467613834773723740590533316085L0
14605578.08768506808414169982791359218571L0
3481712.15498064590882071018964774556468L0
601859.6171681098786670226533699352302507L0
75999.29304014542649875303443598909137092L0
6955.999602515376140356310115515198987526L0
449.9445569063168119446858607650988409623L0
19.51992788247617482847860966235652136208L0
0.5098416655656676188125178644804694509993L0
0.006061842346248906525783753964555936883222L0))
(defparameter boost-numerator '(23531376880.41075968857200767445163675473L0
42919803642.64909876895789904700198885093L0
35711959237.35566804944018545154716670596L0
17921034426.03720969991975575445893111267L0
6039542586.35202800506429164430729792107L0
1439720407.311721673663223072794912393972L0
248874557.8620541565114603864132294232163L0
31426415.58540019438061423162831820536287L0
2876370.628935372441225409051620849613599L0
186056.2653952234950402949897160456992822L0
8071.672002365816210638002902272250613822L0
210.8242777515793458725097339207133627117L0
2.506628274631000270164908177133837338626L0))
(defparameter boost-denominator '(0
39916800
120543840
150917976
105258076
45995730
13339535
2637558
357423
32670
1925
66
1))
(defun rationalize-coefficients (coeff)
(map 'list #'rationalize coeff))
(defun floatify-coefficients (coeff)
(map 'list #'(lambda (x)
(float x 1L0))
coeff))
and using this on the scaled numerator:
(rationalize-coefficients boost-numerator-scaled)
(5690652191347156388090791033559122686859/100000000000000000000000000000000
518970215581722725953135526808035119277/5000000000000000000000000000000
4318156564406929572773463644488934211171/50000000000000000000000000000000
866777786493522766954744748118106663217/20000000000000000000000000000000
1460557808768506808414169982791359218571/100000000000000000000000000000000
87042803874516147720517754741193639117/25000000000000000000000000000000
6018596171681098786670226533699352302507/10000000000000000000000000000000000
1899982326003635662468825860899727284273/25000000000000000000000000000000000
3477999801257688070178155057757599493763/500000000000000000000000000000000000
4499445569063168119446858607650988409623/10000000000000000000000000000000000000
121999549265476092677991310389728258513/6250000000000000000000000000000000000
5098416655656676188125178644804694509993/10000000000000000000000000000000000000000
3030921173124453262891876982277968441611/500000000000000000000000000000000000000000)
You can see that these are the coefficients I'm using. Just to double check:
(floatify-coefficients *)
(5.690652191347156388090791033559122686859L7 1.037940431163445451906271053616070238554L8
8.636313128813859145546927288977868422342L7 4.333888932467613834773723740590533316085L7
1.460557808768506808414169982791359218571L7 3481712.15498064590882071018964774556468L0
601859.6171681098786670226533699352302507L0 75999.29304014542649875303443598909137092L0
6955.999602515376140356310115515198987526L0 449.9445569063168119446858607650988409623L0
19.51992788247617482847860966235652136208L0 0.5098416655656676188125178644804694509993L0
0.006061842346248906525783753964555936883222L0)
Added post-comment by a community member:
Back on SBCL:
LS-USER> (defparameter *sample-double* 79.5005111694336d0)
*SAMPLE-DOUBLE*
LS-USER> (defparameter *cephes-answer* 0.007539542760993982d0)
*CEPHES-ANSWER*
LS-USER> (= 79.50051116943359375d0 *sample-double*)
T
LS-USER> (- (lanczos::lanczos-sum *sample-double*) *cephes-answer*)
-4.7535196814364744d-8
LS-USER> (- (cephes:lanczos-sum-scaled 79.5005111694336d0)
(float (lanczos::lanczos-sum (rationalize 79.5005111694336d0)) 1d0))
4.753519681349738d-8
So it seems SBCL is rather less accurate in this case.
Upvotes: 1
Views: 187
Reputation: 9282
No conversion to floats happens if you are doing rational arithmetic. If you hand the function a float argument then of course you are not doing rational arithmetic.
In the first version of this answer I assumed that your conversion of the C++ values to floats was wrong somehow and suggested you parse them differently.
However I checked that and I am getting the same values you are.
With these values (see below):
cl-user> *cephes-answer*
0.007539542760993982d0
cl-user> *sample-double*
79.5005111694336d0
cl-user> (= 79.50051116943359375d0 *sample-double*)
t
cl-user> (- (lanczos-sum *sample-double*) *cephes-answer*)
0.0d0
cl-user> (- (lanczos-sum (rationalize *sample-double*)) *cephes-answer*)
1.734723475976807d-18
This is SBCL, trunk of a few days ago, running on a Linux arm64 instance. As you can see there is a very tiny difference if you do entirely rational arithmetic.
I get the same results with LW and with CCL running under Rosetta.
For double argument to the function I also get no difference if I just use doubles rather than rationals for the numerator (see set-params
function below for how to set that up).
Thus I conclude that, with high probability, you are not running the code you think you are. Possible alternatives is buggy floating point somewhere or perhaps a buggy version of SBCL: both are probably unlikely here.
My code is below.
(in-package :cl-user)
;;; these are the values in the question
;;;
(defvar *given-numerator-scaled*
'(5690652191347156388090791033559122686859/100000000000000000000000000000000
518970215581722725953135526808035119277/5000000000000000000000000000000
4318156564406929572773463644488934211171/50000000000000000000000000000000
866777786493522766954744748118106663217/20000000000000000000000000000000
1460557808768506808414169982791359218571/100000000000000000000000000000000
87042803874516147720517754741193639117/25000000000000000000000000000000
6018596171681098786670226533699352302507/10000000000000000000000000000000000
1899982326003635662468825860899727284273/25000000000000000000000000000000000
3477999801257688070178155057757599493763/500000000000000000000000000000000000
4499445569063168119446858607650988409623/10000000000000000000000000000000000000
121999549265476092677991310389728258513/6250000000000000000000000000000000000
5098416655656676188125178644804694509993/10000000000000000000000000000000000000000
3030921173124453262891876982277968441611/500000000000000000000000000000000000000000))
(defvar *given-denominator* '(0
39916800
120543840
150917976
105258076
45995730
13339535
2637558
357423
32670
1925
66
1))
(defvar *lanczos-13-numerator-scaled*)
(defvar *lanczos-13-denominator*)
(defun set-params (&key float-numerator)
(setf *lanczos-13-numerator-scaled*
(coerce (if float-numerator
(mapcar (lambda (r)
(float r 1.0d0))
*given-numerator-scaled*)
*given-numerator-scaled*)
'vector)
*lanczos-13-denominator* (coerce *given-denominator* 'vector))
float-numerator)
(set-params)
(defun evaluate-rational (numerator denominator z)
(assert (= (length numerator)
(length denominator)) () "Numerator and denominator must be of the same length")
(let (s1 s2)
(if (<= z 1)
(progn
(setf s1 (aref numerator (1- (length numerator)))
s2 (aref denominator (1- (length denominator))))
(loop for i from (- (length numerator) 2) downto 0
do (setf s1 (* s1 z)
s1 (+ s1 (aref numerator i))
s2 (* s2 z)
s2 (+ s2 (aref denominator i)))))
(progn
(setf z (/ z)
s1 (aref numerator 0)
s2 (aref denominator 0))
(loop for i from 1 below (length numerator)
do (setf s1 (* s1 z)
s1 (+ s1 (aref numerator i))
s2 (* s2 z)
s2 (+ s2 (aref denominator i)))
)))
(/ s1 s2)))
(defun lanczos-sum (x &key (scaled t))
"Return the Lanczos sum for x, exp(g), possibly normalised"
(if scaled
(evaluate-rational *lanczos-13-numerator-scaled* *lanczos-13-denominator* x)
(error "not implemented")))
(defparameter *sample-double*
;; Sample double in the question
79.50051116943359375d0)
(defparameter *cephes-answer*
;; Answer from C++ in the question
0.007539542760993982d0)
(defun ts ()
(let ((*lanczos-13-numerator-scaled* *lanczos-13-numerator-scaled*)
(*lanczos-13-denominator* *lanczos-13-denominator*))
(set-params)
(let ((dd (- *cephes-answer* (lanczos-sum *sample-double*)))
(rd (- *cephes-answer* (lanczos-sum (rationalize
*sample-double*)))))
(set-params :float-numerator t)
(values dd rd
(- *cephes-answer* (lanczos-sum *sample-double*))
(- *cephes-answer* (lanczos-sum (rationalize
*sample-double*)))))))
Upvotes: 1