Tomilov Anatoliy
Tomilov Anatoliy

Reputation: 16711

Optimization of determinant calculation function

Searching for the best algorithm I found there is a tradeoff: complexity to implement and big constant on the one hand, and runtime complexity on the other hand. I choose LU-decomposition-based algorithm, because it is quite simple to implement and have good enough performance.

#include <valarray>
#include <vector>
#include <utility>

#include <cmath>
#include <cstddef>
#include <cassert>

template< typename value_type >
struct math
{

    using size_type = std::size_t;

    size_type const dimension_;
    value_type const & eps;

    value_type const zero = value_type(0);
    value_type const one = value_type(1);

private :

    using vector = std::valarray< value_type >;
    using matrix = std::vector< vector >;

    matrix matrix_;
    matrix minor_;

public :

    math(size_type const _dimension,
         value_type const & _eps)
        : dimension_(_dimension)
        , eps(_eps)
        , matrix_(dimension_)
        , minor_(dimension_ - 1)
    { 
        assert(1 < dimension_);
        assert(!(eps < zero));
        for (size_type r = 0; r < dimension_; ++r) {
            matrix_[r].resize(dimension_);
        }
        size_type const minor_size = dimension_ - 1;
        for (size_type r = 0; r < minor_size; ++r) {
            minor_[r].resize(minor_size);
        }
    }

    template< typename rhs = matrix >
    void
    operator = (rhs const & _matrix)
    {
        auto irow = std::begin(matrix_);
        for (auto const & row_ : _matrix) {
            auto icol = std::begin(*irow);
            for (auto const & v : row_) {
                *icol = v;
                ++icol;
            }
            ++irow;
        }
    }

    value_type
    det(matrix & _matrix,
        size_type const _dimension)
    { // calculates lower unit triangular matrix and upper triangular
        assert(0 < _dimension);
        value_type det_ = one;
        for (size_type i = 0; i < _dimension; ++i) {
            vector & ri_ = _matrix[i];
            using std::abs;
            value_type max_ = abs(ri_[i]);
            size_type pivot = i;
            {
                size_type p = i;
                while (++p < _dimension) {
                    value_type y_ = abs(_matrix[p][i]);
                    if (max_ < y_) {
                        max_ = std::move(y_);
                        pivot = p;
                    }
                }
            }
            if (!(eps < max_)) { // regular?
                return zero; // singular
            }
            if (pivot != i) {
                det_ = -det_; // each permutation flips sign of det
                ri_.swap(_matrix[pivot]);
            }
            value_type & dia_ = ri_[i];
            det_ *= dia_; // det is multiple of diagonal elements
            for (size_type j = 1 + i; j < _dimension; ++j) {
                _matrix[j][i] /= dia_;
            }
            for (size_type a = 1 + i; a < _dimension; ++a) {
                vector & a_ = minor_[a - 1];
                value_type const & ai_ = _matrix[a][i];
                for (size_type b = 1 + i; b < _dimension; ++b) {
                    a_[b - 1] = ai_ * ri_[b];
                }
            }
            for (size_type a = 1 + i; a < _dimension; ++a) {
                vector const & a_ = minor_[a - 1];
                vector & ra_ = _matrix[a];
                for (size_type b = 1 + i; b < _dimension; ++b) {
                    ra_[b] -= a_[b - 1];
                }
            }
        }
        return det_;
    }

    value_type
    det(size_type const _dimension)
    {
        return det(matrix_, _dimension);
    }

    value_type
    det()
    {
        return det(dimension_);
    }

};

// main.cpp
#include <iostream>

#include <cstdlib>

int
main()
{
    using value_type = double;
    value_type const eps = std::numeric_limits< value_type >::epsilon();
    std::size_t const dimension_ = 3;
    math< value_type > m(dimension_, eps);
    m = { // example from https://en.wikipedia.org/wiki/Determinant#Laplace.27s_formula_and_the_adjugate_matrix
            {-2.0, 2.0, -3.0},
            {-1.0, 1.0,  3.0},
            { 2.0, 0.0, -1.0}
        };
    std::cout << m.det() << std::endl; // 18
    return EXIT_SUCCESS;
}

LIVE DEMO

det() function is hottest function in the algorithm, that uses it as a part. I sure det() is not as fast as it can be, because runtime performance comparisons (using google-pprof) to reference implementation of the whole algorithm shows a disproportion towards det().

How to improve performance of det() function? What are evident optimizations to apply immediately? Should I change the indexing and memory access order or something else? Container types? Prefetching?

Typical value of dimension_ is in the range of 3 to 10 (but can be 100, if value_type is mpfr or something else).

Upvotes: 3

Views: 402

Answers (1)

Walter
Walter

Reputation: 45414

Isn't your (snippet from det())

       for (size_type a = 1 + i; a < _dimension; ++a) {
            vector & a_ = minor_[a - 1];
            value_type const & ai_ = _matrix[a][i];
            for (size_type b = 1 + i; b < _dimension; ++b) {
                a_[b - 1] = ai_ * ri_[b];
            }
        }
        for (size_type a = 1 + i; a < _dimension; ++a) {
            vector const & a_ = minor_[a - 1];
            vector & ra_ = _matrix[a];
            for (size_type b = 1 + i; b < _dimension; ++b) {
                ra_[b] -= a_[b - 1];
            }
        }

doing the same as

       for (size_type a = 1 + i; a < _dimension; ++a) {
            vector & ra_ = _matrix[a];
            value_type ai_ = ra_[i];
            for (size_type b = 1 + i; b < _dimension; ++b) {
                ra_[b] -= ai_ * ri_[b];
            }
        }

without any need for minor_? Moreover, now the inner loop can easily be vectorised.

Upvotes: 1

Related Questions