Reputation: 9

Computing the 1 millionth fibonacci number

I am trying to compute the 1 millionth fibonacci number using the Big Int Library since the number is more than 200,000 digits. I can't seem to fix this problem I am having when taking the sqrt or pow, Big Int can't be assigned to pow or sqrt.

BigInteger Phi = (sqrt(5) + 1) / 2;

BigInteger phi = (sqrt(5) - 1) / 2;

BigInteger PhiSquared = pow(Phi, n);
BigInteger phiSquared = pow(phi, n);

Upvotes: 0

Answers (2)

Duke Le

Reputation: 472

Cross post from my answer here: https://stackoverflow.com/a/79318085/3192711

Compute Fibo(10^6) in 2 millisecond, Fibo(10^9) in 3 second. Tested on Ryzen 5900X, Ubuntu 18.04, g++ 13.2

Summary of empirical measurement:

I measure time to calculate Fibo(N) with N = 10^7, 10^8, 10^9, unit is millisecond

N = 10^7

dp cost = 27.7669 (almost similar method 3, a bit slower)
mpz_fib_ui cost = 16.284 (GMP math library)
dp no-recursion cost = 16.274 (method 6. It has the same speed as mpz_fib_ui, but for larger numbers it uses 2 threads, making it faster)
binet cost = 657.771 (Binet's formula)
matrix cost = 232.092 (Matrix fast exponentiation)
cost to convert number to base10 string = 105.358

N = 10^8

dp cost = 397.618
mpz_fib_ui cost = 268.026
dp no-recursion cost = 239.268
binet cost = 12968.6
matrix cost = 3609.47
cost to convert number to base10 string = 1904.29
Output string cost = 124.072

N = 10^9

dp cost = 5390.61
mpz_fib_ui cost = 3801.59
dp no-recursion cost = 2980.57
binet cost = 183846
matrix cost = 50054.3
cost to convert number to base10 string = 30636.7

Repo to test: https://github.com/lehuyduc/fast-fibonacci/tree/main

To run, clone then run full_run.sh

#include <iostream>
#include <gmpxx.h>
#include <unordered_map>
#include <string>
#include <sstream>
#include <chrono>
#include <map>
#include <fstream>
#include <thread>
#include <mutex>
#include <memory>
#include <vector>
#include "gmp-impl.h"
#include "longlong.h"
using namespace std;

// Important functions:
// gmp_fibo
// F(int n), dp_fibo
// best_fibo
// binet_fibo
// matrix_fibo

class MyTimer {
    std::chrono::time_point<std::chrono::system_clock> start;

public:
    void startCounter() {
        start = std::chrono::system_clock::now();        
    }

    int64_t getCounterNs() {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count();        
    }

    int64_t getCounterMs() {
        return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count();        
    }

    double getCounterMsPrecise() {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now() - start).count()
                / 1000000.0;        
    }
};

bool ktt = false;

mpz_class gmp_fibo(int n) {
    mpz_class fib_n, fib_n1;
    mpz_fib_ui(fib_n.get_mpz_t(), n);
    return fib_n;
}

unordered_map<int, mpz_class> dp;
mpz_class& F(int n) {
    if (n <= 1) return dp[n];
    auto it = dp.find(n);
    if (it != dp.end()) return it->second;

    int k = n / 2;
    auto Fk = F(k);
    auto Fk1 = F(k - 1);
    if (n % 2 == 0) {           
        return dp[n] = Fk * Fk + Fk1 * Fk1;
    }

    return dp[n] = Fk * (Fk1 + Fk1 + Fk);
}

mpz_class dp_fibo(int n) {
    return F(n - 1);
}

void list_dependency(map<int,int>& mp, int n) {    
    if (n <= 1 || mp[n]) return;
    mp[n] = 1;
    list_dependency(mp, n / 2);
    list_dependency(mp, n / 2 - 1);
}

void mpz_class_reserve_bits(mpz_class& x, mp_bitcnt_t bitcount)
{
    // mpz_realloc2 is a C function, so we must call it on x.get_mpz_t().
    mpz_realloc2(x.get_mpz_t(), bitcount);
}

void mpz_rsblsh2(mpz_class& c, mpz_class& a, mpz_class& b) {
    // Access the internal representation of a, b, and c
    auto a_mpz = a.get_mpz_t();
    auto b_mpz = b.get_mpz_t();
    auto c_mpz = c.get_mpz_t();

    // Get the limb data and sizes    
    mp_size_t a_size = abs(a_mpz->_mp_size); // Number of limbs in a
    mp_size_t b_size = abs(b_mpz->_mp_size); // Number of limbs in b

    // Compute the maximum size needed
    // Idk why it has to realloc every time. If I alloc one big block at the start, its size is reset to a small size anyway
    mp_size_t max_size = std::max(a_size, b_size) + 2; // +2 for carry/borrow
    mpz_realloc2(c_mpz, (max_size) * GMP_LIMB_BITS);
    mpz_realloc2(a_mpz, (max_size) * GMP_LIMB_BITS); 
    mpz_realloc2(b_mpz, (max_size) * GMP_LIMB_BITS);

    mp_ptr a_limbs = a_mpz->_mp_d;
    mp_ptr b_limbs = b_mpz->_mp_d;
    for (mp_size_t i = a_size; i < max_size; ++i) {
        a_mpz->_mp_d[i] = 0;
    }

    for (mp_size_t i = b_size; i < max_size; ++i) {
        b_mpz->_mp_d[i] = 0;
    }

    // Ensure c's limb data is clean
    std::fill(c_mpz->_mp_d + a_size, c_mpz->_mp_d + max_size, 0);
    
    mp_limb_t carry = mpn_rsblsh2_n(c_mpz->_mp_d, b_limbs, a_limbs, a_size);

    // Determine the number of significant limbs
    mp_size_t result_size = max_size;
    while (result_size > 0 && c_mpz->_mp_d[result_size - 1] == 0) {
        result_size--; // Trim trailing zeros
    }

    // Handle carry propagation correctly
    if (carry > 0) {
        c_mpz->_mp_d[result_size] = carry; // Add carry as a new highest limb
        result_size++;
    }

    // Update the size of c
    c_mpz->_mp_size = result_size;

    // Final sanity check for size mismatches
    if (result_size == 0 || result_size > max_size) {
        throw std::logic_error("Unexpected result size mismatch");
    }
}


mpz_class best_fibo(int n) {
    if (n <= 100) return gmp_fibo(n);
    map<int,int> mp;
    list_dependency(mp, n);
    
    mpz_class f[3], dummy;
    bool started = false;
    bool flag = 0;
    map<int, mpz_class*> temps;

    for (int i = 0; i < 3; i++) {
        f[i] = 0;
        mpz_class_reserve_bits(f[i], 2 * n + 64);
    }
    dummy = 0;
    mpz_class_reserve_bits(dummy, 2 * n + 64);

    MyTimer timer;
    timer.startCounter();
    // for (auto [key, value] : mp) cout << key << "\n";
    // cout << std::endl;
    vector<thread> threads;

    for (auto &[key, value] : mp)
        if (key >= 20 && mp.count(key - 1) && !mp.count(key - 2))
    {
        int N = key;
        // cout << "key = " << N << std::endl;

        if (!started) {
            f[0] = gmp_fibo(N - 1);
            f[1] = gmp_fibo(N);
            f[2] = f[0] + f[1];
            temps[N - 1] = &f[0];
            temps[N] = &f[1];
            temps[N + 1] = &f[2];
            started = true;
            continue;
        }

        // edge cases: 160, 170
        // 2 3 4 5, 8 9 10, 18 19 20, 38 39 40, 79 80, 160
        // 2 3 4 5, 8 9 10, 19 20 21, 41 42, 84 85, 170
        // 13 14 15, 29 30 31, 61 62 63, 125 126 252
        // 13 14 15, 29 30 31, 61 62 63, 125 126 253
        // 13 14 15, 29 30 31, 60 61 62, 123 124 125, 248 249 250, 499 500, 1000
        // F[2k] = F[k]*(F[k]+2F[k-1])
        // F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k
        // OR
        // F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k
        // F[2k-1] =   F[k]^2 + F[k-1]^2
        // F[2k] = F[2k+1] - F[2k-1]

        int k = N / 2;
        auto &Fk = *temps[k];
        auto &Fk1 = *temps[k - 1];                
        int sign = (k % 2 == 0) ? 1 : -1;

        if (N % 2 == 1) {
            // in this case, previous F[k+1] is unused. We that to store temporary result
            auto& Fkb = *temps[k + 1];            
            // Use f[k + 1] to store F[n - 1], f[k] = F[n], F[k - 1] = F[n + 1]

            if (n >= 50'000'000) {
                threads.clear();
                threads.emplace_back([&]() {
                    Fk *= Fk;
                });
                Fk1 *= Fk1;     
                threads[0].join();
            } else {
                Fk *= Fk;
                Fk1 *= Fk1;
            }
            
            //Fkb = (4 * Fk + 2 * sign) - Fk1; // Fkb = F[2 * k + 1] = F[N]
            mpz_rsblsh2(Fkb, Fk, Fk1);            
            Fkb += 2 * sign;
            Fk1 += Fk; // Fk1 = F[2 * k - 1] = F[n - 2]
            Fk = Fkb - Fk1; // F[k] = F[2 * k] = F[n - 1]            

            if (mp.count(N + 1)) Fk1 = Fkb + Fk;            
            
            temps.clear();
            temps[N - 1] = &Fk;
            temps[N] = &Fkb;
            temps[N + 1] = &Fk1;
        } else {
            // in this case, F[k - 2] is unused. Use it to store F[n - 1]
            auto& Fk2 = *temps[k - 2];            

            if (n >= 50'000'000) {
                threads.clear();
                threads.emplace_back([&]() {
                    Fk *= Fk;
                });
                Fk1 *= Fk1;     
                threads[0].join();
            } else {
                Fk *= Fk;
                Fk1 *= Fk1;
            }
    
            // Fk2 = (4 * Fk + 2 * sign) - Fk1; // Fk2 = F[2k + 1] = F[N + 1] 
            mpz_rsblsh2(Fk2, Fk, Fk1);            
            Fk2 += 2 * sign;
            Fk1 += Fk; // Fk1 = F[2k -1]; F[2k - 1] = F[N - 1]
            Fk = Fk2 - Fk1; // Fk = F[2k] = F[N]


            temps[N - 1] = &Fk1;    
            temps[N] = &Fk;
            temps[N + 1] = &Fk2;
        }
    }

    int k = n / 2;
    auto& Fk = *temps[k];
    auto& Fk1 = *temps[k - 1];
    int sign = (k % 2 == 0) ? 1 : -1;

    if (n % 2 == 0) {
        //return Fk * (Fk + 2 * Fk1);        
        Fk1 *= 2;
        Fk *= (Fk + Fk1);
        return std::move(Fk);
    }

    //return (2 * Fk + Fk1) * (2 * Fk - Fk1) + 2 * sign;
    Fk *= 2;
    Fk1 = (Fk + Fk1) * (Fk - Fk1) + 2 * sign;
    return std::move(Fk1);
}

mpz_class binet_fibo(int n)
{
    // Increase default precision so we don't lose accuracy for large n.    
    
    mpf_set_default_prec(n + 64);

    // Use mpf_class for floating-point operations
    mpf_class sqrt5(5);
    sqrt5 = sqrt(sqrt5); // sqrt(5)

    mpf_class phi = (mpf_class(1) + sqrt5) / 2; // (1 + sqrt(5)) / 2

    // power = phi^n
    mpf_class power;//(0, 2 * n + 32);
    mpf_pow_ui(power.get_mpf_t(), phi.get_mpf_t(), n);

    // result_float = power / sqrt(5) + 0.5
    mpf_class result_float = power / sqrt5;
    result_float += 0.5;

    // Convert the floating-point approximation to an integer (mpz_class)
    mpz_class result = mpz_class(result_float);

    return result;
}

struct Matrix {
    int n;
    vector<mpz_class> data;

    Matrix(int n) {
        this->n = n;
        data.resize(n * n);
    }

    Matrix(int n, const vector<int> &a) {
        this->n = n;
        data.resize(n * n);
        for (int i = 0; i < n * n; i++) data[i] = a[i];        
    }

    mpz_class& get(int row, int col) {
        return data[row * n + col];
    }

    const mpz_class& get(int row, int col) const {
        return data[row * n + col];
    }


    Matrix& operator*=(const Matrix& other) {
        Matrix dummy(n);

        for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++) {
            dummy.get(i, j) = 0;
            for (int k = 0; k < n; k++)
                dummy.get(i, j) += get(i, k) * other.get(k, j);
        }

        for (int i = 0; i < n * n; i++) data[i] = std::move(dummy.data[i]);        
        return *this;
    }
};

mpz_class matrix_fibo(int n) {
    if (n <= 2) return 1;
    n--;
    Matrix pow = Matrix(2, {1, 1, 1, 0});
    Matrix res = Matrix(2, {1, 0, 0, 1});
    
    while (n > 0) {
        if (n & 1) res *= pow;
        pow *= pow;
        n >>= 1;
    }
    return res.data[0];
}

bool test(int L, int R)
{
    for (int n = L; n <= R; n++) {
        cout << "n = " << n << "\n";
        auto res1 = gmp_fibo(n);
        auto res2 = best_fibo(n);
        string s1 = res1.get_str();
        string s2 = res2.get_str();
        
        if (s1.length() != s2.length()) cout << "Wrong length\n";

        if (s1 != s2) {
            cout << s1 << " " << s2 << "\n";
            cout << "Fail at n = " << n << "\n";
            return false;
        }        
    }

    cout << "Pass all\n";
    return true;
}

bool test(int n) {
    MyTimer timer;    

    timer.startCounter();
    auto res2 = dp_fibo(n);
    double cost2 = timer.getCounterMsPrecise();
    cout << "dp cost = " << cost2 << std::endl;

    timer.startCounter();
    auto res1 = gmp_fibo(n);
    double cost1 = timer.getCounterMsPrecise();    
    cout << "mpz_fib_ui cost = " << cost1 << std::endl;


    timer.startCounter();
    auto res3 = best_fibo(n);
    double cost3 = timer.getCounterMsPrecise();    
    cout << "dp no-recursion cost = " << cost3 << std::endl;    

    timer.startCounter();
    //auto res4 = res1;
    auto res4 = binet_fibo(n);
    double cost4 = timer.getCounterMsPrecise();
    cout << "binet cost = " << cost4 << std::endl;

    timer.startCounter();
    //auto res5 = res1;
    auto res5 = matrix_fibo(n);
    double cost5 = timer.getCounterMsPrecise();
    cout << "matrix cost = " << cost5 << std::endl;

    timer.startCounter();
    string s1 = res1.get_str();
    cout << "cost to convert number to base10 string = " << timer.getCounterMsPrecise() << std::endl;
    string s2 = res2.get_str();
    string s3 = res3.get_str();
    string s4 = res4.get_str();
    string s5 = res5.get_str();
    
    bool ok = true;
    if (s2 != s1) {cout << "DP wrong answer\n"; ok = false;};
    if (s3 != s1) {cout << "Non-recursive DP wrong answer\n"; ok = false;}
    if (s4 != s1) {cout << "Binet wrong answer\n"; ok = false;}
    if (s5 != s1) {cout << "Matrix wrong answer\n"; ok = false;}
    
    timer.startCounter();
    ofstream fo("output.txt");
    fo << s1 << "\n";
    fo.close();
    cout << "Output string cost = " << timer.getCounterMsPrecise() << "\n";

    return ok;
}

int main(int argc, char* argv[])
{
    std::ios_base::sync_with_stdio(false);
    cin.tie(0);
    
    int L = (argc > 1) ? atoi(argv[1]) : 10000000;
    int R = (argc > 2) ? atoi(argv[2]) : L;
    R = max(L, R);

    dp[0] = 1;
    dp[1] = 1;
    dp[2] = 2;
    dp[3] = 3;

    bool result;
    if (L == R) result = test(L);
    else result = test(L, R);    

    if (result) cout << "Correct\n";
    else cout << "Wrong\n";
    return 0;
}

Upvotes: 0

PaulMcKenzie

Reputation: 35440

If you want to use a library that can handle big integers, the simplest thing is to get the boost library and use the cpp_int type in the multiprecision library.

Since the multiprecision library is header-file only, this would be one of the simplest ones to use right out-of-box with minimal setup.

Here is a simple, straightforward implementation that computes the 1 millionth Fibonacci number without using floating point (which could yield inexact results):

#include <boost/multiprecision/cpp_int.hpp>
#include <iostream>
 
int main() {
    using Int = boost::multiprecision::cpp_int;
    Int n1 = 0, n2 = 1;
    Int n3;
    for (long i = 1; i <= 1000000; ++i)
    {
        n3 = n1 + n2;
        n1 = n2;
        n2 = n3;
    }
    std::cout << n3;
}

This took around 40 seconds to complete on an Intel I7 laptop, using Visual Studio 2019 in release mode.

The resulting number is a 208988 digit number.

Upvotes: 3

Computing the 1 millionth fibonacci number

Answers (2)

Summary of empirical measurement:

Related Questions