C_Schuler
C_Schuler

Reputation: 83

Isolate the fractional part of a double using bitwise operations in c++

I am stuck on an assignment for my modern numerical software development class.

Function prototype (assume x = 6.5):

//returns the IEEE fractional part of x as a decimal floating point number. You must convert binary to decimal.
inline double fraction(double x) {}

What I got:

inline double fraction(double x)
{
    // Get the fraction
    unsigned long long frac_mask = (1u << 52) - 1;                      // Get 52 1's
    unsigned long long xint = *reinterpret_cast<long long*>(&x);        // Interpret x's bits as an int
    unsigned long long frac_num = xint & frac_mask;                                 // Get the fraction as an int
    double fraction = double(frac_num) / double(2u << 52);              // Divide frac_num by 2^52

    return fraction;

    /* This code works, but is not what is specified:
        double fraction = x / pow(2, exponent(x));
        fraction = fmod(fraction, 1);
        return fraction;
    */
}

I keep getting a NaN. The answer I am looking for is 0.625. I am kind of hopelessly lost. Any help is much appreciated.

I was able to successfully isolate the exponent of the double with the following function:

inline int exponent(double x) //returns the unbiased(true) binary exponent of x as a decimal integer. Remember that subnormals are a special case. Consider 0 to be a subnormal.
{
    if (x == 0.0)
        return -1022;
    else if (isnan(x))
        return 1024;

    // Get the exponent
    unsigned long long exp_mask = (1u << 11) - 1;                       // Get eleven 1's
    exp_mask <<= 52;                                                    // Move into place
    unsigned long long xint = *reinterpret_cast<long long*>(&x);        // Interpret x's bits as an int
    unsigned long long exp_bits = xint & exp_mask;                      // Get the exponent bits
    unsigned long long exp = exp_bits >> 52;                            // Get the exponent as a number
    return exp -1023;
}

I am confused why the exponent logic works, but the fraction won't.

Upvotes: 3

Views: 1602

Answers (2)

Matthias
Matthias

Reputation: 4677

Code: Try It Online

// bit_cast, bit_width
#include <bit>
// assert
#include <cassert>
// uint64_t
#include <cstdint>

[[nodiscard]]
constexpr auto Frac(double x)
    noexcept -> double
{
    using Bits = std::uint64_t;
    constexpr Bits s_sign_bit_count{ 1ull };
    constexpr Bits s_exponent_bit_count{ 11ull };
    constexpr Bits s_mantissa_bit_count{ 52ull };
    constexpr Bits s_sign_max{ (1ull << s_sign_bit_count) - 1ull };
    constexpr Bits s_exponent_max{ (1ull << s_exponent_bit_count) - 1ull };
    constexpr Bits s_mantissa_max{ (1ull << s_mantissa_bit_count) - 1ull };
    constexpr Bits s_sign_mask{ s_sign_max << (s_exponent_bit_count + s_mantissa_bit_count) };
    constexpr Bits s_exponent_mask{ s_exponent_max << s_mantissa_bit_count };
    constexpr Bits s_mantissa_mask{ s_mantissa_max };
    constexpr Bits s_exponent_bias{ (1ull << (s_exponent_bit_count - 1ull)) - 1ull };

    if ((-1.0 < x) and (x < 1.0))
    {
        // Includes: subnormal, +0, -0
        // No integral part.
        return x;
    }

    const Bits u                  = std::bit_cast< Bits >(x);
    const Bits exponent_bits      = (u & s_exponent_mask) >> s_mantissa_bit_count;
    assert(s_exponent_bias <= exponent_bits);
    const Bits exponent           = exponent_bits - s_exponent_bias;
    if (s_mantissa_bit_count <= exponent)
    {
        // Includes: +Inf, -Inf, NaN
        // No fractional part.
        return {};
    }

    const Bits fraction_bit_count = s_mantissa_bit_count - exponent;
    const Bits fraction_mask      = (1ull << fraction_bit_count) - 1ull;
    const Bits fraction_bits      = u & fraction_mask;
    const Bits fraction_shift     = s_mantissa_bit_count - std::bit_width(fraction_bits)
                                  + 1ull; // Implicit leading one

    Bits fraction = u & s_sign_mask;
    if (fraction_shift < exponent_bits)
    {
        const Bits fraction_exponent = exponent_bits - fraction_shift;
        const Bits fraction_mantissa = (fraction_bits << fraction_shift)
                                     // Remove implicit leading one.
                                     & s_mantissa_mask;

        fraction |= (fraction_exponent << s_mantissa_bit_count);
        fraction |=  fraction_mantissa;
     }

    return std::bit_cast< double >(fraction);
}

Depending on your own preference, you can return x as well in case of a NaN.

Test:

// setprecision
#include <iomanip>
// cout, endl, fixed
#include <iostream>

auto main() -> int
{
    std::cout << std::fixed << std::setprecision(11);
    
    {
        constexpr double d = 7.99999952316;
        constexpr double frac = Frac(d);
        std::cout << "frac(" << d << ") = " << frac << std::endl;
    }

    {
        constexpr double d = 0.5;
        constexpr double frac = Frac(d);
        std::cout << "frac(" << d << ") = " << frac << std::endl;
    }
    {
        constexpr double d = 0.75;
        constexpr double frac = Frac(d);
        std::cout << "frac(" << d << ") = " << frac << std::endl;
    }
    {
        constexpr double d = 6.5;
        constexpr double frac = Frac(d);
        std::cout << "frac(" << d << ") = " << frac << std::endl;
    }
    {
        constexpr double d = 4.711;
        constexpr double frac = Frac(d);
        std::cout << "frac(" << d << ") = " << frac << std::endl;
    }

    return 0;
}

Output

frac(7.99999952316) = 0.99999952316
frac(0.50000000000) = 0.50000000000
frac(0.75000000000) = 0.75000000000
frac(6.50000000000) = 0.50000000000
frac(4.71100000000) = 0.71100000000

Upvotes: 0

Mats Petersson
Mats Petersson

Reputation: 129484

You are mixing unsigned (presumably 32-bits) with values that need 64 bits.

For example, frac_num is only 32-bits, use a long or long long... [or uint64_t, which is a more reliable way to get a 64-bit value.

inline double fraction(double x)
{
    // Get the fraction
    uint64_t frac_mask = (1ul << 52) - 1;                      // Get 52 1's
//    uint64_t xint = *reinterpret_cast<uint64_t*>(&x);        // Interpret x's bits as an int
      uint64_t xint; 
      memcpy(&xint, &x, sizeof(xint));        // Interpret x's bits as an int
    int64_t frac_num = xint & frac_mask;                                    // Get the fraction as an int
    frac_num += 1ul << 52; // Add hidden bit.
    double fraction = double(frac_num) / double(2ul << 52);              // Divide frac_num by 2^52

    return fraction;
}

Note the addition of l to the 1u and 2u, to ensure they are long, and. You will need to include cstdint to get the sized integers.

Edit: that will of course just give you the mantissa in the form of a fraction. The decimal point may be anywhere between bit 1023 and -1023, meaning that only values between -1 and +1 will have the correct result.

A complete example using the code above [+ some printouts]

#include <cstdint>
#include <iostream>
#include <cstring>

inline double fraction(double x)
{
    // Get the fraction
    uint64_t frac_mask = (1ul << 52) - 1;                      // Get 52 1's
    std::cout << "mask=" << std::hex << frac_mask << std::endl;
//    uint64_t xint = *reinterpret_cast<uint64_t*>(&x);        // Interpret x's bits as an int
      uint64_t xint; 
      memcpy(&xint, &x, sizeof(xint));        // Interpret x's bits as an int
    int64_t frac_num = xint & frac_mask;                                    // Get the fraction as an int

    frac_num += 1ul << 52; // Add hidden bit.
    std::cout << "xint=" << std::hex << xint << " num=" << std::hex << frac_num << std::endl;
    double fraction = double(frac_num) / double(2ul << 52);              // Divide frac_num by 2^52

    return fraction;
}


int main()
{
    double a = 0.5;
    double b = 0.75;
    double d = 6.5;
    double e = 4.711;


    double fa  = fraction(a);
    double fb  = fraction(b);
    double fd  = fraction(d);
    double fe  = fraction(e);

    std::cout << "fa=" << std::fixed << fa << " fb=" << fb << std::endl;
    std::cout << "fd=" << std::fixed << fd << " fe=" << fe << std::endl;
}

resutl of running the above:

mask=fffffffffffff
xint=3fe0000000000000 num=10000000000000
mask=fffffffffffff
xint=3fe8000000000000 num=18000000000000
mask=fffffffffffff
xint=401a000000000000 num=1a000000000000
mask=fffffffffffff
xint=4012d810624dd2f2 num=12d810624dd2f2
fa=0.500000 fb=0.750000
fd=0.812500 fe=0.588875

Note that if you divide 4.711 by 2 a few times [3 times to be precise], you get 0.588875, and if you divide 6.5 by 8 (or by 2 three times over), you get 0.8125

I need to go to bed, but you basically have to take the exponent into account to figure out the fraction of a floating point number. Or simply convert to an integer, and subtract it - as long as it's within range.

Upvotes: 2

Related Questions