Reputation: 303
I am trying to perform something that is brain-dead simple in any other language but not javascript: get the bits out of float (and the other way around).
In C/C++ it would be something like
float a = 3.1415; int b = *((int*)&a);
and vice versa
int a = 1000; float b = *((float*)&a);
In C# you can use the BitConverter ...floatBits or something alike in Java... Even in VB6 for Christ's sake you can memcpy a float32 into an int32. How on earth can I translate between and int and a float in javascript?
Upvotes: 30
Views: 12133
Reputation: 97
These are 4 functions I use to handle the representation of floating-point numbers:
The first 2 functions return an array (filled with 0 or 1) with the sign, exponent and mantissa, and the last 2 return a floating point number.
When you want to convert a float number IEEE 754 to an array of 32-bits (0 or 1) :
function float32ToBits(floatNumber) {
  // Reject anything that is not a number.
  if (typeof floatNumber !== "number")
    throw new Error("A float number is expected.");
  // Round-trip the value through a shared 4-byte buffer: writing via a
  // Float32Array and reading via a Uint32Array reinterprets the same
  // bytes, yielding the raw IEEE 754 single-precision bit pattern.
  var f32 = new Float32Array(1);
  f32[0] = floatNumber;
  var pattern = new Uint32Array(f32.buffer)[0];
  // Render as a fixed-width 32-character binary string.
  var bits = pattern.toString(2).padStart(32, "0");
  // Split into the three IEEE 754 fields: 1 sign bit, 8 exponent bits,
  // 23 mantissa bits.
  return [bits.slice(0, 1), bits.slice(1, 9), bits.slice(9)];
}
// Demo: single-precision bit pattern of pi (sign + exponent + mantissa concatenated)
console.log(float32ToBits(Math.PI).join(''))
// 01000000010010010000111111011011
When you want to convert a float number IEEE 754 to an array of 64-bits (0 or 1) :
function float64ToBits(doubleNumber) {
  // Reject anything that is not a number.
  if (typeof doubleNumber !== "number")
    throw new Error("A float number is expected.");
  // Use a DataView with its default big-endian accessors so the result
  // is independent of the host's native byte order. (The original code
  // indexed a Uint32Array as [1] then [0], which only produces the
  // correct word order on little-endian machines.)
  var view = new DataView(new ArrayBuffer(8));
  view.setFloat64(0, doubleNumber); // big-endian by default
  // Read the high and low 32-bit words and concatenate them into a
  // fixed-width 64-character binary string.
  var binaryString =
    view.getUint32(0).toString(2).padStart(32, "0") +
    view.getUint32(4).toString(2).padStart(32, "0");
  // Split into the three IEEE 754 fields: 1 sign bit, 11 exponent bits,
  // 52 mantissa bits.
  var signBit = binaryString.substring(0, 1);
  var exponentBits = binaryString.substring(1, 12);
  var mantissaBits = binaryString.substring(12, 64);
  return [signBit, exponentBits, mantissaBits];
}
// Demo: double-precision bit pattern of pi (sign + exponent + mantissa concatenated)
console.log(float64ToBits(Math.PI).join(''))
// 0100000000001001001000011111101101010100010001000010110100011000
When you want to convert a string of bits (0 or 1) to a 32-bit IEEE 754 float number :
function bitsToFloat32(bitString) {
  // Normalize to exactly 32 characters: left-pad short inputs with '0'
  // and keep only the last 32 characters of long ones.
  bitString = bitString.padStart(32, "0").slice(-32);
  // Validate that the string contains only '0' or '1'.
  if (!/^[01]{32}$/.test(bitString))
    throw new Error("A 32-bit string is expected.");
  // Write the bit pattern through a DataView using its default
  // big-endian byte order, then read the same bytes back as a float.
  // This works on any host byte order, whereas the original code
  // stored bytes LSB-first and only worked on little-endian machines.
  var view = new DataView(new ArrayBuffer(4));
  view.setUint32(0, parseInt(bitString, 2)); // exact: fits in 32 bits
  return view.getFloat32(0);
}
When you want to convert a string of bits (0 or 1) to a 64-bit IEEE 754 float number :
function bitsToFloat64(bitString) {
  // Normalize to exactly 64 characters: left-pad short inputs with '0'
  // and keep only the last 64 characters of long ones.
  bitString = bitString.padStart(64, "0").slice(-64);
  // Validate that the string contains only '0' or '1'.
  if (!/^[01]{64}$/.test(bitString))
    throw new Error("A 64-bit string is expected.");
  // Write the two 32-bit halves through a DataView using its default
  // big-endian byte order, then read the 8 bytes back as a double.
  // This works on any host byte order, whereas the original code
  // stored bytes LSB-first and only worked on little-endian machines.
  // (parseInt on the full 64-bit string would lose precision, so the
  // halves are parsed separately — each fits exactly in 32 bits.)
  var view = new DataView(new ArrayBuffer(8));
  view.setUint32(0, parseInt(bitString.substring(0, 32), 2)); // high word
  view.setUint32(4, parseInt(bitString.substring(32), 2));    // low word
  return view.getFloat64(0);
}
Example of use with a condensed snake-case version exploring the pi number in 32-bit/64-bit :
const float_32_to_bits = (n_float) => {
    if (typeof n_float !== "number")
        throw new Error("A float number is expected.")
    // store the value as a 32-bit float, then reinterpret the same
    // 4 bytes as an unsigned 32-bit integer to expose the raw bits
    const f32 = new Float32Array(1)
    f32[0] = n_float
    const bits = new Uint32Array(f32.buffer)[0].toString(2).padStart(32, "0")
    // [sign(1), exponent(8), mantissa(23)]
    return [ bits.slice(0, 1), bits.slice(1, 9), bits.slice(9) ]
}
const float_64_to_bits = (n_double) => {
    if (typeof n_double !== "number")
        throw new Error("A float number is expected.")
    // DataView with its default big-endian accessors keeps the bit
    // string correct on any host byte order; indexing a Uint32Array
    // as [1] then [0] only works on little-endian machines.
    const view = new DataView(new ArrayBuffer(8))
    view.setFloat64(0, n_double)
    const tmp = view.getUint32(0).toString(2).padStart(32, "0")
              + view.getUint32(4).toString(2).padStart(32, "0")
    // [sign(1), exponent(11), mantissa(52)]
    const sign = tmp.substring(0, 1)
    const expo = tmp.substring(1, 12)
    const mant = tmp.substring(12, 64)
    return [ sign, expo, mant ]
}
const bits_to_float_32 = (bits_string) => {
    // normalize to exactly 32 chars before validating
    bits_string = bits_string.padStart(32, "0").slice(-32)
    if (!/^[01]{32}$/.test(bits_string))
        throw new Error("A 32-bit string is expected.")
    // big-endian DataView round-trip is host-endianness independent
    // (the original wrote bytes LSB-first, little-endian only)
    const view = new DataView(new ArrayBuffer(4))
    view.setUint32(0, parseInt(bits_string, 2))
    return view.getFloat32(0)
}
const bits_to_float_64 = (bits_string) => {
    // fix: pad/trim BEFORE validating — the original validated first,
    // so any input shorter than 64 chars hit undefined characters and
    // threw (inconsistent with bits_to_float_32, which pads first)
    bits_string = bits_string.padStart(64, "0").slice(-64)
    if (!/^[01]{64}$/.test(bits_string))
        throw new Error("A 64-bit string is expected.")
    // parse the two 32-bit halves separately (a single parseInt over
    // 64 bits would lose precision) and write them big-endian, so the
    // result is host-endianness independent
    const view = new DataView(new ArrayBuffer(8))
    view.setUint32(0, parseInt(bits_string.substring(0, 32), 2))
    view.setUint32(4, parseInt(bits_string.substring(32), 2))
    return view.getFloat64(0)
}
// Demos: decompose pi into its IEEE 754 fields in both precisions,
// then rebuild the values from the bit strings printed above.
console.log(float_32_to_bits(Math.PI).join(' '))
console.log(float_64_to_bits(Math.PI).join(' '))
console.log(bits_to_float_32('01000000010010010000111111011011'))
console.log(bits_to_float_64('0100000000001001001000011111101101010100010001000010110100011000'))
Upvotes: 0
Reputation: 4280
JavaScript uses double (IEEE 754) to represent all numbers. A double consists of [sign, exponent(11bit), mantissa(52bit)] fields.
The value of the number is computed using the formula (-1)^sign * (1.mantissa) * 2^(exponent - 1023). Here 1.mantissa means that we take the bits of the mantissa, add a 1 at the beginning, and treat that value as a binary fraction, e.g. if mantissa = 101 we get the number 1.101 (bin) = 1 + 1/2 + 1/8 (dec) = 1.625 (dec).
The sign bit can be obtained by testing whether the number is greater than zero. There is a small issue with 0 here because double has both +0 and -0 values, but we can distinguish these two by computing 1/value and checking whether the result is +Inf or -Inf.
Since 1 <= 1.mantissa < 2, we can get the value of the exponent using Math.log2, e.g. Math.floor(Math.log2(666.0)) = 9, so exponent - 1023 = 9 and exponent = 1032, which in binary is (1032).toString(2) = "10000001000".
After obtaining the exponent we can normalize the number: value = value / Math.pow(2, Math.floor(Math.log2(666.0))); now value represents the number (-1)^sign * (1.mantissa). If we ignore the sign and multiply that by 2^52 we get an integer value that has the same bits as 1.mantissa: ((666 / Math.pow(2, Math.floor(Math.log2(666)))) * Math.pow(2, 52)).toString(2) = "10100110100000000000000000000000000000000000000000000" (we must ignore the leading 1).
This is only a proof of concept — we didn't discuss denormalized numbers or special values such as NaN — but I think it can be expanded to account for these cases too.
@bensiu's answer is fine, but if you find yourself using some old JS interpreter you can use this approach.
Upvotes: 3
Reputation: 361
function DoubleToIEEE(f)
{
    // Reinterpret the 8 bytes of an IEEE 754 double as two unsigned
    // 32-bit words. Create the Uint32Array view once instead of twice
    // as in the original. NOTE: the word order follows the host's
    // native byte order — on little-endian machines (x86/ARM) index 0
    // holds the LOW word and index 1 the HIGH word.
    var buf = new ArrayBuffer(8);
    (new Float64Array(buf))[0] = f;
    var words = new Uint32Array(buf);
    return [ words[0], words[1] ];
}
Upvotes: 36
Reputation: 1238
function FloatToIEEE(f)
{
    // Store f as a 32-bit float, then read the identical four bytes
    // back as an unsigned 32-bit integer — the raw IEEE 754 pattern.
    const floatView = new Float32Array(1);
    floatView[0] = f;
    return new Uint32Array(floatView.buffer)[0];
}
Unfortunately, this doesn't work with doubles and in old browsers.
Upvotes: 4
Reputation: 3927
Like the other posters have said, JavaScript is loose typed, so there is no differentiation in data types from float to int or vice versa.
However, what you're looking for is
float to int:
Math.floor( 3.9 ); // result: 3 (rounds down) or
Math.round( 3.9 ); // result: 4 (round to nearest whole number)
Depending on which you'd like. Note that C/C++'s float-to-int conversion truncates toward zero, which differs from Math.floor for negative numbers (floor(-3.9) is -4, while C gives -3); Math.trunc matches the C behavior.
int to float:
var a = 10;
a.toFixed( 3 ); // result: "10.000" — note this is a string; JavaScript has only one number type, so no conversion is needed
Upvotes: -3
Reputation: 536379
You certainly don't get anything low-level like that in JavaScript. It would be extremely dangerous to allow recasting and pointer-frobbing in a language that has to be safe for untrusted potential-attacker web sites to use.
If you want to get a 32-bit IEEE754 representation of a single-precision value in a Number (which remember is not an int either; the only number type you get in JavaScript is double
), you will have to make it yourself by fiddling the sign, exponent and mantissa bits together. There's example code here.
Upvotes: 7