Reputation: 43
How can I multiply a 64-bit unsigned int with a 32-bit unsigned int without using long long
. The 64-bit number is stored as an array of two 32-bit numbers.
In other words, how I can I obtain e
and f
from a
, b
and c
given the following:
( a * 2^32 + b ) * c = e * 2^32 + f
a
, b
, c
, e
and f
are all 32-bit unsigned integers, and only 32-bit math operations are available.
Upvotes: 2
Views: 789
Reputation: 385789
The first thing to realize is that we'll obtain a 96-bit number, as the following shows:
(264 - 1) × (232 - 1)
= 264 × 232 - 264 - 232 + 1
= 296 - 264 - 232 + 1
< 296
The next thing to realize is that we can't use 32-bit multiplication. Even without a carry, the results are too large.
(232 - 1) × (232 - 1)
= 232 × 232 - 232 - 232 + 1
= 264 - 233 + 1
≥ 232
We'll need to perform 16-bit multiplications instead. 16-bit multiplication can be done using 32-bit numbers without the possibility of overflow, even with a 16-bit carry.
FFFF16 × FFFF16 + FFFF16
= FFFF_000016
< 232
If we view the numbers as 16-bit values, the multiplication looks like:
+-----+-----+-----+-----+
| ahi | alo | bhi | blo |
+-----+-----+-----+-----+
+-----+-----+
× | chi | clo |
+-----+-----+
===========================================
+-----+-----+-----+-----+-----+-----+
| dhi | dlo | ehi | elo | fhi | flo |
+-----+-----+-----+-----+-----+-----+
We all know how to do the above multiplication from elementary school, right?
+-----+-----+-----+-----+ +-----+
| ahi | alo | bhi | blo | × | clo |
+-----+-----+-----+-----+ +-----+
+-----+-----+-----+-----+-----+ +-----+
+ | ahi | alo | bhi | blo | 0 | × | chi |
+-----+-----+-----+-----+-----+ +-----+
The means the following are all the tools we need:
void mul16c(
uint16_t i, uint16_t j, uint16_t cin,
uint16_t *prod_ptr, uint16_t *cout_ptr
) {
uint32_t prod = i * j + cin;
*prod_ptr = prod & 0xFFFF:
*cout_ptr = prod >> 16;
}
void add16c(
uint16_t i, uint16_t j, uint16_t cin,
uint16_t *sum_ptr, uint16_t* cout_ptr
) {
uint32_t sum = i + j + cin;
*sum_ptr = sum & 0xFFFF:
*cout_ptr = sum >> 16;
}
And we can use them as follows:
void mul64x32(
uint32_t i[2], uint32_t j,
uint32_t (*prod_ptr)[2], uint32_t *cout_ptr
) {
uint16_t i16[4];
i16[3] = i[1] >> 16;
i16[2] = i[1] & 0xFFFF;
i16[1] = i[0] >> 16;
i16[0] = i[0] & 0xFFFF;
uint16_t j16[2];
j16[1] = j[0] >> 16;
j16[0] = j[0] & 0xFFFF;
uint16_t p[6];
{
uint16_t carry = 0;
mul16c(i16[0], j16[0], carry, &(p[0]), &carry);
mul16c(i16[1], j16[0], carry, &(p[1]), &carry);
mul16c(i16[2], j16[0], carry, &(p[2]), &carry);
mul16c(i16[3], j16[0], carry, &(p[3]), &carry);
p[4] = carry;
p[5] = 0;
}
uint16_t q[6];
{
uint16_t carry = 0;
q[0] = 0;
mul16c(i16[0], j16[1], carry, &(q[1]), &carry);
mul16c(i16[1], j16[1], carry, &(q[2]), &carry);
mul16c(i16[2], j16[1], carry, &(q[3]), &carry);
mul16c(i16[3], j16[1], carry, &(q[4]), &carry);
q[5] = carry;
}
uint16_t product[6];
{
uint16_t carry = 0;
add16c(p[0], q[0], carry, &(product[0]), &carry);
add16c(p[1], q[1], carry, &(product[1]), &carry);
add16c(p[2], q[2], carry, &(product[2]), &carry);
add16c(p[3], q[3], carry, &(product[3]), &carry);
add16c(p[4], q[4], carry, &(product[4]), &carry);
add16c(p[5], q[5], carry, &(product[5]), &carry);
}
(*prod_ptr)[0] = ( product[1] << 16 ) | product[0];
(*prod_ptr)[1] = ( product[3] << 16 ) | product[2];
*cout_ptr = ( product[5] << 16 ) | product[4];
}
Element zero of each array is expected to be the least significant portion of the number.
Upvotes: 3