Reputation: 51313
I'm writing a helper class which I intend to use for reading bits in reverse from a data block.
I tried doing an optimization where I used "rol" instructions for masking the data. However, to my surprise this is actually slower than creating a new bitmask during each access.
class reverse_bit_reader
{
public:
static const size_t bits_per_block = sizeof(unsigned long)*8;
static const size_t high_bit = 1 << (bits_per_block-1);
reverse_bit_reader(const void* data, size_t size)
: data_(reinterpret_cast<const unsigned long*>(data))
, index_(size-1)
{
// Bits are stored in left to right order, potentially ignore the last bits
size_t last_bit_index = index_ % bits_per_block;
bit_mask_ = high_bit >> (last_bit_index+1);
if(bit_mask_ == 0)
bit_mask_ = high_bit;
}
bool next_bit1()
{
return get_bit(index_--);
}
bool next_bit2() // Why is next_bit1 faster?
{
__asm // Rotate bit_mask.
{
mov eax, [ecx+0];
rol eax, 1;
mov [ecx+0], eax;
}
return data_[index_-- / bits_per_block] & bit_mask_;
}
bool eof() const{return index_ < 0;}
private:
bool get_bit(size_t index) const
{
const size_t block_index = index / bits_per_block;
const size_t bit_index = index % bits_per_block;
const size_t bit_mask = high_bit >> bit_index;
return data_[block_index] & bit_mask;
}
unsigned long bit_mask_;
int index_;
const unsigned long* data_;
};
Can anyone explain why next_bit1 is faster than next_bit2?
Upvotes: 3
Views: 180
Reputation: 40709
If you're going to be reading bits sequentially out of longs, starting from the most significant bit, and you want it to be as fast as possible, could you do something along these lines?
#define GETBIT ((theBit = (theLong < 0)), (theLong <<= 1), theBit)
Upvotes: 1