Reputation: 943
When converting a binary string to hex, the answers I found only worked up to a certain size. I want to convert MASSIVE binary strings into their complete hex counterpart more efficiently than this, which is the only way I've come across that handles the whole string:
for (size_t i = 0; i < (binarySubVec.size() - 1); i++) {
    string binToHex, tmp = "0000";
    for (size_t j = 0; j < binaryVecStr[i].size(); j += 4) {
        tmp = binaryVecStr[i].substr(j, 4);
        if      (!tmp.compare("0000")) binToHex += "0";
        else if (!tmp.compare("0001")) binToHex += "1";
        else if (!tmp.compare("0010")) binToHex += "2";
        else if (!tmp.compare("0011")) binToHex += "3";
        else if (!tmp.compare("0100")) binToHex += "4";
        else if (!tmp.compare("0101")) binToHex += "5";
        else if (!tmp.compare("0110")) binToHex += "6";
        else if (!tmp.compare("0111")) binToHex += "7";
        else if (!tmp.compare("1000")) binToHex += "8";
        else if (!tmp.compare("1001")) binToHex += "9";
        else if (!tmp.compare("1010")) binToHex += "A";
        else if (!tmp.compare("1011")) binToHex += "B";
        else if (!tmp.compare("1100")) binToHex += "C";
        else if (!tmp.compare("1101")) binToHex += "D";
        else if (!tmp.compare("1110")) binToHex += "E";
        else if (!tmp.compare("1111")) binToHex += "F";
        else continue;
    }
    hexOStr << binToHex;
    hexOStr << " ";
}
It's thorough and complete, but slow.
Is there a simpler way of doing this?
Upvotes: 9
Views: 16911
Reputation:
I think using a trie (or anything similar) is too complex here. I'd rather accept some slightly costlier arithmetic, because the algorithm behind it seems more reasonable and tidy: reverse-scan the string, grouping 4 elements at a time.
My solution below is still robust and functional: it can deal with long binary strings, handles an optional "0b" prefix, and strips leading zeros from the result:
string bstohs(const string& bs) {
    string hs = {};
    auto c = bs.rbegin();
    auto end = bs.rend();
    if (bs.rfind("0b", 0) == 0) {
        end -= 2;   // stop before the "0b" prefix at the front of the string
    }
    int localSum = 0;
    // Consume complete groups of 4 bits, least significant group first,
    // prepending one hex digit per group.
    while (end - c > 3) {
        localSum = (*c - '0') + ((*(c + 1) - '0') << 1) + ((*(c + 2) - '0') << 2) + ((*(c + 3) - '0') << 3);
        hs = (localSum > 9 ? string(1, 'a' + localSum - 10) : to_string(localSum)) + hs;
        c += 4;
    }
    // Handle the remaining 0-3 most significant bits.
    localSum = 0;
    for (auto lst = c; end - lst > 0; lst++) {
        if (*lst != '0') {
            localSum += ((*lst - '0') << (lst - c));
        }
    }
    hs = to_string(localSum) + hs;
    // Strip leading zeros (but keep at least one digit).
    return "0x" + hs.erase(0, min(hs.find_first_not_of('0'), hs.size() - 1));
}
Here is a test main function:
int main() {
    string bs = "0b";
    srand((unsigned)time(NULL));
    int test_time, test_max_len;
    cout << "input test time: ";
    cin >> test_time;
    cout << "input test max len: ";
    cin >> test_max_len;
    for (int i = 0; i < test_time; i++) {
        int test_len = rand() % test_max_len;
        bs += "1";
        for (int j = 1; j < test_len; j++) {
            bs += to_string(rand() % 2);
        }
        cout << "test case " << i << "\nraw bs: " << bs << "\nbin len: " << bs.size() - 2 << "\nnow hs: " << bstohs(bs) << endl;
        bs = "0b";
    }
    return 0;
}
A test run:
input test time: 3
input test max len: 400
test case 0
raw bs: 0b1111000001010110010011101010111110001101111101001101000110111111000110011000111000001100111101010111010010000111011001000010100110010100001000101001010001111111010010111111111000000101100110100000000101110111110000110100011001100111111100101011101111010000001101001000011000110110011000011000110110001101010011000001111101111010010010100001010100010110010
bin len: 355
now hs: 0x782b2757c6fa68df8cc7067aba43b214ca114a3fa5ff02cd00bbe1a333f95de81a431b30c6c6a60fbd250a8b2
test case 1
raw bs: 0b110111101010110011001110000110001101010100011011100111000000000111010011000100000011110010100100001101111000100010011110001010100000101010001101011001110111010010001100111011101010100010000110011111011110100010110100010000010010001011111000101110101100110010111010001111010101110011111110
bin len: 288
now hs: 0xdeacce18d51b9c01d3103ca437889e2a0a8d67748ceea8867de8b44122f8baccba3d5cfe
test case 2
raw bs: 0b11010001010111100010001010100111011010111110001111011100000111111011001010110010011011110101001111011
bin len: 101
now hs: 0x1a2bc454ed7c7b83f6564dea7b
Environment:
#include <iostream>
#include <string>
#include <stdlib.h>
#include <time.h>   // for time(), used to seed rand()
using namespace std;
Upvotes: 1
Reputation: 3132
You could try a binary decision tree:
string binToHex;
for (size_t i = 0; i < binaryVecStr[a].size(); i += 4) {
    string tmp = binaryVecStr[a].substr(i, 4);
    if (tmp[0] == '0') {
        if (tmp[1] == '0') {
            if (tmp[2] == '0') {
                if (tmp[3] == '0') {
                    binToHex += "0";
                } else {
                    binToHex += "1";
                }
            } else {
                if (tmp[3] == '0') {
                    binToHex += "2";
                } else {
                    binToHex += "3";
                }
            }
        } else {
            if (tmp[2] == '0') {
                if (tmp[3] == '0') {
                    binToHex += "4";
                } else {
                    binToHex += "5";
                }
            } else {
                if (tmp[3] == '0') {
                    binToHex += "6";
                } else {
                    binToHex += "7";
                }
            }
        }
    } else {
        if (tmp[1] == '0') {
            if (tmp[2] == '0') {
                if (tmp[3] == '0') {
                    binToHex += "8";
                } else {
                    binToHex += "9";
                }
            } else {
                if (tmp[3] == '0') {
                    binToHex += "A";
                } else {
                    binToHex += "B";
                }
            }
        } else {
            if (tmp[2] == '0') {
                if (tmp[3] == '0') {
                    binToHex += "C";
                } else {
                    binToHex += "D";
                }
            } else {
                if (tmp[3] == '0') {
                    binToHex += "E";
                } else {
                    binToHex += "F";
                }
            }
        }
    }
}
hexOStr << binToHex;
You might also want to consider a more compact representation of the same decision tree, such as
string binToHex;
for (size_t i = 0; i < binaryVecStr[a].size(); i += 4) {
    string tmp = binaryVecStr[a].substr(i, 4);
    binToHex += (tmp[0] == '0' ?
                    (tmp[1] == '0' ?
                        (tmp[2] == '0' ?
                            (tmp[3] == '0' ? "0" : "1") :
                            (tmp[3] == '0' ? "2" : "3")) :
                        (tmp[2] == '0' ?
                            (tmp[3] == '0' ? "4" : "5") :
                            (tmp[3] == '0' ? "6" : "7"))) :
                    (tmp[1] == '0' ?
                        (tmp[2] == '0' ?
                            (tmp[3] == '0' ? "8" : "9") :
                            (tmp[3] == '0' ? "A" : "B")) :
                        (tmp[2] == '0' ?
                            (tmp[3] == '0' ? "C" : "D") :
                            (tmp[3] == '0' ? "E" : "F"))));
}
hexOStr << binToHex;
Update: In the vein of the ASCII-to-integer solutions:
unsigned int nibble = *reinterpret_cast<const unsigned int*>(buffer); // load 4 ASCII digits at once
nibble &= 0x01010101;       // 0x31313131 --> 0x01010101
nibble |= (nibble >> 15);   // 0x01010101 --> 0x01010303
nibble |= (nibble >> 6);    // 0x01010303 --> 0x0105070F
char hexDigit = hexDigitTable[nibble & 15];
The contents of hexDigitTable (of type char[16]) would depend on whether you are on a little-endian or big-endian machine.
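One way to avoid working the byte order out by hand is to generate the table at startup by running the same bit-twiddling over the sixteen possible input patterns. A minimal sketch (my illustration, not part of the answer; assumes a 4-byte unsigned int):
#include <cstring>

static char hexDigitTable[16];

// Populate hexDigitTable by running the same bit-twiddle over all sixteen
// 4-character patterns, so the machine's byte order never matters.
static void initHexDigitTable() {
    for (unsigned v = 0; v < 16; ++v) {
        char pattern[4];
        for (int bit = 0; bit < 4; ++bit)                      // MSB first, e.g. v = 6 -> "0110"
            pattern[bit] = ((v >> (3 - bit)) & 1) ? '1' : '0';

        unsigned int nibble;
        std::memcpy(&nibble, pattern, sizeof nibble);          // same 4-byte load as the snippet above
        nibble &= 0x01010101;
        nibble |= (nibble >> 15);
        nibble |= (nibble >> 6);
        hexDigitTable[nibble & 15] = "0123456789ABCDEF"[v];
    }
}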
Upvotes: 2
Reputation: 117681
This is the fastest I could come up with:
#include <iostream>
int main(int argc, char** argv) {
    char buffer[4096];
    while (std::cin.read(buffer, sizeof(buffer)), std::cin.gcount() > 0) {
        size_t got = std::cin.gcount();
        char* out = buffer;
        for (const char* it = buffer; it < buffer + got; it += 4) {
            unsigned long r;
            r = it[3];
            r += it[2] * 2;
            r += it[1] * 4;
            r += it[0] * 8;
            *out++ = "0123456789abcdef"[r - 15*'0'];
        }
        std::cout.write(buffer, got / 4);
    }
}
It's faster than anything else on this question, according to @sehe's benchmarks.
Upvotes: 3
Reputation: 393029
UPDATE Added comparison and benchmarks at the end
Here's another take, based on a perfect hash. The perfect hash was generated using gperf
(like described here: Is it possible to map string to int faster than using hashmap?).
I've further optimized by moving function-local statics out of the way and marking hexchar() and hash() as constexpr. This removes any unnecessary initialization overhead and gives the compiler full room for optimization.
You could try reading e.g. 1024 nibbles at a time if possible, and give the compiler a chance to vectorize the operations using AVX/SSE instruction sets. (I have not inspected the generated code to see whether this would happen.)
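For illustration, here's a rough sketch (my code, not part of the answer, and not benchmarked) of what a hand-vectorized inner loop could look like with SSSE3 intrinsics; it assumes the chunk length is a multiple of 16 characters:
#include <tmmintrin.h>   // SSSE3 intrinsics
#include <cstddef>
#include <cstring>

// Convert len ASCII '0'/'1' characters (len assumed to be a multiple of 16)
// into len/4 hex digits, 4 nibbles per iteration.
void bin_to_hex_ssse3(const char* in, std::size_t len, char* out) {
    const __m128i one     = _mm_set1_epi8(1);
    const __m128i weights = _mm_setr_epi8(8, 4, 2, 1, 8, 4, 2, 1,
                                          8, 4, 2, 1, 8, 4, 2, 1);
    const __m128i hexlut  = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
                                          '8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
    for (std::size_t i = 0; i < len; i += 16) {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in + i));
        v = _mm_and_si128(v, one);                // '0'/'1' -> 0/1
        v = _mm_maddubs_epi16(v, weights);        // adjacent pairs: 8*b0+4*b1, 2*b2+1*b3, ...
        v = _mm_madd_epi16(v, _mm_set1_epi16(1)); // 4 nibble values, one per 32-bit lane
        v = _mm_packs_epi32(v, v);                // narrow to 16-bit lanes
        v = _mm_packus_epi16(v, v);               // narrow to bytes in 0..15
        v = _mm_shuffle_epi8(hexlut, v);          // nibble values -> hex characters
        char tmp[16];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), v);
        std::memcpy(out + i / 4, tmp, 4);         // first 4 bytes are the 4 hex digits
    }
}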
The full sample code to translate std::cin to std::cout in streaming mode is:
#include <iostream>
int main()
{
    char buffer[4096];
    while (std::cin.read(buffer, sizeof(buffer)), std::cin.gcount())
    {
        size_t got = std::cin.gcount();
        char* out = buffer;
        for (auto it = buffer; it < buffer+got; it += 4)
            *out++ = Perfect_Hash::hexchar(it);
        std::cout.write(buffer, got/4);
    }
}
Here's the Perfect_Hash class, slightly redacted and extended with the hexchar lookup. Note that it does validate input in DEBUG builds using the assert:
#include <array>
#include <algorithm>
#include <cassert>
class Perfect_Hash {
/* C++ code produced by gperf version 3.0.4 */
/* Command-line: gperf -L C++ -7 -C -E -m 100 table */
/* Computed positions: -k'1-4' */
/* maximum key range = 16, duplicates = 0 */
private:
static constexpr unsigned char asso_values[] = {
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 15, 7, 3, 1, 0, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27};
template <typename It>
static constexpr unsigned int hash(It str)
{
return
asso_values[(unsigned char)str[3] + 2] + asso_values[(unsigned char)str[2] + 1] +
asso_values[(unsigned char)str[1] + 3] + asso_values[(unsigned char)str[0]];
}
static constexpr char hex_lut[] = "???????????fbead9c873625140";
public:
#ifdef DEBUG
template <typename It>
static char hexchar(It binary_nibble)
{
assert(Perfect_Hash::validate(binary_nibble)); // for DEBUG only
return hex_lut[hash(binary_nibble)]; // no validation!
}
#else
template <typename It>
static constexpr char hexchar(It binary_nibble)
{
return hex_lut[hash(binary_nibble)]; // no validation!
}
#endif
template <typename It>
static bool validate(It str)
{
static constexpr std::array<char, 4> vocab[] = {
{{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}},
{{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}},
{{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}},
{{'?', '?', '?', '?'}}, {{'?', '?', '?', '?'}},
{{'1', '1', '1', '1'}}, {{'1', '0', '1', '1'}},
{{'1', '1', '1', '0'}}, {{'1', '0', '1', '0'}},
{{'1', '1', '0', '1'}}, {{'1', '0', '0', '1'}},
{{'1', '1', '0', '0'}}, {{'1', '0', '0', '0'}},
{{'0', '1', '1', '1'}}, {{'0', '0', '1', '1'}},
{{'0', '1', '1', '0'}}, {{'0', '0', '1', '0'}},
{{'0', '1', '0', '1'}}, {{'0', '0', '0', '1'}},
{{'0', '1', '0', '0'}}, {{'0', '0', '0', '0'}},
};
int key = hash(str);
if (key <= 26 && key >= 0)
return std::equal(str, str+4, vocab[key].begin());
else
return false;
}
};
constexpr unsigned char Perfect_Hash::asso_values[];
constexpr char Perfect_Hash::hex_lut[];
#include <iostream>
int main()
{
char buffer[4096];
while (std::cin.read(buffer, sizeof(buffer)), std::cin.gcount())
{
size_t got = std::cin.gcount();
char* out = buffer;
for (auto it = buffer; it < buffer+got; it += 4)
*out++ = Perfect_Hash::hexchar(it);
std::cout.write(buffer, got/4);
}
}
Demo output for e.g. od -A none -t o /dev/urandom | tr -cd '01' | dd bs=1 count=4096 | ./test
03bef5fb79c7da917e3ebffdd8c41488d2b841dac86572cf7672d22f1f727627a2c4a48b15ef27eb0854dd99756b24c678e3b50022d695cc5f5c8aefaced2a39241bfd5deedcfa0a89060598c6b056d934719eba9ccf29e430d2def5751640ff17860dcb287df8a94089ade0283ee3d76b9fefcce3f3006b8c71399119423e780cef81e9752657e97c7629a9644be1e7c96b5d0324ab16d20902b55bb142c0451e675973489ae4891ec170663823f9c1c9b2a11fcb1c39452aff76120b21421069af337d14e89e48ee802b1cecd8d0886a9a0e90dea5437198d8d0d7ef59c46f9a069a83835286a9a8292d2d7adb4e7fb0ef42ad4734467063d181745aaa6694215af7430f95e854b7cad813efbbae0d2eb099523f215cff6d9c45e3edcaf63f78a485af8f2bfc2e27d46d61561b155d619450623b7aa8ca085c6eedfcc19209066033180d8ce1715e8ec9086a7c28df6e4202ee29705802f0c2872fbf06323366cf64ecfc5ea6f15ba6467730a8856a1c9ebf8cc188e889e783c50b85824803ed7d7505152b891cb2ac2d6f4d1329e100a2e3b2bdd50809b48f0024af1b5092b35779c863cd9c6b0b8e278f5bec966dd0e5c4756064cca010130acf24071d02de39ef8ba8bd1b6e9681066be3804d36ca83e7032274e4c8e8cacf520e8078f8fa80eb8e70af40367f53e53a7d7f7afe8704c46f58339d660b8151c91bddf82b4096
I came up with three different approaches. In order to do some comparisons, I benchmarked them (compiled with -O3 -march=native -g0 -DNDEBUG). Here are the results: the naive approach from the first answer does rather well.
Summary: I'd say the naive approach was plenty good as I posted it on a whim 10 hours ago. If you really want high throughput, the perfect hash is a nice start, but consider hand-rolling a SIMD based solution.
Upvotes: 8
Reputation: 490128
I have this strange feeling that I must be missing something important about the question here. At first glance, it seems like this should work:
#include <stdexcept>

template <class RanIt, class OutIt>
void make_hex(RanIt b, RanIt e, OutIt o) {
    static const char rets[] = "0123456789ABCDEF";

    if ((e - b) % 4 != 0)
        throw std::runtime_error("Length must be a multiple of 4");

    while (b != e) {
        int index =
            ((*(b + 0) - '0') << 3) |
            ((*(b + 1) - '0') << 2) |
            ((*(b + 2) - '0') << 1) |
            ((*(b + 3) - '0') << 0);
        *o++ = rets[index];
        b += 4;
    }
}
At least offhand, this seems like it should be just about as fast as anything could be--it looks to me like it closely approaches the minimum of processing on each input necessary to get to the output.
To maximize speed, it does minimize error checking on the input to (and perhaps below) the bare minimum. You could certainly assure that each character in the input was a '0' or a '1' before depending on the result of the subtraction. Alternatively, you could quite easily use (*(b + 0) != '0') << 3 to treat 0 as 0 and anything else as a 1. Likewise, you could use (*(b + 0) == '1') << 3 to get 1 treated as 1, and anything else as 0.
The code does avoid dependencies between the 4 computations needed to compute each index value, so it should be possible for a smart compiler to do those computations in parallel.
Since it works only with iterators, it avoids making extra copies of the input data, as (for example) almost anything using substr can (especially with an implementation of std::string that doesn't include the short string optimization).
In any case, using this would look something like this:
#include <iostream>
#include <iterator>

int main() {
    char input[] = "0000000100100011010001010110011110001001101010111100110111101111";
    make_hex(input, input + 64, std::ostream_iterator<char>(std::cout));
}
Since it does use iterators, it could just as easily (for only one obvious example) take input from an istreambuf_iterator to process data directly from a file. That's rarely the fastest possible way to do it though--you'll usually get better speed using istream::read to read a large chunk, and ostream::write to write a large chunk at once. This wouldn't need to affect the actual conversion code though--you'd just pass it pointers into your input and output buffers, and it would use them as iterators.
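A minimal sketch of that buffered approach (my illustration, not the answer's code), assuming the total input length is a multiple of 4:
#include <iostream>

int main() {
    char in[4096];              // chunk size divisible by 4, so no nibble straddles a chunk
    char out[4096 / 4];
    while (std::cin.read(in, sizeof in), std::cin.gcount() > 0) {
        std::streamsize got = std::cin.gcount();
        got -= got % 4;         // drop a trailing partial group, if any
        make_hex(in, in + got, out);
        std::cout.write(out, got / 4);
    }
}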
Upvotes: 4
Reputation: 393029
UPDATE 2: See my perfect-hash based solution posted separately; that one would have my preference.
EDIT: Indeed, benchmarking has now shown the perfect hash solution to be roughly 340x faster than the Spirit approach.
UPDATE: Added a Trie-based solution.
The lookup-table here uses Boost Spirit's internal Trie implementation for fast lookup.
Of course, replace out with e.g. a vector back_inserter or an ostreambuf_iterator<char> into your string stream if you prefer. Right now it never allocates even 4 characters (although of course the lookup table is allocated, once). You can also trivially replace the input iterators with whatever input range you have available, without changing a line of the rest of the code.
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;

int main() {
    std::ostreambuf_iterator<char> out(std::cout);

    qi::symbols<char, char> lookup_table;
    lookup_table.add
        ("0000", '0') ("0001", '1') ("0010", '2') ("0011", '3')
        ("0100", '4') ("0101", '5') ("0110", '6') ("0111", '7')
        ("1000", '8') ("1001", '9') ("1010", 'a') ("1011", 'b')
        ("1100", 'c') ("1101", 'd') ("1110", 'e') ("1111", 'f')
        ;

    boost::spirit::istream_iterator bof(std::cin), eof;

    if (qi::parse(bof, eof, +lookup_table [ *boost::phoenix::ref(out) = qi::_1 ]))
        return 0;
    else
        return 255;
}
When testing with some random data like od -A none -t o /dev/urandom | tr -cd '01' | dd bs=1 count=4096 | ./test
you get
dfc87abf674d8fdb28ed2e36d8ac99faa9c9c4a8aa2253763510482c887e07b2e24cf36ecb7abdcb31521becca54ba1c2ff4be0399f76c2ca28c87fe13a735a0f8031959e5ed213a5d02fb71cbf32b978d2ee9e390a0e2fc6b65b24b2922fb7554a9b211ca1db1b757d1cd0b468d1cd399b114f4f8ef93ade4f33a18bcdb25e2b8138dcd7ec7ef7d2a53f905369c261e19556356ab96f0608bd07f908d3430d3fe7ec21a234c321cc79788f934250da6d2d8e2cb51173ad64ffb4769e7a28224e9bc68123249bbd9c19c01ebbdf2fe4824fb854cf018268d7a988bfd0169f395b30937230733e0f17ba3d8f979341ebde6ff48aac764c2a460625a3ec1349351fe15c8cd4cd3e2933a2840392e381e3c8fc69456eaaf4e8257837f92124e8918a071d7a569fba5e7b189831aa761b3a63feb45d317b1724c53659c00bc82ce7a0c4bcbdc196bc5c990eddc70248d49cc419721d82714256ed13568c4f0740efe42401b0ce644dceaf3507e4acae718265101562f81c237ea8551d051cba38a087fc260af83e123f774e8da956d885d0f87e72e336d8599631f3a44d30676088149b5a1292ecc8682cfbd6982bc37b7e6a5c44f42fcfaabd32c29696a6985fdca5bd6c986dfd4670c4456ac0a7e6ae50ba4218e090f829a2391dd9fc863b31c05a
Old, elementary answer:
Here's the gist:
char nibble[4];
while (std::cin.read(nibble, 4))
{
    std::cout << "0123456789abcdef"[
        (nibble[0]!='0')*8 +
        (nibble[1]!='0')*4 +
        (nibble[2]!='0')*2 +
        (nibble[3]!='0')*1
    ];
}
You could make the conversion a lookup table indeed. Don't use a map as it's tree based and will end up chasing a lot of pointers. However, boost::flat_map could be fine.
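For illustration only (not from the answer), a flat_map-based lookup might look like this, assuming Boost.Container is available:
#include <boost/container/flat_map.hpp>
#include <string>

// Look up one 4-character group; returns '?' for anything that isn't a valid nibble.
char lookup_nibble(const std::string& bits) {
    static const boost::container::flat_map<std::string, char> table = {
        {"0000", '0'}, {"0001", '1'}, {"0010", '2'}, {"0011", '3'},
        {"0100", '4'}, {"0101", '5'}, {"0110", '6'}, {"0111", '7'},
        {"1000", '8'}, {"1001", '9'}, {"1010", 'a'}, {"1011", 'b'},
        {"1100", 'c'}, {"1101", 'd'}, {"1110", 'e'}, {"1111", 'f'},
    };
    auto it = table.find(bits);
    return it != table.end() ? it->second : '?';
}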
Upvotes: 4
Reputation: 13320
As for a simple way to do this, I think this one is pretty neat:
#include <bitset>
#include <iomanip>
#include <sstream>
#include <string>

std::string bintxt_2_hextxt(const std::string &bin)
{
    std::stringstream reader(bin);
    std::stringstream result;
    std::bitset<8> digit;

    // Test the extraction itself, so a failed read at EOF doesn't emit a bogus digit.
    while (reader >> digit)
    {
        // setw/setfill keep each 8-bit group two hex digits wide (e.g. 0x01 -> "01").
        result << std::hex << std::setw(2) << std::setfill('0') << digit.to_ulong();
    }
    return result.str();
}
I don't know where your data is supposed to be read from, so I've used a std::string as input data; but if it comes from a text file or a data stream, it shouldn't be a headache to change the reader to be a std::ifstream.
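For instance, a minimal sketch of the std::ifstream variant (my illustration; the path argument is hypothetical):
#include <bitset>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>

std::string file_to_hextxt(const std::string &path)
{
    std::ifstream reader(path);   // e.g. "bits.txt"
    std::stringstream result;
    std::bitset<8> digit;
    while (reader >> digit)
        result << std::hex << std::setw(2) << std::setfill('0') << digit.to_ulong();
    return result.str();
}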
Beware! I don't know what could happen if the number of stream characters isn't divisible by 8, and I also haven't tested the performance of this code.
Upvotes: 2
Reputation: 17114
Here's how I would do it:
1) Find the smallest positive integer n such that these integers all have different remainders modulo n:
0x30303030 0x30303031 0x30303130 0x30303131 0x30313030 0x30313031 0x30313130 0x30313131 0x31303030 0x31303031 0x31303130 0x31303131 0x31313030 0x31313031 0x31313130 0x31313131
These are the ASCII representations of "0000", "0001", etc. I have listed them in order, assuming that your machine is big-endian; if it is little-endian, the representation of e.g. "0001" will be 0x31303030, not 0x30303031. You only have to do this once. n will not be very large -- I would expect it to be less than 100.
2) Set up a lookup table char HexChar[n] with HexChar[0x30303030 % n] = '0', HexChar[0x30303031 % n] = '1', etc. (or HexChar[0x31303030 % n] = '1' etc. if your machine is little-endian).
Now the conversion is lightning-fast (I am assuming sizeof (int) == 4):
unsigned int const* s = reinterpret_cast<unsigned int const*>(binaryVecStr[a].c_str());
for (size_t i = 0; i < binaryVecStr[a].size(); i += 4, s++)
    hexOStr << HexChar[*s % n];
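As a rough illustration of steps 1) and 2) (my sketch, not part of the answer), n and the table can be computed once with a few lines of brute force:
#include <cstdio>
#include <set>
#include <vector>

int main() {
    const unsigned patterns[16] = {
        0x30303030, 0x30303031, 0x30303130, 0x30303131,
        0x30313030, 0x30313031, 0x30313130, 0x30313131,
        0x31303030, 0x31303031, 0x31303130, 0x31303131,
        0x31313030, 0x31313031, 0x31313130, 0x31313131,
    };

    // Step 1: smallest n that gives 16 distinct remainders.
    unsigned n = 16;
    for (;; ++n) {
        std::set<unsigned> seen;
        for (unsigned p : patterns) seen.insert(p % n);
        if (seen.size() == 16) break;
    }
    std::printf("n = %u\n", n);

    // Step 2: the lookup table (swap the byte order of the patterns on a little-endian machine).
    std::vector<char> HexChar(n, 0);
    for (int v = 0; v < 16; ++v)
        HexChar[patterns[v] % n] = "0123456789ABCDEF"[v];
}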
Upvotes: 4
Reputation: 59
Something like this, maybe (note that std::stoll can only handle strings that fit in a long long):
#include <iostream>
#include <string>
#include <iomanip>
#include <sstream>

int main()
{
    std::cout << std::hex << std::stoll("100110110010100100111101010001001101100101010110000101111111111", NULL, 2) << std::endl;

    std::stringstream ss;
    ss << std::hex << std::stoll("100110110010100100111101010001001101100101010110000101111111111", NULL, 2);
    std::cout << ss.str() << std::endl;

    return 0;
}
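To cope with strings longer than a long long can hold, one option (my sketch, not part of the answer) is to convert fixed-size chunks: 16 binary digits always map to exactly 4 hex digits, so zero-padding keeps the pieces aligned (assumes bin.size() % 16 == 0):
#include <iomanip>
#include <sstream>
#include <string>

std::string big_bin_to_hex(const std::string& bin)
{
    std::ostringstream out;
    for (std::size_t i = 0; i < bin.size(); i += 16)
        out << std::hex << std::setw(4) << std::setfill('0')
            << std::stoul(bin.substr(i, 16), nullptr, 2);   // 16 bits -> 4 padded hex digits
    return out.str();
}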
Upvotes: 5
Reputation: 7490
This seems to work.
std::vector<char> binaryVecStr = { '0', '0', '0', '1', '1', '1', '1', '0' };
string binToHex;
binToHex.reserve(binaryVecStr.size()/4);
for (uint32_t * ptr = reinterpret_cast<uint32_t *>(binaryVecStr.data()); ptr < reinterpret_cast<uint32_t *>(binaryVecStr.data()) + binaryVecStr.size() / 4; ++ptr) {
switch (*ptr) {
case 0x30303030:
binToHex += "0";
break;
case 0x31303030:
binToHex += "1";
break;
case 0x30313030:
binToHex += "2";
break;
case 0x31313030:
binToHex += "3";
break;
case 0x30303130:
binToHex += "4";
break;
case 0x31303130:
binToHex += "5";
break;
case 0x30313130:
binToHex += "6";
break;
case 0x31313130:
binToHex += "7";
break;
case 0x30303031:
binToHex += "8";
break;
case 0x31303031:
binToHex += "9";
break;
case 0x30313031:
binToHex += "A";
break;
case 0x31313031:
binToHex += "B";
break;
case 0x30303131:
binToHex += "C";
break;
case 0x31303131:
binToHex += "D";
break;
case 0x30313131:
binToHex += "E";
break;
case 0x31313131:
binToHex += "F";
break;
default:
// invalid input
binToHex += "?";
}
}
std::cout << binToHex;
Beware, it uses a few assumptions:
1) char has 8 bits (not true on all platforms)
2) it requires little endian (meaning it works at least on x86, x86_64)
It assumes that binaryVecStr is a std::vector<char>, but it would work with a string as well. It also assumes that binaryVecStr.size() % 4 == 0.
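If the little-endian assumption is a concern, a hedged sketch (not part of the answer) of an endianness-neutral variant: build each 32-bit key from the characters explicitly and use the corresponding big-endian-style case labels:
#include <cstdint>

// Combine 4 ASCII characters into one key, independent of host byte order.
inline uint32_t nibble_key(const char* p) {
    return (uint32_t(uint8_t(p[0])) << 24) | (uint32_t(uint8_t(p[1])) << 16) |
           (uint32_t(uint8_t(p[2])) <<  8) |  uint32_t(uint8_t(p[3]));
}
// With this, "0000" always yields 0x30303030 and "0001" always yields
// 0x30303031, so the switch can use those labels on every platform.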
Upvotes: 3