Thomas
Thomas

Reputation: 957

How to compress a hexadecimal number into a short string?

I have a hexadecimal number of unknown length and I would like to convert it into a string that is as short as possible, and later be able to convert that string back to the original number.

Since each hex digit carries only 4 bits, I was thinking of packing the number into a UTF-16 or UTF-32 string. That would create a short string with weird characters.

Hypothetical example: 0x874FAB41900C -> "ÚΔЂ" -> 0x874FAB41900C.

So what I'm looking for is two Javascript functions like this:

// Skeleton from the question: the loop body is a placeholder, and `string`
// is never declared inside this snippet.
function hexToString(hex) {
    for (var i=0; i < hex.length; i++) {
        // convert the digits into UTF-16
    }

    return string;
}

// Skeleton from the question: the loop body is a placeholder, and `hex`
// is never declared inside this snippet.
function stringToHex(string) {
    for (var i=0; i < string.length; i++) {
        // convert the char back into hex values
    }

    return hex;
}

Any idea how to do this?

Upvotes: 3

Views: 9781

Answers (1)

juvian
juvian

Reputation: 16068

In JavaScript, each string character is a 16-bit UTF-16 code unit, so we can pack 4 hex digits into 1 character:

/**
 * Packs a hexadecimal number into a short string, storing four hex digits
 * (16 bits) in each UTF-16 code unit.
 *
 * The digits are left-padded with zeros to a multiple of four, so the
 * numeric value is preserved but leading zeros are not significant.
 * The input is not validated as hexadecimal.
 *
 * @param {string} hex - Hex digits, with or without a leading "0x"/"0X".
 * @returns {string} One character per group of four hex digits ("" when
 *   there are no digits).
 */
function hexToString(hex) {
  // Strip the prefix only when it is actually there; unconditionally dropping
  // two characters would silently discard real digits of un-prefixed input.
  var digits = hex.replace(/^0x/i, "");
  var string = "";

  // Pad on the left so the digits split evenly into groups of four.
  while (digits.length % 4 !== 0) {
    digits = "0" + digits;
  }

  for (var i = 0; i < digits.length; i += 4) {
    // Each group is a value in 0..0xFFFF, i.e. exactly one UTF-16 code unit.
    string += String.fromCharCode(parseInt(digits.substring(i, i + 4), 16));
  }

  return string;
}

/**
 * Expands a packed string back into its "0x…" hexadecimal form.
 *
 * @param {string} string - Text whose UTF-16 code units each hold 16 bits.
 * @returns {string} "0x" followed by upper-case hex digits ("0x" alone for
 *   an empty string).
 */
function stringToHex(string) {
  var groups = [];

  for (var pos = 0; pos < string.length; pos += 1) {
    var digits = string.charCodeAt(pos).toString(16);

    // Only the leading unit may be shorter than four digits; every later
    // unit is zero-padded so its position in the number is kept.
    if (pos > 0) {
      digits = ("000" + digits).slice(-4);
    }

    groups.push(digits);
  }

  return "0x" + groups.join("").toUpperCase();
}



// Round-trip demo: print the packed string, then the hex recovered from it.
var str = hexToString("0x874FAB41900C");
[str, stringToHex(str)].forEach(function (line) {
  console.log(line);
});

If you want a 32-bit version, you need to change the encoding function; the decoding function stays the same. Note that Unicode code points only go up to 21 bits, so I don't think you can do better than the 16-bit version — this one won't really produce fewer characters:

/**
 * Packs a hexadecimal number into a string, consuming eight hex digits
 * (32 bits) per step and emitting them as two UTF-16 code units.
 *
 * The digits are left-padded with zeros to a multiple of eight, so the
 * numeric value is preserved but leading zeros are not significant.
 * The input is not validated as hexadecimal.
 *
 * @param {string} hex - Hex digits, with or without a leading "0x"/"0X".
 * @returns {string} Two code units per group of eight hex digits ("" when
 *   there are no digits).
 */
function hexToString(hex) {
  // Strip the prefix only when it is actually there; unconditionally dropping
  // two characters would silently discard real digits of un-prefixed input.
  var digits = hex.replace(/^0x/i, "");
  var string = "";

  // Pad on the left so the digits split evenly into groups of eight.
  while (digits.length % 8 !== 0) {
    digits = "0" + digits;
  }

  for (var i = 0; i < digits.length; i += 8) {
    // Upper and lower 16 bits of the group become two consecutive code units.
    var high = parseInt(digits.substring(i, i + 4), 16);
    var low = parseInt(digits.substring(i + 4, i + 8), 16);
    string += String.fromCharCode(high, low);
  }

  return string;
}

You can read more about 32-bit characters here: how to render 32bit unicode characters in google v8 (and nodejs)

Edit : A fun example

/**
 * Packs a hexadecimal number into a string, eight hex digits (32 bits) at a
 * time, written out as two UTF-16 code units per group.
 *
 * The digits are left-padded with zeros to a multiple of eight before
 * packing. The input is not validated as hexadecimal.
 *
 * @param {string} hex - Hex digits, with or without a leading "0x"/"0X".
 * @returns {string} Two code units per group of eight hex digits ("" when
 *   there are no digits).
 */
function hexToString(hex) {
  // Remove the prefix only if present, so un-prefixed input keeps all of its
  // digits instead of losing the first two.
  var digits = hex.replace(/^0x/i, "");

  // Number of zeros needed to reach the next multiple of eight (0..7).
  var missing = (8 - (digits.length % 8)) % 8;
  digits = new Array(missing + 1).join("0") + digits;

  var pieces = [];
  for (var offset = 0; offset < digits.length; offset += 8) {
    var middle = offset + 4;
    pieces.push(
      String.fromCharCode(
        parseInt(digits.substring(offset, middle), 16),
        parseInt(digits.substring(middle, offset + 8), 16)
      )
    );
  }

  return pieces.join("");
}

// 0xD83D and 0xDCA3 are the UTF-16 high/low surrogate pair for U+1F4A3 (💣),
// so the two code units produced here combine into that one character.
console.log(hexToString("0xD83DDCA3"))

Upvotes: 6

Related Questions