YosiFZ
YosiFZ

Reputation: 7900

JavaScript Base64 decoder does not decode emoji as expected

I have this string in my HTML code:

eyJzaW1wbGVUZXh0Ijoi8J+NjCBTVU1NRVIgU0VUIDIwMTkg8J+QnSBERSBMQSBLQVJJTkEg4pqhINeh15gg16fXmdelIDIwMTkg8J+MvSJ9

It is the Base64 encoding of this JSON:

{"simpleText":"🍌 SUMMER SET 2019 🐝 DE LA KARINA ⚡ סט קיץ 2019 🌽"}

I'm facing a problem when using my Base64 decode method:

/**
 * Decodes a '='-padded Base64 string into bytes and hands them to
 * utf8Decode() (defined elsewhere) to build the resulting text.
 *
 * Characters that are not part of the Base64 alphabet are reported through
 * log() (defined elsewhere) and skipped.
 *
 * @param {string} data - Base64 text using the standard alphabet (A-Z, a-z, 0-9, '+', '/').
 * @returns {*} The value returned by utf8Decode(bytes), or null when the
 *   input does not end on a whole-byte boundary.
 */
function decode(data) {
  // Maps an ASCII code (0-127) to its 6-bit Base64 value; -1 marks characters
  // outside the alphabet. '=' (code 61) maps to 0 so padding still advances
  // the bit counter.
  const binTable = [
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
  ];
  const padding = '=';
  const bytes = [];
  let leftbits = 0; // number of buffered bits not yet emitted as a byte
  let leftdata = 0; // the buffered bits themselves

  for (let idx = 0; idx < data.length; idx++) {
    const code = data.charCodeAt(idx);
    // Only ASCII can be Base64. Masking the code with 0x7F instead would
    // alias non-ASCII characters onto valid digits (e.g. U+0151 -> 'Q').
    const value = code < 0x80 ? binTable[code] : -1;

    if (value === -1) {
      log(`WARN: Illegal characters (code=${code}) in position ${idx}`);
      continue;
    }

    leftdata = (leftdata << 6) | value;
    leftbits += 6;

    if (leftbits >= 8) {
      leftbits -= 8;
      // Padding only fills up the final group; it never produces a byte.
      if (data.charAt(idx) !== padding) {
        bytes.push((leftdata >> leftbits) & 0xFF);
      }
      leftdata &= (1 << leftbits) - 1;
    }
  }

  if (leftbits) {
    log("ERROR: Corrupted base64 string");
    return null;
  }

  return utf8Decode(bytes);
}

That gave me this string:

{"simpleText":"ߍ젓UMMER SET 2019 ߐ�E LA KARINA ⚡ סט קיץ 2019 ߌ�

I know that using the atob function is recommended, but because it caused other issues for me I prefer to use this code instead. Any idea why it does not work with emoji inside a string?

Thanks!

Upvotes: 0

Views: 333

Answers (3)

Rémi Sanchez
Rémi Sanchez

Reputation: 71

I think your problem is not in the decode() function, but in the utf8Decode() function used inside.

However you did not provide it so I wrote one and it gives the correct result for your string (you can see {"simpleText":"🍌 SUMMER SET 2019 🐝 DE LA KARINA ⚡ סט קיץ 2019 🌽"} in the console):

// Base64 sample from the question; it encodes UTF-8 JSON containing emoji and Hebrew text.
const example = "eyJzaW1wbGVUZXh0Ijoi8J+NjCBTVU1NRVIgU0VUIDIwMTkg8J+QnSBERSBMQSBLQVJJTkEg4pqhINeh15gg16fXmdelIDIwMTkg8J+MvSJ9";

/**
 * Decodes a '='-padded Base64 string into bytes and converts them to text
 * with utf8Decode().
 *
 * Characters outside the Base64 alphabet are reported on the console and
 * skipped.
 *
 * @param {string} data - Base64 text using the standard alphabet (A-Z, a-z, 0-9, '+', '/').
 * @returns {*} The value returned by utf8Decode(bytes), or null when the
 *   input does not end on a whole-byte boundary.
 */
function decode(data) {
  // ASCII code (0-127) -> 6-bit Base64 value; -1 means "not a Base64 digit".
  // '=' (code 61) maps to 0 so that padding still advances the bit counter.
  const binTable = [
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
  ];
  const padding = '=';
  const bytes = [];
  let leftbits = 0; // count of buffered bits waiting to form a byte
  let leftdata = 0; // the buffered bits

  for (let idx = 0; idx < data.length; idx++) {
    const code = data.charCodeAt(idx);
    // Reject anything beyond ASCII explicitly: masking with 0x7F would let a
    // character such as U+0151 masquerade as the valid digit 'Q'.
    const value = code < 0x80 ? binTable[code] : -1;

    if (value === -1) {
      console.log(`WARN: Illegal characters (code=${code}) in position ${idx}`);
      continue;
    }

    leftdata = (leftdata << 6) | value;
    leftbits += 6;

    if (leftbits >= 8) {
      leftbits -= 8;
      // Padding completes the last group but contributes no output byte.
      if (data.charAt(idx) !== padding) {
        bytes.push((leftdata >> leftbits) & 0xFF);
      }
      leftdata &= (1 << leftbits) - 1;
    }
  }

  if (leftbits) {
    console.log("ERROR: Corrupted base64 string");
    return null;
  }

  return utf8Decode(bytes);
}

/**
 * Turns a sequence of byte values into a string by decoding them as UTF-8.
 *
 * @param {number[]} bytes - Byte values, e.g. the array built by decode().
 * @returns {string} The decoded text.
 */
function utf8Decode(bytes) {
  const octets = new Uint8Array(bytes);
  const decoder = new TextDecoder("utf-8");
  return decoder.decode(octets);
}

// Decode the sample string and print the resulting text to the console.
const result = decode(example);
console.log(result);

Upvotes: 1

dpellier
dpellier

Reputation: 1039

You can use encodeURIComponent/decodeURIComponent together with escape/unescape to get your emojis back (see http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html for more info).

Here is a code sample showing the encoding/decoding with your data: https://jsbin.com/wusokawacu/edit?js,console

// Sample payload from the question, used as the input for the encode/decode round trip.
const string = {
  simpleText: "🍌 SUMMER SET 2019 🐝 DE LA KARINA ⚡ סט קיץ 2019 🌽",
};


/**
 * Encodes a string as Base64 of its UTF-8 bytes (browser only: uses window.btoa).
 *
 * @param {string} str - Text to encode; may contain any Unicode characters.
 * @returns {string} Base64 representation of the UTF-8 encoded text.
 */
function utf8_to_b64(str) {
  // encodeURIComponent emits the UTF-8 bytes as %XX escapes ...
  const percentEncoded = encodeURIComponent(str);
  // ... and unescape turns each %XX into one character, giving btoa a
  // string with exactly one character per byte.
  const binaryString = unescape(percentEncoded);
  return window.btoa(binaryString);
}

/**
 * Decodes Base64 that holds UTF-8 bytes back into a string (browser only: uses window.atob).
 *
 * @param {string} str - Base64 text.
 * @returns {string} The decoded text.
 */
function b64_to_utf8(str) {
  // atob yields one character per decoded byte ...
  const binaryString = window.atob(str);
  // ... escape rewrites the non-ASCII ones as %XX so that decodeURIComponent
  // can interpret the whole sequence as UTF-8.
  const percentEncoded = escape(binaryString);
  return decodeURIComponent(percentEncoded);
}

// Round trip: serialize the sample object to JSON, Base64-encode it, then decode it back to the JSON text.
const encoded = utf8_to_b64(JSON.stringify(string));
const decoded = b64_to_utf8(encoded);

Upvotes: 1

Elad
Elad

Reputation: 971

I know you said that you don't want to use the native atob function,

but have you tried it like this:

// Base64 string from the question.
let encodedData = 'eyJzaW1wbGVUZXh0Ijoi8J+NjCBTVU1NRVIgU0VUIDIwMTkg8J+QnSBERSBMQSBLQVJJTkEg4pqhINeh15gg16fXmdelIDIwMTkg8J+MvSJ9';

// atob returns one character per decoded byte; escape + decodeURIComponent
// then reinterpret those bytes as UTF-8.
const binaryString = window.atob(encodedData);
const percentEncoded = escape(binaryString);
let decodedData = decodeURIComponent(percentEncoded);
console.log(decodedData); // prints the data with the emojis intact

If you still want to implement it yourself, take a look at this package: https://github.com/dankogai/js-base64/blob/master/base64.js

Upvotes: 0

Related Questions