Reputation: 775
I'm using org.apache.commons.codec.binary.Base64 to decode a string that is UTF-8 encoded. Sometimes I receive a Base64-encoded string which, after decoding, looks like this, for example: ^@k��@@
. How can I check whether the Base64 input is correct, or whether the decoded string is a valid UTF-8 string?
To clarify, I'm using:
// Decodes a Base64 string: converts str to bytes using Constants.UTF_8, passes them to the
// byte[] overload of base64Decode, and builds the returned String from the decoded bytes
// using Constants.UTF_8 again.
// NOTE(review): Constants.UTF_8 is presumably the charset name "UTF-8" — confirm.
public static String base64Decode(String str) {
try {
return new String(base64Decode(str.getBytes(Constants.UTF_8)), Constants.UTF_8);
} catch (UnsupportedEncodingException e) {
// NOTE(review): the body of this catch block and the closing braces are missing from this excerpt.
// Decodes Base64-encoded bytes by delegating to Base64.decodeBase64 and returns its result unchanged.
// NOTE(review): Base64 is presumably org.apache.commons.codec.binary.Base64, as named in the question — confirm.
public static byte[] base64Decode(byte[] byteArray) {
return Base64.decodeBase64(byteArray);
Upvotes: 17
Views: 101001
Reputation: 539
I created this method:
public static String descodificarDeBase64(String stringCondificado){
try {
return new String(Base64.decode(stringCondificado.getBytes("UTF-8"),Base64.DEFAULT));
} catch (UnsupportedEncodingException e) {
return "";
This lets me decode Base64 strings that contain Spanish characters such as á, ñ, í and ü.
For example, descodificarDeBase64("wr9RdcOpIHRhbD8=") will return: ¿Qué tal?
Upvotes: 0
Reputation: 2858
Try this:
/**
 * Base64 codec for JavaScript strings.
 *
 * encode() turns a string into the Base64 form of its UTF-8 bytes; decode()
 * reverses that. Written in ES5 style so it keeps working in legacy browsers.
 */
var B64 = {
    // The 64 Base64 digits followed by the padding character '=' at index 64.
    alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=',

    // Character -> index map over `alphabet`; built lazily by fromUtf8().
    lookup: null,

    // Browser-detection flags, kept for backward compatibility. The codec no
    // longer branches on them. `navigator` is guarded so the object can also be
    // created outside a browser.
    ie: typeof navigator !== 'undefined' && /MSIE /.test(navigator.userAgent),
    ieo: typeof navigator !== 'undefined' && /MSIE [67]/.test(navigator.userAgent),

    /**
     * Encodes a string as Base64 over its UTF-8 bytes.
     *
     * @param {string} s - Text to encode.
     * @returns {string} Padded Base64 text ('' for an empty input).
     */
    encode: function (s) {
        var buffer = B64.toUtf8(s),
            len = buffer.length,
            result = [],
            position, b0, b1, b2, hasB1, hasB2;

        for (position = 0; position < len; position += 3) {
            hasB1 = position + 1 < len;
            hasB2 = position + 2 < len;
            b0 = buffer[position];
            b1 = hasB1 ? buffer[position + 1] : 0;
            b2 = hasB2 ? buffer[position + 2] : 0;

            // Index 64 is '=', used when the final group has fewer than 3 bytes.
            // charAt() is used instead of string indexing for old-browser support.
            result.push(
                B64.alphabet.charAt(b0 >> 2),
                B64.alphabet.charAt(((b0 & 3) << 4) | (b1 >> 4)),
                B64.alphabet.charAt(hasB1 ? ((b1 & 15) << 2) | (b2 >> 6) : 64),
                B64.alphabet.charAt(hasB2 ? b2 & 63 : 64)
            );
        }
        return result.join('');
    },

    /**
     * Decodes Base64 text and interprets the resulting bytes as UTF-8.
     *
     * @param {string} s - Base64 text.
     * @returns {string} The decoded string ('' for an empty input).
     */
    decode: function (s) {
        var buffer = B64.fromUtf8(s),
            position = 0,
            len = buffer.length,
            result = [],
            lead, code;

        while (position < len) {
            lead = buffer[position++];
            if (lead < 128) {
                code = lead;
            } else if (lead > 191 && lead < 224) {
                code = ((lead & 31) << 6) | (buffer[position++] & 63);
            } else if (lead < 240) {
                // Also reached for stray continuation bytes (128-191), which are
                // treated as 3-byte leads exactly as before.
                code = ((lead & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63);
            } else {
                // 4-byte sequence: a code point above U+FFFF, emitted as a
                // UTF-16 surrogate pair.
                code = ((lead & 7) << 18) | ((buffer[position++] & 63) << 12) |
                    ((buffer[position++] & 63) << 6) | (buffer[position++] & 63);
                code -= 0x10000;
                result.push(String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF)));
                continue;
            }
            result.push(String.fromCharCode(code));
        }
        return result.join('');
    },

    /**
     * Converts a string to an array of UTF-8 byte values.
     *
     * Valid surrogate pairs are combined into one code point and written as a
     * 4-byte sequence; an unpaired surrogate is written as a 3-byte sequence.
     *
     * @param {string} s - Text to convert.
     * @returns {number[]} Byte values (0-255).
     */
    toUtf8: function (s) {
        var position = -1,
            len = s.length,
            buffer = [],
            chr, low;

        while (++position < len) {
            chr = s.charCodeAt(position);
            if (chr >= 0xD800 && chr <= 0xDBFF && position + 1 < len) {
                low = s.charCodeAt(position + 1);
                if (low >= 0xDC00 && low <= 0xDFFF) {
                    chr = 0x10000 + ((chr - 0xD800) << 10) + (low - 0xDC00);
                    position++;
                }
            }
            if (chr < 128) buffer.push(chr);
            else if (chr < 2048) buffer.push((chr >> 6) | 192, (chr & 63) | 128);
            else if (chr < 65536) buffer.push((chr >> 12) | 224, ((chr >> 6) & 63) | 128, (chr & 63) | 128);
            else buffer.push((chr >> 18) | 240, ((chr >> 12) & 63) | 128, ((chr >> 6) & 63) | 128, (chr & 63) | 128);
        }
        return buffer;
    },

    /**
     * Converts Base64 text to the array of byte values it encodes.
     * (Despite its name, this is the Base64 -> bytes step used by decode().)
     *
     * Reading stops at the first '='. Characters outside the alphabet (for
     * example line breaks) are ignored, and missing padding is tolerated.
     *
     * @param {string} s - Base64 text.
     * @returns {number[]} Byte values (0-255).
     */
    fromUtf8: function (s) {
        var buffer = [],
            sextets = [],
            len, i, value;

        if (!B64.lookup) {
            B64.lookup = {};
            len = B64.alphabet.length;
            for (i = 0; i < len; i++) B64.lookup[B64.alphabet.charAt(i)] = i;
        }

        // Collect the 6-bit values first so padding, missing padding and stray
        // characters are all handled in one place.
        len = s.length;
        for (i = 0; i < len; i++) {
            value = B64.lookup[s.charAt(i)];
            if (value === 64) break;
            if (value !== undefined) sextets.push(value);
        }

        // Every 4 sextets give 3 bytes; a trailing group of 2 or 3 gives 1 or 2.
        len = sextets.length;
        for (i = 0; i + 1 < len; i += 4) {
            buffer.push((sextets[i] << 2) | (sextets[i + 1] >> 4));
            if (i + 2 >= len) break;
            buffer.push(((sextets[i + 1] & 15) << 4) | (sextets[i + 2] >> 2));
            if (i + 3 >= len) break;
            buffer.push(((sextets[i + 2] & 3) << 6) | sextets[i + 3]);
        }
        return buffer;
    }
};
View Here
Upvotes: 1
Reputation: 1109572
You should specify the charset when converting a String to a byte[] and vice versa.
byte[] bytes = string.getBytes("UTF-8");
// feed bytes to Base64
// get bytes from Base64
String string = new String(bytes, "UTF-8");
Otherwise the platform default encoding will be used, which is not necessarily UTF-8.
Upvotes: 32