Reputation: 2003
I have a method reads a json file and returns a const char*
that can be any text, including emojis. I don't have access to the source of this method.
For example, I created a json file with the england flag, 🏴 ({message: "\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F"}
).
When I call that method, it returns something like í ¼í¿´í€í±§í€í±¢í€í±¥í€í±®í€í±§í€í±¿
, but in order to use it properly, I need to convert it to icu::UnicodeString because I use another method (closed source again) that expects it.
The only way I found to make it work was something like:
icu::UnicodeString unicode;
unicode.setTo((UChar*)convertMessage().data());
std::string messageAsString;
unicode.toUTF8String(messageAsString);
after doing that, messageAsString is usable and everything works.
convertMessage()
is a method that uses std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>::from_bytes(str)
.
My question is, is there a way to create a icu::UnicodeString
without using that extra convertMessage()
call?
Upvotes: 0
Views: 636
Reputation: 588
This is sample usage of ucnv_toUChars function. I took these function from postgresql source code and used it for my project.
UConverter *icu_converter;
static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, int32_t nbytes)
{
UErrorCode status;
int32_t len_uchar;
status = U_ZERO_ERROR;
len_uchar = ucnv_toUChars(icu_converter, NULL, 0,buff, nbytes, &status);
if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
return -1;
*buff_uchar = (UChar *) malloc((len_uchar + 1) * sizeof(**buff_uchar));
status = U_ZERO_ERROR;
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,buff, nbytes, &status);
if (U_FAILURE(status))
assert(0); //(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
return len_uchar;
}
static int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
{
UErrorCode status;
int32_t len_result;
status = U_ZERO_ERROR;
len_result = ucnv_fromUChars(icu_converter, NULL, 0,
buff_uchar, len_uchar, &status);
if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
assert(0); // (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
*result = (char *) malloc(len_result + 1);
status = U_ZERO_ERROR;
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
buff_uchar, len_uchar, &status);
if (U_FAILURE(status))
assert(0); // (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
return len_result;
}
void main() {
const char *utf8String = "Hello";
int len = 5;
UErrorCode status = U_ZERO_ERROR;
icu_converter = ucnv_open("utf8", &status);
assert(status <= U_ZERO_ERROR);
UChar *buff_uchar;
int32_t len_uchar = icu_to_uchar(&buff_uchar, ut8String, len);
// use buff_uchar
free(buff_uchar);
}
Upvotes: 1