Reputation: 1902
My attempts seem hacky and overly convoluted. Is there a simple way to convert ASCII to UTF16 on Windows and macOS?
(note that I can't change the prUTF16Char typedef 🙃)
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <xcselect.h>
#include <wchar.h>
#include <CoreFoundation/CoreFoundation.h>
typedef unsigned short int prUTF16Char;
#else
typedef wchar_t prUTF16Char;
#endif
#define WIDEN2(x) L ## x
#define WIDEN(x) WIDEN2(x)
#define PROJECT_NAME "foo"
void copy2ConvertStringLiteralIntoUTF16(const wchar_t* inputString, prUTF16Char* destination) {
    size_t length = wcslen(inputString);
#if (defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)) && defined(PLUGIN_MODE)
    wcscpy_s(destination, length + 1, inputString);
#elif defined(__APPLE__) && defined(__MACH__)
    CFRange range = {0, 150}; range.length = length;
    CFStringRef inputStringCFSR = CFStringCreateWithBytes(
        kCFAllocatorDefault, reinterpret_cast<const UInt8 *>(inputString),
        length * sizeof(wchar_t), kCFStringEncodingUTF32LE, false);
    CFStringGetBytes(inputStringCFSR, range, kCFStringEncodingUTF16, 0, false,
                     reinterpret_cast<UInt8 *>(destination), length * sizeof(prUTF16Char), NULL);
    destination[length] = 0; // Set NULL-terminator
    CFRelease(inputStringCFSR);
#endif
}
const prUTF16Char * to_wchar(const char* message) {
    const size_t cSize = strlen(message);
    wchar_t *w_str = new wchar_t[cSize];
#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
    size_t outSize;
    mbstowcs_s(&outSize, w_str, cSize, message, cSize - 1);
    return w_str;
#else
    mbstowcs(w_str, message, cSize);
#endif
#if defined(__APPLE__) && defined(__MACH__)
    prUTF16Char *ut16str = new prUTF16Char[cSize];
    copy2ConvertStringLiteralIntoUTF16(w_str, ut16str);
    return ut16str;
#else
    return w_str;
#endif
}
Then I can just define a global var:
static const prUTF16Char* PROJECT_NAME_W =
#if defined(__APPLE__) && defined(__MACH__)
to_wchar
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
WIDEN
#endif
(PROJECT_NAME);
And the body of a generic print function taking message:
#if WCHAR_UTF16
wprintf(L"%s",
#else
printf("%ls\n",
#endif
message);
Full attempt:
https://github.com/SamuelMarks/premiere-pro-cmake-plugin/blob/f0d2278/src/common/logger.cpp [rewriting from C++ to C]
Error:
error: initializer element is not a compile-time constant
EDIT: Super hacky, but with @barmak-shemirani's solution I can:
#if defined(__APPLE__) && defined(__MACH__)
extern
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
static
#endif
const prUTF16Char* PROJECT_NAME_W
#if defined(__APPLE__) && defined(__MACH__)
;
#elif defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__)
WIDEN(PROJECT_NAME);
#endif
…and only initialise and free on the extern variant.
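For instance, a minimal sketch of the macOS-only translation unit that defines and manages the extern variant (logger_init/logger_free are hypothetical names, not from the original project, and to_wchar is assumed to allocate with malloc):
#include <stdlib.h>
typedef unsigned short int prUTF16Char;
extern const prUTF16Char* to_wchar(const char* message);
const prUTF16Char* PROJECT_NAME_W; // definition backing the extern declaration
void logger_init(void) {
    PROJECT_NAME_W = to_wchar("foo"); // PROJECT_NAME
}
void logger_free(void) {
    free((void*)PROJECT_NAME_W);
    PROJECT_NAME_W = NULL;
}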
Upvotes: 1
Views: 623
Reputation: 31669
message includes the null terminating character. strlen does not count this last character, so cSize has to increase by 1.
Usually you need to call setlocale if, for example, message was typed on a non-English computer. But it's okay if message is guaranteed to be ASCII.
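For example, a minimal sketch of adopting the user's locale before converting (the literal "abc" is just a placeholder):
#include <locale.h>
#include <stdlib.h>
int main(void)
{
    setlocale(LC_ALL, ""); // pick up the user's locale so mbstowcs can handle non-ASCII input
    wchar_t w[16];
    mbstowcs(w, "abc", 16);
    return 0;
}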
Windows Example:
const wchar_t* to_wchar(const char* message)
{
    const size_t cSize = strlen(message) + 1;
    //wchar_t* w_str = new wchar_t[cSize]; using C++?
    wchar_t* w_str = malloc(cSize * sizeof(wchar_t));
    mbstowcs(w_str, message, cSize);
    // or: size_t outSize; mbstowcs_s(&outSize, w_str, cSize, message, cSize);
    return w_str;
}
Note that wchar_t is 2 bytes on Windows and 4 bytes on POSIX. UTF-16 also comes in two different versions, little-endian and big-endian. UTF-16 uses 2 bytes per character for ASCII-equivalent codes, but 4 bytes for some non-Latin scripts.
You should consider UTF-8 output. Most Windows programs are prepared to read UTF-8 from file or network.
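For example, a rough sketch of wide-string-to-UTF-8 conversion on Windows using WideCharToMultiByte (the helper name to_utf8 is illustrative, not part of this answer's code):
#include <windows.h>
#include <stdlib.h>
char* to_utf8(const wchar_t* w)
{
    // First call reports the required buffer size, terminator included.
    int n = WideCharToMultiByte(CP_UTF8, 0, w, -1, NULL, 0, NULL, NULL);
    if (n <= 0) return NULL;
    char* s = malloc(n);
    if (s) WideCharToMultiByte(CP_UTF8, 0, w, -1, s, n, NULL, NULL);
    return s;
}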
Windows byte output for "123":
49 00 50 00 51 00 00 00 <- little-endian
00 49 00 50 00 51 00 00 <- big-endian
Linux byte output from the above code (this won't be recognized as UTF-16 by Windows):
49 00 00 00 50 00 00 00 51 00 00 00 00 00 00 00
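These dumps can be reproduced with a small helper that prints each byte in decimal ('1' is 49), e.g. this sketch:
#include <stdio.h>
#include <wchar.h>
int main(void)
{
    const wchar_t* s = L"123";
    const unsigned char* p = (const unsigned char*)s;
    // Print every byte of the string, terminator included, as decimal.
    for (size_t i = 0; i < (wcslen(s) + 1) * sizeof(wchar_t); i++)
        printf("%02d ", p[i]);
    printf("\n");
    return 0;
}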
You can write your own function if you are 100% certain that the message is ASCII:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
typedef unsigned short prUTF16Char; // remove this line later
prUTF16Char* to_wchar(const char* message)
{
    if (!message) return NULL;
    size_t len = strlen(message);
    size_t bufsize = (len + 1) * 2;
    char* buf = malloc(bufsize);
    if (!buf) return NULL;
    int little_endian = 1;
    little_endian = ((char*)&little_endian)[0];
    memset(buf, 0, bufsize);
    for (size_t i = 0; i < len; i++)
        buf[i * 2 + (little_endian ? 0 : 1)] = message[i];
    return (prUTF16Char*)buf;
}
prUTF16Char* wstr;
int main(void)
{
    wstr = to_wchar("ASCII");
    wprintf(L"%s\n", wstr); // fine on Windows, where wchar_t is 2 bytes
    free(wstr);
    return 0;
}
Upvotes: 1