Reputation: 1687
How can I read and access Unicode characters in standard C? Previously I was using C++, with std::wstring
for whole words and 'const wchar_t' for single characters, which works perfectly (example code below).
But now I am not allowed to use C++. How can I replace 'wstring' in C? How can I convert my code to standard C?
...
...
// Lookup table of wide characters. Ten slots are declared but only nine are
// initialised, so the last element is zero (L'\0').
const wchar_t small_char[10]={ L'锕',L'吖',L'啊',L'阿',L'呵',L'嗄',L'埃',L'哀',L'哎'};
// Input text to transform.
std::wstring strIn=L"锕吖哎";
// Output text, built up one piece at a time.
std::wstring s_temp=L"";
int length= strIn.length();
// Walk the input one wide character at a time.
for(int i=0;i<length;i++){
// small_char[2] is L'啊': replace it with "ba".
if(strIn[i]==small_char[2]){
s_temp=s_temp+L"ba";
}
// small_char[5] is L'嗄': replace it with "pe".
else if(strIn[i]==small_char[5]){
s_temp=s_temp+L"pe";
}
// Any other character is copied through unchanged.
else{
s_temp=s_temp+strIn[i];
}
}
...
...
Upvotes: 4
Views: 2991
Reputation: 8237
The equivalent C routines are:
== → wcscmp or wcsncmp
+= → wcscat or wcscat_s
= → wcscpy or wcsncpy or wcscpy_s
.size() or .length() → wcslen
In your case since you are comparing one character at a time, you do not need wcscmp. Make sure all your strings are null terminated otherwise the non _s versions won't work.
Upvotes: 0
Reputation: 595367
How can I replace the 'wstring' in C? How can I convert my code to standard C?
`std::wstring` is just a wrapper for `wchar_t*`. You can use `wchar_t` directly; you just have to manage the string memory and concatenations manually.
Try this:
...
const wchar_t small_char[10] = { L'锕', L'吖', L'啊', L'阿', L'呵', L'嗄', L'埃', L'哀', L'哎'};
wchar_t *strIn = L"锕吖哎";
int length = wcslen(strIn);
wchar_t *s_temp = (wchar_t*) calloc((length*2)+1, sizeof(wchar_t));
int s_temp_len = 0;
for(int i = 0; i < length; i++)
{
if (strIn[i] == small_char[2])
{
memcpy(&s_temp[s_temp_len], L"ba", 2*sizeof(wchar_t));
s_temp_len += 2;
s_temp[s_temp_len] = L'\0';
}
else if (strIn[i] == small_char[5])
{
memcpy(&s_temp[s_temp_len], L"pe", 2*sizeof(wchar_t));
s_temp_len += 2;
s_temp[s_temp_len] = L'\0';
}
else
{
s_temp[s_temp_len] = strIn[i];
s_temp_len += 1;
s_temp[s_temp_len] = L'\0';
}
}
// use s_temp up to s_temp_len characters as needed...
free(s_temp);
...
If you want something more like `std::wstring`, you should pre-allocate a small buffer and resize it whenever you are going to exceed its capacity during concatenations. A `struct` is useful for keeping track of that:
/*
 * Minimal growable wide-character string, loosely modelled on std::wstring.
 *
 *   data     - heap buffer holding the characters followed by L'\0',
 *              or NULL while nothing has been allocated yet
 *   length   - number of characters stored, not counting the terminator
 *   capacity - number of wchar_t elements allocated for data
 */
struct my_wstring
{
    wchar_t *data;
    int length;
    int capacity;
};

/*
 * Put str into the empty state (no buffer, zero length, zero capacity).
 * Does not free anything, so call it only on a not-yet-initialised object.
 */
void wstring_init(struct my_wstring *str)
{
    str->data = NULL;
    str->length = 0;
    str->capacity = 0;
}

/*
 * Release the buffer owned by str and return it to the empty state.
 * The object can be reused for appending afterwards.
 */
void wstring_clear(struct my_wstring *str)
{
    free(str->data);   /* free(NULL) is a no-op, so an empty string is fine */
    str->data = NULL;
    str->length = 0;
    str->capacity = 0;
}

// allocate in multiples of 32
const int delta = 32;

/*
 * Append the first valueLen characters of value to str, growing the buffer
 * when needed and keeping the result L'\0'-terminated.
 *
 * Does nothing when str or value is NULL or valueLen < 1. If the buffer
 * cannot be grown, str is left unchanged. value must not point into str's
 * own buffer, because growing the buffer may move it.
 */
void wstring_append_str_len(struct my_wstring *str, const wchar_t *value, int valueLen)
{
    if ((!str) || (!value) || (valueLen < 1)) return;

    int newLen = str->length + valueLen;
    int needed = newLen + 1;   /* room for the terminator */

    if (needed > str->capacity)
    {
        /*
         * Round up to the next multiple of delta. Divide-then-multiply is
         * used instead of a bit mask so the result is never smaller than
         * `needed`, and so it stays correct for any positive delta.
         */
        int newCap = ((needed + (delta - 1)) / delta) * delta;

        /* Keep the old pointer until realloc succeeds, to avoid leaking it. */
        wchar_t *newData = realloc(str->data, (size_t)newCap * sizeof *newData);
        if (!newData)
        {
            // memory allocation error, do something!
            return;
        }
        str->data = newData;
        str->capacity = newCap;
    }

    memcpy(&(str->data[str->length]), value, (size_t)valueLen * sizeof(wchar_t));
    str->length = newLen;
    str->data[newLen] = L'\0';
}

/*
 * Append the L'\0'-terminated wide string value to str.
 * A NULL value is ignored.
 */
void wstring_append_str(struct my_wstring *str, const wchar_t *value)
{
    /* wcslen(NULL) is undefined behaviour, so guard before measuring. */
    if (!value) return;
    wstring_append_str_len(str, value, (int)wcslen(value));
}

/* Append the single wide character value to str. */
void wstring_append_chr(struct my_wstring *str, const wchar_t value)
{
    wstring_append_str_len(str, &value, 1);
}
...
const wchar_t small_char[10] = { L'锕', L'吖', L'啊', L'阿', L'呵', L'嗄', L'埃', L'哀', L'哎'};
wchar_t *strIn = L"锕吖哎";
struct my_wstring s_temp;
wstring_init(&s_temp);
int length = wcslen(strIn);
for(int i = 0; i < length; i++)
{
if (strIn[i] == small_char[2])
{
wstring_append_str(&s_temp, L"ba");
}
else if (strIn[i] == small_char[5])
{
wstring_append_str(&s_temp, L"pe");
}
else
{
wstring_append_chr(&s_temp, strIn[i]);
}
}
// use s_temp.data up to s_temp.length characters as needed...
wstring_clear(&s_temp);
...
Upvotes: 4