Gleb Bessudnov
Gleb Bessudnov

Reputation: 23

C - wcstok() wrong results

I got an issue in my programm with one of the functions. I've got a text, which consist of sentences. In each sentence I need to find symbols '@', '#', '%' and change them to "(at)", "<решетка>", "". And I'm doing it using wcstok because I'm working with russian language. And I've got folowing problem.

Input:

He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without tak%ing a fish. In the first forty days a boy had been with him. But after forty days without a fish the boy’s parents had told him that the old man was now definitely and finally sa@lao, which is the worst form of unlucky, and the boy had gone at their orders in another boat which caught three good fis#h the first week.

Output:

He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without tak<>ing a fish. In the first forty days a boy had been with him. B(at) (at)f(at)er for(at)y d(at)ys wi(at)ho(at) (at) fish (at)he boy’s p(at)ren(at)s h(at)d (at)old him (at)h(at) (at)he old m(at)n w(at)s now defini(at)ely (at)nd fin(at)lly s(at)l(at)o, which is (at)he wors(at) form of (at)nl(at)cky, (at)nd (at)he boy h(at)d gone (at) (at)heir orders in (at)no(at)her bo(at) which c(at)gh(at) (at)hree good fis(at)h (at)he firs(at) week.

As you can see it changes all letters "a" and "t" to the "(at)". And I don't understand why this happening. It's the same situation with russian letters. This are two functions, which responsable for this work.

void changeSomeSymbols(Text *text) {
wchar_t atSymbol = L'@';
wchar_t atString[5] = L"(at)";
wchar_t percentSymbol = L'%';
wchar_t percentString[10] = L"<percent>";
wchar_t barsSymbol = L'#';
wchar_t barsString[10] = L"<решетка>";
for (int i = 0; i < text->textSize; i++) {
    for (int j = 0; j < text->sentences[i].sentenceSize; j++) {
        switch (text->sentences[i].symbols[j])
        {
        case L'@':
            changeSentence(&(text->sentences[i]), &atSymbol, atString);
            break;
        case L'#':
            changeSentence(&(text->sentences[i]), &barsSymbol, barsString);
            break;
        case L'%':
            changeSentence(&(text->sentences[i]), &percentSymbol, percentString);
            break;
        default:
            break;
        }
    }
}

}

void changeSentence(Sentence *sentence, wchar_t *flagSymbol, wchar_t *insertWstr) {
wchar_t *pwc;
wchar_t *newWcsentence;
wchar_t *buffer;
int insertionSize;
int tokenSize;
int newSentenceSize = 0;
insertionSize = wcslen(insertWstr);
newWcsentence = (wchar_t*)malloc(1 * sizeof(wchar_t));
newWcsentence[0] = L'\0';
pwc = wcstok(sentence->symbols, flagSymbol, &buffer);
do {
    tokenSize = wcslen(pwc);
    newWcsentence = (wchar_t*)realloc(newWcsentence, (newSentenceSize + tokenSize + 1) * sizeof(wchar_t));
    newSentenceSize += tokenSize;
    wcscat(newWcsentence, pwc);
    newWcsentence = (wchar_t*)realloc(newWcsentence, (newSentenceSize + insertionSize + 1) * sizeof(wchar_t));
    newSentenceSize += insertionSize;
    wcscat(newWcsentence, insertWstr);
    pwc = wcstok(NULL, flagSymbol, &buffer);
} while (pwc != NULL);
newSentenceSize -= insertionSize;
newWcsentence = (wchar_t*)realloc(newWcsentence, (newSentenceSize) * sizeof(wchar_t));
newWcsentence[newSentenceSize] = '\0';
free(sentence->symbols);
sentence->symbols = (wchar_t*)malloc((newSentenceSize + 1) * sizeof(wchar_t));
wcscpy(sentence->symbols, newWcsentence);
sentence->sentenceSize = newSentenceSize;
free(pwc);
free(newWcsentence);

}

Upvotes: 0

Views: 112

Answers (1)

Barmak Shemirani
Barmak Shemirani

Reputation: 31599

Text and Sentence are not defined, it's unclear what they are supposed to be. Just do it in one function.

void realloc_and_copy(wchar_t** dst, int *dstlen, const wchar_t *src)
{
    if(!src)
        return;
    int srclen = wcslen(src);
    *dst = realloc(*dst, (*dstlen + srclen + 1) * sizeof(wchar_t));
    if (*dstlen)
        wcscat(*dst, src);
    else
        wcscpy(*dst, src);
    *dstlen += srclen;
}

int main()
{
    const wchar_t* src = L"He was an old man who fished alone in a skiff \
in the Gulf Stream and he had gone eighty - four days now without tak%ing a fish.\
In the first forty days a boy had been with him.But after forty days without a fish \
the boy’s parents had told him that the old man was now definitely and finally sa@lao, \
which is the worst form of unlucky, and the boy had gone at their orders in another \
boat which caught three good fis#h the first week.";

    wchar_t *buf = wcsdup(src);
    wchar_t *dst = NULL;
    int dstlen = 0;

    wchar_t *context = NULL;
    const wchar_t* delimiter = L"@#%";
    wchar_t *token = wcstok(buf, delimiter, &context);
    while(token)
    {
        const wchar_t* modify = NULL;
        int cursor = token - buf - 1;
        if (cursor >= 0)
            switch(src[cursor])
            {
            case L'@': modify = L"(at)"; break;
            case L'%': modify = L"<percent>"; break;
            case L'#': modify = L"<решетка>"; break;
            }

        //append modified text
        realloc_and_copy(&dst, &dstlen, modify);

        //append token
        realloc_and_copy(&dst, &dstlen, token);

        token = wcstok(NULL, delimiter, &context);
    }

    wprintf(L"%s\n", dst);

    free(buf);
    free(dst);

    return 0;
}

Upvotes: 1

Related Questions