Bulka525
Bulka525

Reputation: 21

Problem working with a UTF-8 text file in C++

Lately I ran into a problem when trying to read a line from a file that is UTF-8(?) encoded. I also need to copy that string to the clipboard and be able to paste it into a .txt file.

#include <iostream>
#include <windows.h>
#include <cstdio>
#include <conio.h>
#include <time.h>
#include <string>
#include <fstream>
#include <wchar.h>
using namespace std;
wstring lastLine;



// Reads one line from near the end of "Client.txt" into the global `lastLine`.
//
// The file is opened as a wide-character input stream. The read position is
// moved to 7 positions before the end of the file, then the loop below walks
// backwards until it reads a '\n'; the text following that newline is then
// read with getline(). If the file cannot be opened, a message is printed to
// cout and `lastLine` is left untouched.
//
// NOTE(review): no locale is imbued on the stream in this block, so how the
// bytes of a UTF-8 file are mapped to wchar_t depends on the stream's default
// locale - confirm this matches the file's encoding.
void mesparse()
{
    wifstream client("Client.txt");
    if(client.is_open())
    {
        // Start the backwards search 7 positions before end-of-file.
        client.seekg(-7,ios_base::end);
        // kloop acts as a "newline found" flag: the loop runs while it is 0.
        int kloop=0;
        while (kloop<1)
        {
            // NOTE(review): ch is not initialized and the result of get() is
            // not checked; if the read fails, the comparison below uses
            // whatever value ch already holds, and the loop has no other exit.
            wchar_t ch;
            client.get(ch);


            if(ch == '\n') {                    
                // Newline found: the stream is now positioned just past it.
                kloop=1;                                
            }
            else {                                  
                // Not a newline: move 4 positions back from the current
                // position (which is just past the character that was read).
                client.seekg(-4,ios_base::cur);         
            }}            
        // Read from the current position up to the next newline (or EOF).
        getline(client,lastLine);  


        client.close();
    }
    else
    {
        cout<<"Unable to open client.txt file.";
    }
}
// Places the wide string `s` on the Windows clipboard as CF_UNICODETEXT.
//
// The text is copied (including its terminating null) into a movable global
// memory block, which is what SetClipboardData() expects. Every API result is
// checked; on any failure the function returns without touching the
// clipboard contents further and releases the memory block it allocated.
//
// @param s  Text to copy. Only the characters up to the first embedded null
//           (if any) are copied, as in a C string.
void toClipboard(std::wstring s){
    const wchar_t* text = s.c_str();
    const size_t len = wcslen(text);

    HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (len + 1) * sizeof(wchar_t));
    if (hMem == nullptr)
        return;                         // allocation failed, nothing to clean up

    wchar_t* buffer = static_cast<wchar_t*>(GlobalLock(hMem));
    if (buffer == nullptr) {
        GlobalFree(hMem);               // could not lock: release and give up
        return;
    }
    wcscpy_s(buffer, len + 1, text);
    GlobalUnlock(hMem);

    if (!OpenClipboard(nullptr)) {
        GlobalFree(hMem);               // clipboard busy: we still own the block
        return;
    }
    EmptyClipboard();
    // On success the system takes ownership of hMem; only on failure does the
    // block remain ours to free.
    const bool stored = SetClipboardData(CF_UNICODETEXT, hMem) != nullptr;
    CloseClipboard();
    if (!stored)
        GlobalFree(hMem);
}

// Entry point: loads the line from the file into the global `lastLine`,
// copies it to the clipboard, then echoes it to the wide console stream.
int main()
{
    mesparse();

    const std::wstring& parsedLine = lastLine;
    toClipboard(parsedLine);
    std::wcout << parsedLine << std::endl;

    return 0;
}

What I'm trying to copy:

йцукaеёśнгшщㅂхфывапрㅊджэячсмитъбюㅗ
йцукaеёśнгшщㅂхфывапрㅊджэя
йцукaеёśнгшщㅂхфывапрㅊ
йцукaеёśнгшщㅂхфыва

Upvotes: 1

Views: 444

Answers (2)

Rita Han
Rita Han

Reputation: 9710

This is example code for the solution pointed out by @Michael Chourdakis.

// Returns the last non-empty line of "Client.txt" as raw bytes (the file's
// UTF-8 encoding is preserved; no character conversion is done here).
//
// The file is opened in binary mode so that byte offsets used for seeking are
// exact, then scanned backwards one byte at a time from the end. Trailing
// line terminators are skipped, and the scan stops at the first '\n' found
// before the line's content (or at the start of the file). Scanning bytes is
// safe for UTF-8 because the byte 0x0A never occurs inside a multi-byte
// sequence.
//
// @return The last line without its "\n" / "\r\n" terminator, or an empty
//         string if the file cannot be opened, is empty, contains only line
//         terminators, or a read fails. When the file cannot be opened a
//         message is also written to cout.
std::string mesparse()
{
    std::string lastLine;

    std::ifstream client("Client.txt", std::ios_base::binary);
    if (!client.is_open())
    {
        std::cout << "Unable to open client.txt file.";
        return lastLine;
    }

    client.seekg(0, std::ios_base::end);
    const std::streamoff fileSize = client.tellg();
    if (fileSize <= 0)
        return lastLine;                // empty file or tellg() failure

    // Walk backwards until `pos` is the offset of the last line's first byte.
    std::streamoff pos = fileSize;
    bool seenContent = false;           // true once a non-terminator byte was read
    char ch = '\0';
    while (pos > 0)
    {
        client.seekg(pos - 1, std::ios_base::beg);
        if (!client.get(ch))
            return lastLine;            // read error: report "no line"

        if (ch == '\n')
        {
            if (seenContent)
                break;                  // newline that precedes the last line
        }
        else if (ch != '\r')
        {
            seenContent = true;
        }
        --pos;
    }

    client.clear();
    client.seekg(pos, std::ios_base::beg);
    std::getline(client, lastLine);

    // Binary mode does not translate CRLF, so drop a trailing '\r' ourselves.
    if (!lastLine.empty() && lastLine.back() == '\r')
        lastLine.pop_back();

    return lastLine;
}

// Converts the UTF-8 string `s` to UTF-16 and places it on the Windows
// clipboard as CF_UNICODETEXT. The converted text is also echoed to wcout.
//
// Every API result is checked; on any failure the function returns early and
// releases the global memory block it allocated.
//
// @param s  UTF-8 encoded text. Conversion stops at the first embedded null
//           (the source length is passed as -1, i.e. null-terminated).
void toClipboard(std::string s) {

    // First pass: ask for the required length in wide chars, including the
    // terminating null. The flags argument is 0 because MB_PRECOMPOSED is not
    // a valid flag for CP_UTF8.
    const int len = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
    if (len <= 0)
        return;

    HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, static_cast<SIZE_T>(len) * sizeof(wchar_t));
    if (hMem == nullptr)
        return;

    wchar_t* buffer = static_cast<wchar_t*>(GlobalLock(hMem));
    if (buffer == nullptr) {
        GlobalFree(hMem);
        return;
    }

    // Second pass: convert into the locked block.
    const int written = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, buffer, len);

    // Echo while the block is still locked: the pointer returned by
    // GlobalLock() must not be used after GlobalUnlock().
    if (written > 0)
        std::wcout << buffer << std::endl;
    GlobalUnlock(hMem);

    if (written <= 0) {
        GlobalFree(hMem);
        return;
    }

    if (!OpenClipboard(nullptr)) {
        GlobalFree(hMem);               // clipboard busy: we still own the block
        return;
    }
    EmptyClipboard();
    // On success the system takes ownership of hMem; only on failure does the
    // block remain ours to free.
    const bool stored = SetClipboardData(CF_UNICODETEXT, hMem) != nullptr;
    CloseClipboard();
    if (!stored)
        GlobalFree(hMem);
}

// Entry point: fetches the line from the file and, unless it is empty,
// copies it to the clipboard.
int main()
{
    const string lineFromFile = mesparse();

    if (!lineFromFile.empty())
    {
        toClipboard(lineFromFile);
    }

    return 0;
}

Upvotes: 1

Michael Chourdakis
Michael Chourdakis

Reputation: 11178

CF_UNICODETEXT != UTF-8.

The first is wide characters (UTF-16 on Windows); the second is an 8-bit, variable-length encoding.

You first have to convert it with MultiByteToWideChar().

Therefore, read all the text to std::string, not to std::wstring. Then get the std::wstring with MultiByteToWideChar() and then copy it to clipboard.

Also, doing a character-by-character search in UTF-8 text is usually a bad idea (it is a variable-length encoding).

Upvotes: 3

Related Questions