Reputation: 3543
I'm trying to read data from XML file and store every element ("< some data/>") in vector container vector<TCHAR*>
, why the Task Manager shows the memory usage much greater than vector size(~80mb instead of ~59mb) :
#define _UNICODE
#include<tchar.h>
#include<iostream>
#include<windows.h>
#include<vector>
using namespace std;
HANDLE hFile;
HANDLE hThread;
vector<TCHAR*> tokens;
DWORD tokensSize;
DWORD WINAPI Thread(LPVOID lpVoid);
void main()
{
tokensSize = 0;
hFile = CreateFile("db.xml",GENERIC_READ,0,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
if(hFile == INVALID_HANDLE_VALUE) {
cout<<"CreateFile Error # "<<GetLastError()<<endl;
}
DWORD fileSize = GetFileSize(hFile,NULL);
cout<<"fileSize = "<<fileSize<<" bytes = "<<fileSize/1024/1024<<" mb"<<endl;
TCHAR* buffer = new TCHAR[fileSize / sizeof(TCHAR) + 1];
ZeroMemory(buffer,fileSize);
DWORD bytesRead;
if(!ReadFile(hFile,buffer,fileSize,&bytesRead,NULL)){
cout<<"ReadFile Error # "<<GetLastError()<<endl;
}
CloseHandle(hFile);
hThread = CreateThread(NULL,0,Thread,(LPVOID)buffer,0,NULL);
WaitForSingleObject(hThread,INFINITE);
for(int i=0;i<tokens.size();i++)
tokensSize+=(_tcslen(tokens[i])+1)*sizeof(TCHAR);
cout<<"vector size = "<<tokensSize<<" bytes = "<<tokensSize/1024/1024<<" mb"<<endl;
cin.get();
}
DWORD WINAPI Thread(LPVOID lpVoid)
{
wstring entireDB = (TCHAR*)lpVoid;
delete[]lpVoid;
wstring currentElement;
wstring::size_type lastPos = 0;
wstring::size_type next;
next = entireDB.find(_T(">"),lastPos);
TCHAR* szStr;
do
{
currentElement = entireDB.substr(lastPos,next+1-lastPos);
szStr = new TCHAR[currentElement.length()+1];
_tcscpy(szStr,currentElement.c_str());
tokens.push_back(szStr);
lastPos = next+1;
next = entireDB.find(_T(">"),lastPos);
}
while(next != wstring::npos);
entireDB.clear();
return 0;
}
OUTPUT:~ fileSize = 57mb vectorSize = 58mb
but the TaskManager shows ~ 81mb. What am I doing wrong? THNX!
Upvotes: 0
Views: 1480
Reputation: 66254
First, as Aesthete as pointed out, you never clear the token vector once you're finished with it. This should be done, or change the token vector to utilize self-cleaning content like std::string or std::wstring.
Which brings me to the side-by-side below. Please review this against your existing code. There are a number of changes you'll want to compare. The one you will likely not see until you cmopile+run is the memory footprint difference, which may surprise you.
Major Changes
tokens
is now a vector of std::wstring
rather than raw wchar_t pointersMultiByteToWideChar
to translate the input file.std::wstring
dynamically as the thread parameter. This removes one full copy of the file image. The thread is responsible for delete
ing the wstring
once finished parsing the content._beginthreadex()
for starting the thread. The fundamental reason for this is because of the C/C++ runtime usage. In the past the runtime sets up various thread-local-storage that must be properly cleaned, and are so when using _beginthreadex()
. It is almost identical to CreateThread(), but honestly I look forward to the day when MS has their stuff together and gives us std::thread
officially like the rest of the civilized world.Minor/Meaningless Changes
tokens
vector.tokens
vector.I hope this gives you some ideas on cleaning this up, and more importantly, how yoy can do almost the entire task you're given without having to go new
and delete
nuts.
Notes: this does NOT check the input file for a byte-order-mark. I'm taking it on faith that your claim it is UTF8 is straight-up and doesn't have a BOM at the file beginning. If your input file does have a BOM, you need to adjust the code that reads the file in to account for this.
#include <windows.h>
#include <tchar.h>
#include <process.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
// global map of tokens
vector<wstring> tokens;
// format required by _beginthreadex()
unsigned int _stdcall ThreadProc(void *p);
int main(int argc, char *argv[])
{
HANDLE hThread = NULL;
std::string xml;
std::wstring* pwstr = NULL;
// check early exit
if (argc != 2)
{
cout << "Usage: " << argv[0] << " filename" << endl;
return EXIT_FAILURE;
}
// use runtime library for reading the file content. the WIN32 CreateFile
// API is required for some things, but not for general file ops.
HANDLE hFile = CreateFileA(argv[1], GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile != INVALID_HANDLE_VALUE)
{
DWORD dwFileSize = GetFileSize(hFile, NULL);
if (dwFileSize > 0)
{
// allocate a string large enough for the whole file.
std::string xml(dwFileSize, 0);
DWORD bytesRead = 0;
if (ReadFile(hFile, &xml.at(0), dwFileSize, &bytesRead, NULL) && (bytesRead == dwFileSize))
{
// invoke MB2WC to determine wide-char requirements
int ires = MultiByteToWideChar(CP_UTF8, 0, xml.c_str(), -1, NULL, 0);
if (ires > 0)
{
// allocate a wstring for our thread parameter.
pwstr = new wstring(ires, 0);
MultiByteToWideChar(CP_UTF8, 0, xml.c_str(), -1, &pwstr->at(0), ires);
// launch thread. it own the wstring we're sending, including cleanup.
hThread = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, pwstr, 0, NULL);
}
}
}
// release the file handle
CloseHandle(hFile);
}
// wait for potential thread
if (hThread != NULL)
{
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
}
// report space taken by tokens
size_t tokensSize = 0;
for (vector<wstring>::const_iterator it = tokens.begin(); it != tokens.end(); ++it)
tokensSize += it->size()+1;
cout << "tokens count = " << tokens.size() << endl
<< "tokens size = "<< tokensSize <<" bytes" << endl;
cin.get();
}
// our thread parameter is a dynamic-allocated wstring.
unsigned int _stdcall ThreadProc(void *p)
{
// early exit on null insertion
if (p == NULL)
return EXIT_FAILURE;
// use string passed to us.
wstring* pEntireDB = static_cast<wstring*>(p);
wstring::size_type last = 0;
wstring::size_type next = pEntireDB->find(L'>',last);
while(next != wstring::npos)
{
tokens.push_back(pEntireDB->substr(last, next-last+1));
last = next+1;
next = pEntireDB->find(L'>', last);
}
// delete the wstring (no longer needed)
delete pEntireDB;
return EXIT_SUCCESS;
}
Upvotes: 1
Reputation: 18858
You allocate memory here, in the do-while
loop:
szStr = new TCHAR[currentElement.length()+1];
And you never release it with the delete
operator
Upvotes: 1