Reputation: 2975
I have to implement an application where the user passes multiple words via the command line and the application finds the count of each word in each line of a file. Each word will be searched for in its own thread.
So far I have implemented it as single threaded app.
The code looks like:
//Reads the next line from `file` and returns it as a newly allocated,
//NUL-terminated string with the trailing '\n' removed.
//  file : open stream to read from; the program exits if it is NULL.
//  line : the incoming value is ignored; the parameter is only used as a
//         local for the result and is kept for interface compatibility.
//Returns a heap buffer that the caller must free(). At end of file an
//empty string is returned, so callers must detect EOF on the stream
//themselves. Any allocation failure prints a message and exits.
char* readLine(FILE* file, char* line)
{
    if (file == NULL) {
        printf("Error: file pointer is null.");
        exit(1);
    }

    size_t capacity = 128;
    char *lineBuffer = malloc(capacity);
    if (lineBuffer == NULL) {
        printf("Error allocating memory for line buffer.");
        exit(1);
    }

    size_t count = 0;
    int ch; //int, not char: EOF must stay distinguishable from every byte value
    while ((ch = getc(file)) != '\n' && ch != EOF) {
        //Grow while one slot is still free so the terminating '\0'
        //always fits, even when the line length is a multiple of 128.
        if (count + 1 == capacity) {
            capacity += 128;
            char *grown = realloc(lineBuffer, capacity);
            if (grown == NULL) {
                free(lineBuffer);
                printf("Error reallocating space for line buffer.");
                exit(1);
            }
            lineBuffer = grown;
        }
        lineBuffer[count++] = (char) ch;
    }
    lineBuffer[count] = '\0';

    //Hand back an exactly-sized copy and release the working buffer
    line = malloc(count + 1);
    if (line == NULL) {
        free(lineBuffer);
        printf("Error allocating memory for line.");
        exit(1);
    }
    memcpy(line, lineBuffer, count + 1);
    free(lineBuffer);
    return line;
}
//Counts how many times `word` occurs in `line` as a plain substring.
//  line : NUL-terminated text to search (not modified).
//  word : NUL-terminated text to look for (not modified).
//Returns the number of matches; 0 if either argument is NULL or `word`
//is empty.
//After each match the scan resumes one character past the end of the
//match (that character is treated as a separator), so directly adjacent
//repeats such as "amam" count once for "am".
//TODO: still matches inside larger words and is case-sensitive; needs
//refining for cases such as {"Am"," am "," am","?Am",".Am"} etc.
int findWord(char* line,char* word)
{
    if (line == NULL || word == NULL)
        return 0;

    size_t wordLen = strlen(word);
    if (wordLen == 0)
        return 0; //an empty needle matches everywhere; would never advance

    int count = 0;
    const char *cursor = line;
    const char *hit;
    //Walk the line with a pointer; no scratch copy or overlapping strcpy
    while ((hit = strstr(cursor, word)) != NULL) {
        count++;
        cursor = hit + wordLen;
        //Skip the separator after the match, but never step past the '\0'
        if (*cursor != '\0')
            cursor++;
    }
    return count;
}
//Reads `file` line by line and, for every line, appends one node holding
//the line number (starting at 1) and the number of occurrences of `word`
//in that line to the list referenced by `lineCountHead`.
//  file          : open stream to read; the program exits if it is NULL.
//  word          : text to count, passed unchanged to findWord().
//  lineCountHead : passed unchanged to addToLineCountList().
//Always returns NULL; the results are delivered only through the list
//referenced by `lineCountHead`.
//NOTE(review): the "Critical Section" markers presumably show where
//locking is needed once this runs in several threads - confirm.
LineCount* findCount(FILE* file, char* word,LineCount** lineCountHead)//Make it multithreaded fn()
{
    if (file == NULL) {
        printf("Error: file pointer is null.");
        exit(1);
    }

    int lineNumber = 1;
    for (;;) {
        //feof() only becomes true after a read has failed, so probe for
        //EOF before reading; otherwise a phantom empty line is counted
        //after the final newline (and for an empty file).
        int next = getc(file);
        if (next == EOF)
            break;
        ungetc(next, file);

        char *line = readLine(file, NULL);
        int count = findWord(line, word);
        free(line); //readLine() allocates a fresh buffer for every line

        //Critical Section Start
        LineCount *node = LineCountNode(lineNumber, count);
        addToLineCountList(node, lineCountHead);
        //Critical Section End
        lineNumber++;
    }
    return NULL;
}
So basically I want my calling thread function to be LineCount* findCount(FILE* file, char* word,LineCount** lineCountHead)
My understanding is that the file will be accessed by the threads for reading only, so there is no need to take care of synchronization.
Currently I am opening the file as:
pFile = fopen (argv[1],"r");
. My question is: how do I open the file in read-shared mode?
I know in C++
there is a read-shared mode. How do I achieve this in C
?
Also, how do I write my function LineCount* findCount(FILE* file, char* word,LineCount** lineCountHead)
in the form required for a thread entry function, i.e. the form void* fn(void*)?
Upvotes: 0
Views: 2878
Reputation: 86333
While in read-only mode there are no issues with the file itself, the IO functions in the standard C library are not designed to be usable from multiple threads in parallel. They are thread-safe (or at least, I think so) but using them correctly from multiple threads is not trivial.
At the lowest level, each FILE
structure contains a file position pointer - or the IO functions maintain an OS-provided pointer. Having multiple threads mess with the file cursor position sounds like a good way to make your life more difficult than it should be.
The best approach would be to open your file multiple times - once in each thread. Each thread would then have its own FILE
pointer, stream buffer etc. Note that this is not unique to C & POSIX threads - it's an inherent issue with using multiple threads.
In any case, I am unsure what you are trying to achieve by using multiple threads. Generally search operations like this are I/O bound - multithreaded accesses to the same file are quite likely to make things worse.
The only case where it might make sense is if you had a huge number of strings to search for and you had a single I/O thread feeding all other threads through a common buffer. That would distribute the CPU-intensive part, without causing undue I/O...
Upvotes: 1