Reputation: 769
I have been given an assignment. There is a dictionary of 25 files and each file has random text involving random IP addresses. The task is to find out and output the count of unique IP addresses among all files using the pthread
library in C.
I think I have solved the race condition on count variable by mutual exclusion. But, still there is a bug and the code has different count value in each execution.
Here is the code, please suggest fixes for the bug:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <pthread.h>
#include <string.h>
//declaring structure of arguments to give arguments to thread function
struct arg_struct
{
char *arg1; //argument 1 : to pass directory name to thread function
struct dirent *arg2; //argument 2: to pass file name to thread function
};
//declaring structure of pointer which will point unique ip addresses
struct uniqueip
{
char *ip;
};
struct filenames
{
char full_filename[256];
};
struct uniqueip u[200];
int count=0;// global count variable stores total unique ip addresses.
void *ReadFile(void *thread_no);//thread declaration
pthread_mutex_t mutex;
int main(int argc, char *argv[])
{
DIR *dir; //directory stream
FILE *file; //file stream
struct dirent *ent; // directory entry structure
char *line = NULL; // pointer to
size_t len = 1000; //the length of bytes getline will allocate
size_t read;
char full_filename[256]; //will hold the entire file path with
//file name to read
int x=0;
pthread_attr_t attr;
int rc;
long thread_no;
void *status;
void *ReadFile(void *thread_no);
// check the arguments
if(argc < 2)
{
printf("Not enough arguments supplied\n");
return -1;
}
if(argc > 2)
{
printf("Too many arguments supplied\n");
return -1;
}
struct arg_struct args;
args.arg1 = argv[1];
pthread_mutex_init(&mutex, NULL); // initializing mutex
/* Initialize and set thread detached attribute */
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
// try to open the directory given by the argument
if ((dir = opendir (argv[1])) != NULL)
{
/* print all the files and directories within directory */
while ((ent = readdir (dir)) != NULL)
{
// Check if the list is a regular file
if(ent->d_type == DT_REG)
{
//Get the number of files first so that we would know number
//of threads to be created
x++;
}
}
}
pthread_t thread[x];
struct filenames filenames[x];
thread_no=0;
// try to open the directory given by the argument
if ((dir = opendir (argv[1])) != NULL)
{
/* print all the files and directories within directory */
while ((ent = readdir (dir)) != NULL)
{
// Check if the list is a regular file
if(ent->d_type == DT_REG)
{
// Create the absolute path of the filename
snprintf(filenames[thread_no].full_filename, sizeof filenames[thread_no].full_filename,
"./%s/%s", argv[1], ent->d_name);
//creating threads to read files
args.arg2 = ent; //assigning file name to argument 2
printf("main: creating thread %ld %s \n", thread_no,ent->d_name);
rc = pthread_create(&thread[thread_no], &attr, ReadFile, (void *) &args);
if (rc)
{
printf("ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
thread_no++;
}
}
// Close the directory structure
closedir (dir);
}
else
{
/* could not open directory */
perror ("");
return -1;
}
/* Free attribute and wait for the other threads*/
pthread_attr_destroy(&attr);
for(thread_no=0; thread_no<x; thread_no++)
{
rc = pthread_join(thread[thread_no], &status);
if (rc)
{
printf("ERROR; return code from pthread_join() is %d\n", rc);
exit(-1);
}
printf("Main: completed join with thread %ld having a status of %ld\n",thread_no,(long)status);
}
printf("Main: program completed. Exiting.\n");
printf("total no. of unique ip addresses are %d\n",count-1);
pthread_mutex_destroy(&mutex);
pthread_exit(NULL);
return 0;
}
void *ReadFile(void *thread_no)
{ // in thread function
struct filenames *my_data;
my_data = (struct filenames *)thread_no;
char full_filename[256];
FILE *file; //file stream
char *line = NULL;
char *split = NULL;
size_t len = 1000; // pointer to the length of bytes getline will allocate
size_t read;
const char s[2]=" "; //used as string split to get ip address
char *token;
int flag = 0,j;
// open the file
file = fopen(my_data -> full_filename, "r");
// file was not able to be open
if (file != NULL)
{
// Print out each line in the file
while ((read = getline(&line, &len, file)) != -1)
{
split=line;
token = strtok(split,s);
pthread_mutex_lock(&mutex);
if(count==0){
//locking mutex variable to avoid race condition
u[count].ip=malloc(sizeof(token)+1);
strcpy(u[count].ip,token);
printf("%d ------ %s\n",count,u[count].ip);
free(u[count].ip);
count++;
}
pthread_mutex_unlock(&mutex); // unlocking mutex
//comparing recently received ip address to all the stored unique ip address.
for(j=0;j<count;j++)
{
if(!(strcmp(u[j].ip,token)))
{
break;
}
else
{
if(j==count-1){
pthread_mutex_lock(&mutex); //locking mutex variable to avoid race condition
u[count].ip=malloc(sizeof(read));
strcpy(u[count].ip,token);
printf("%d ------ %s\n",count,u[count].ip);
count++;
free(u[count].ip);
pthread_mutex_unlock(&mutex); // unlocking mutex
}
}
}
}
}
fclose(file);
pthread_exit((void*) thread_no);
}
Upvotes: 0
Views: 140
Reputation: 239041
There's several issues in this code.
You only ever create one instance of arg_struct
, but you re-use it and pass it to every thread. This means that by the time a thread starts, the value of the arg_struct
you passed it may have changed. You need to give each thread its own arg_struct
- eg. you could declare an array of them alongside the pthread_t
array:
pthread_t thread[x];
struct arg_struct args[x];
A similar problem exists with the struct dirent *
pointer inside arg_struct
- the data pointed to by the struct dirent *
returned by readdir()
may be overwritten by the next call to readdir()
on the same directory stream. There are a few ways to solve this, but one way is to replace the char *arg1;
and struct dirent *
in arg_struct
with a buffer to hold the filename:
struct arg_struct
{
char full_filename[256]; //will hold the entire file path with
//file name to read
};
The main thread can then be changed to put the filename straight into the arg_struct
:
snprintf(args[thread_no].full_filename, sizeof args[thread_no].full_filename, "./%s/%s", argv[1], ent->d_name);
In the ReadFile()
function, this creates an array of one element and then tries to write to the (non-existent) second element, which has undefined behaviour:
char * argv[1];
argv[1]= my_data->arg1;
That code can be removed entirely, though - now that main()
is constructing the full filename for the thread, the thread can just directly open it from the the arg_struct
:
file = fopen(my_data->full_filename, "r");
(The thread doesn't need to worry about argv[1]
at all anymore).
Your thread function is reading the shared count
variable without holding the mutex - you need to lock the mutex before executing if (count == 0)
, and don't unlock it until after the for ()
loop (otherwise, you might get two threads deciding to add an IP to the same array location).
When you try to create a copy of the string you want to store, you aren't allocating enough space: sizeof read
is always the fixed size of a size_t
variable and isn't related to the size of the string you're copying. You want:
u[count].ip = malloc(strlen(token) + 1);
strcpy(u[count].ip, token);
You don't want to immediately free the u[count].ip
, either: you need that string to stay allocated. Remove the free(u[count].ip);
lines.
There's some easy optimisations you could make, once you get it working. For example, because count
only increases and the u[]
array is static below the value of count
, you can lock the mutex, save a copy of count
then unlock the mutex. Loop up to the saved value of count
- if you find the string then you can just move straight onto the next line of your input file. It's only if you don't find the string that you need to re-lock the mutex, then continue from the saved count value up to the current count value (which might have increased in the meantime), adding the new string to the array (and incrementing count
) if nececssary.
Upvotes: 1