Reputation: 95
When compiling my program, I get a LeakSanitizer error where it detected memory leaks at the line word_dict target = (word_dict)malloc(sizeof(dict))
in the newDict
function. I don't free target
because I return the value for use in other functions below. However, I'm struggling to find when I should call free on the allocated memory. I tried freeing spamDict
and nonspamDict
in the bayesian_spam_filer
function but the memory leak error still persists.
typedef struct dictionary dict;
typedef dict* word_dict;
typedef enum {false, true} bool;
/*
linked list, count is for the total word count and
occur is the numbers of the mails that had the word
*/
struct dictionary{
char word[WORDLENGTH + 1];
int occur;
int count;
word_dict next;
bool updated;
};
// if there is no matching words after searching, create a new node
word_dict newDict(char *string, word_dict next){
word_dict target = (word_dict)malloc(sizeof(dict));
int i = 0;
while(string[i] !='\0' && i<WORDLENGTH) {
target->word[i] = string[i];
i++;
}
target->word[i] = 0;
target->count = 1;
target->next = next;
target->occur = 1;
target->updated = true;
return target;
}
/*
search matching words, if a matching word is found
add 1 to count and return true
no matching word, then return false
*/
word_dict searchDict(char* string, word_dict pos){
word_dict first = pos;
if(strcmp(string, MAILSEPARATOR) == 0) { //end of an email
update(first);
return first;
}
string = preprocess(string);
if(string == NULL) {
return first;
}
int result;
word_dict prevPos=pos;
while(pos != NULL){
if((result = strcmp(pos->word, string)) == 0){
if(!pos->updated) {
pos->occur++;
pos->updated = true;
}
pos->count++;
return first;
} else if(result > 0) { // has passed the position and no matching word, need to add node.
if (prevPos == pos){
return newDict(string,pos);
}
prevPos->next = newDict(string, pos);
return first;
}
prevPos = pos;
pos = pos->next;
}
//printf("null found\n");
prevPos->next = newDict(string, pos);
return first;
}
/*
initialize training
reads the sample mails and creates a linked list of
the percentages of the words occuring in the sample mails
*/
word_dict initializeTraining(char* filename){
FILE *fp = NULL;
fp = fopen(filename, "r");
if(fp == NULL) {
printf("no file found\n");
return NULL;
}
char* string;
string = (char*)malloc(sizeof(char)*50);
word_dict first = NULL;
fscanf(fp, "%s\n", string);
string = preprocess(string);
first = newDict(string, NULL);
while(fscanf(fp,"%s", string) == 1) {
first = searchDict(string, first);
}
fclose(fp);
free(string);
return first;
}
/*
tests whether the mail is pam or not
takes the filename of the test mail,
returns true or false depending on the email's content
*/
bool bayesian_spam_filter(char * filename_for_test_email) {
word_dict spamDict=initializeTraining("spam.txt");
word_dict nonspamDict=initializeTraining("not_spam.txt");
#if DEBUG
printDict(spamDict);
printDict(nonspamDict);
#endif
FILE *stream=NULL;
stream = fopen(filename_for_test_email, "r");
if(stream == NULL){
printf("no file found\n");
return false;
}
char* string;
string = (char*)malloc(sizeof(char)*50);
int ps, pn; // probability of spam mail and non-spam mail
double prob = 0.5;
while(fscanf(stream,"%s", string) == 1){
char* tempString; // for handling the errors happening from string being null during preprocessing
tempString = preprocess(string);
if(tempString == NULL){
continue;
}
if((ps = searchTest(tempString, spamDict)) != 0) {
if((pn = searchTest(tempString, nonspamDict)) != 0) {
printf("ps:%3d, pn:%3d, %s\n", ps, pn, tempString);
prob = prob * (double) ps / ((prob* (double)ps + (1 - prob) * (double) pn));
printf("this probability: %.10f\n", prob);
}
}
}
//printf("%d, %d \n", pSProduct, pNProduct);
//proba=(float)(pSProduct/(pSProduct+pNProduct));
printf("Probability of mail being spam: %.10f\n", prob);
fclose(stream);
free(string);
free(spamDict);
free(nonspamDict);
if (prob > 0.9) {
return true;
}
return false;
}
Upvotes: 0
Views: 161
Reputation: 1069
The reason a leak is reported is that in the end of bayesian_spam_filter you are only freeing the first entry in each linked list associated with spamDict and nonSpamDict so the remainder of each list is leaked:
free(spamDict);
free(nonspamDict);
You need to loop through both of those lists to free all the nodes in the list.
Upvotes: 0