Mana
Mana

Reputation: 95

Still getting memory leak errors when trying to free malloc'ed pointer of struct

When compiling my program, I get a LeakSanitizer error where it detected memory leaks at the line word_dict target = (word_dict)malloc(sizeof(dict)) in the newDict function. I don't free target because I return the value for use in other functions below. However, I'm struggling to find when I should call free on the allocated memory. I tried freeing spamDict and nonspamDict in the bayesian_spam_filer function but the memory leak error still persists.

typedef struct dictionary dict;
typedef dict* word_dict;
typedef enum {false, true} bool;

/*
 linked list, count is for the total word count and
 occur is the numbers of the mails that had the word
 */
struct dictionary{
    char word[WORDLENGTH + 1];
    int occur;
    int count;
    word_dict next;
    bool updated;
};


// if there is no matching words after searching, create a new node
word_dict newDict(char *string, word_dict next){
    word_dict target = (word_dict)malloc(sizeof(dict));
    int i = 0;

    while(string[i] !='\0' && i<WORDLENGTH) {
        target->word[i] = string[i];
        i++;
    }
    target->word[i] = 0;
    target->count = 1;
    target->next = next;
    target->occur = 1;
    target->updated = true;
    return target;
}

/*
 search matching words, if a matching word is found
 add 1 to count and return true
 no matching word, then return false
*/
word_dict searchDict(char* string, word_dict pos){
    word_dict first = pos;
    if(strcmp(string, MAILSEPARATOR) == 0) { //end of an email
        update(first);
        return first;
    }

    string = preprocess(string);
    if(string == NULL) {
        return first;
    }
    
    int result;
    word_dict prevPos=pos;
    while(pos != NULL){
        if((result = strcmp(pos->word, string)) == 0){
            if(!pos->updated) {
                pos->occur++;
                pos->updated = true;
            }
            pos->count++;
            return first;
        } else if(result > 0) { // has passed the position and no matching word, need to add node.
            if (prevPos == pos){
                return newDict(string,pos);
            }
            prevPos->next = newDict(string, pos);
            return first;
        }
        prevPos = pos;
        pos = pos->next;
    }
    //printf("null found\n");
    prevPos->next = newDict(string, pos);
    return first;
}

/*
 initialize training
 reads the sample mails and creates a linked list of
 the percentages of the words occuring in the sample mails
*/
word_dict initializeTraining(char* filename){
    FILE *fp = NULL;
    fp = fopen(filename, "r");
    if(fp == NULL) {
        printf("no file found\n");
        return NULL;
    }
    
    char* string;
    string = (char*)malloc(sizeof(char)*50);
    word_dict first = NULL;
    fscanf(fp, "%s\n", string);
    string = preprocess(string);
    first = newDict(string, NULL);
    
    while(fscanf(fp,"%s", string) == 1) {
        first = searchDict(string, first);
    }
    fclose(fp);
    free(string);
    return first;
}

/*
 tests whether the mail is pam or not
 takes the filename of the test mail,
 returns true or false depending on the email's content
*/
bool bayesian_spam_filter(char * filename_for_test_email) {
    word_dict spamDict=initializeTraining("spam.txt");
    word_dict nonspamDict=initializeTraining("not_spam.txt");
    
#if DEBUG
    printDict(spamDict);
    printDict(nonspamDict);
#endif
    
    FILE *stream=NULL;
    stream = fopen(filename_for_test_email, "r");
    if(stream == NULL){
        printf("no file found\n");
        return false;
    }
    
    char* string;
    string = (char*)malloc(sizeof(char)*50);
    
    int ps, pn; // probability of spam mail and non-spam mail
    double prob = 0.5;
    while(fscanf(stream,"%s", string) == 1){
        char* tempString; // for handling the errors happening from string being null during preprocessing
        tempString = preprocess(string);
        if(tempString == NULL){
            continue;
        }

        if((ps = searchTest(tempString, spamDict)) != 0) {
            if((pn = searchTest(tempString, nonspamDict)) != 0) {
                printf("ps:%3d, pn:%3d, %s\n", ps, pn, tempString);
                prob = prob * (double) ps / ((prob* (double)ps + (1 - prob) * (double) pn));
                printf("this probability: %.10f\n", prob);
            }
        }
    }

    //printf("%d, %d \n", pSProduct, pNProduct);
    //proba=(float)(pSProduct/(pSProduct+pNProduct));
    printf("Probability of mail being spam: %.10f\n", prob);
    fclose(stream);
    free(string);
    free(spamDict);
    free(nonspamDict);
    if (prob > 0.9) {
        return true;
    }
    return false;
}

Upvotes: 0

Views: 161

Answers (1)

Tim Boddy
Tim Boddy

Reputation: 1069

The reason a leak is reported is that in the end of bayesian_spam_filter you are only freeing the first entry in each linked list associated with spamDict and nonSpamDict so the remainder of each list is leaked:

free(spamDict);
free(nonspamDict);

You need to loop through both of those lists to free all the nodes in the list.

Upvotes: 0

Related Questions