R0gue
R0gue

Reputation: 31

Using fgets() together with strcmp(): strcmp() is not comparing properly

My current task is to write a function that is given a FILE pointer and a pointer to a string. The function should count how many times the string occurs in the given file and return that count as an integer. The comparison also needs to be case-sensitive. In my current program, I search the file for the word "dog", but it keeps giving me 0 even though the .txt file contains the word dog three times. This is my first post here; I checked the other posts about this topic, but they didn't solve my problem.

This is what I tried:

#include <stdio.h>
#include <string.h>

/*
 * Counts the lines of fp whose complete text equals searchWord.
 *
 * Each line is read with fgets(), its trailing newline (if present) is
 * removed, and what remains is compared to searchWord with strcmp().
 * strcmp() compares whole strings, so a line holding several words
 * (for example "dog dog dogoggo dog.") never equals a single word and
 * is therefore not counted.
 *
 * fp          stream to read from; read until fgets() returns NULL.
 * searchWord  NUL-terminated text each line is compared against
 *             (case-sensitive).
 *
 * Returns the number of lines that are exactly equal to searchWord.
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    int count = 0;
    char buffer[4096];

    while (fgets(buffer, sizeof(buffer), fp) != NULL) {
        /* Cut at the first '\n' only. The last line of a file may have no
           newline, and blindly dropping the final character would then
           remove a real character of the line. */
        buffer[strcspn(buffer, "\n")] = '\0';
        if (strcmp(buffer, searchWord) == 0) {
            count++;
        }
    }
    return count;
}

/*
 * Opens "test.txt", counts the search word with searchAndCount() and
 * prints the result.
 *
 * Returns 0 on success, or 1 if the file could not be opened.
 */
int main(void) {
    const char *word = "dog";
    FILE *fp = fopen("test.txt", "r");

    if (fp == NULL) {
        perror("File couldn't open properly");
        return 1;
    }

    int occurrences = searchAndCount(fp, word);

    /* Print the word from the variable so the message cannot drift out of
       sync with the word that was actually searched for. */
    printf("The word '%s' occurs %d-times in the file\n", word, occurrences);

    fclose(fp);

    return 0;
}

my test.txt looks like this:

dog dog dogoggo dog.

and I get this:

The word 'dog' occurs 0-times in the file

Edit: Judging from the comments, it seems I need to use strtok(); I will be researching this function.

But using:

/*
 * Counts the whitespace-separated tokens in fp that are exactly equal to
 * searchWord (case-sensitive).
 *
 * fp          stream to read tokens from.
 * searchWord  NUL-terminated token to look for.
 *
 * Returns the number of matching tokens. Punctuation attached to a token
 * is part of that token, so "dog." does not match "dog".
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    char token[4096];
    int matches = 0;

    for (;;) {
        /* "%4095s" skips leading whitespace and stores at most 4095
           characters, leaving room for the terminating NUL. */
        if (fscanf(fp, "%4095s", token) != 1) {
            break;
        }
        if (!strcmp(token, searchWord)) {
            matches++;
        }
    }
    return matches;
}

more or less solved the problem, except that "dog." does not get counted because of the trailing dot.

Solution:

/*
 * Counts the words in fp that are exactly equal to searchWord
 * (case-sensitive). A word is a maximal run of the letters a-z / A-Z;
 * every other character acts as a separator.
 *
 * fp          stream to read from.
 * searchWord  NUL-terminated word to look for.
 *
 * Returns the number of matching words.
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    char token[4096];
    int hits = 0;

    while (1) {
        /* Discard any run of non-letters in front of the next word. */
        fscanf(fp, "%*[^a-zA-Z]");

        /* Read the next run of letters (at most 4095 of them). */
        if (fscanf(fp, "%4095[a-zA-Z]", token) == 1) {
            if (strcmp(token, searchWord) == 0) {
                hits++;
            }
            continue;
        }

        /* No further word could be read: we are done. */
        return hits;
    }
}

Upvotes: 0

Views: 169

Answers (2)

chqrlie
chqrlie

Reputation: 144989

Since you are matching words and not complete lines, you should use fscanf instead of fgets:

#include <stdio.h>
#include <string.h>

// Counts the whitespace-delimited words read from fp that compare equal
// to searchWord with strcmp() (so the match is case-sensitive).
//
// fp:         stream to read words from.
// searchWord: NUL-terminated word to look for.
//
// Returns the number of words equal to searchWord.
int searchAndCount(FILE *fp, const char *searchWord) {
    char word[4096];
    int hits = 0;
    int status;

    // %4095s limits each read to 4095 characters so the terminating
    // NUL always fits in the 4096-byte buffer.
    status = fscanf(fp, "%4095s", word);
    while (status == 1) {
        hits += (strcmp(word, searchWord) == 0) ? 1 : 0;
        status = fscanf(fp, "%4095s", word);
    }
    return hits;
}

// Opens "test.txt", counts the occurrences of the search word with
// searchAndCount() and prints the result.
//
// Returns 0 on success, or 1 if the file could not be opened.
int main(void) {
    FILE *fp;
    int count;
    const char *str = "dog";
    
    fp = fopen("test.txt", "r");
    if (fp == NULL) {
        perror("File couldn't open properly");
        return 1;
    }
    count = searchAndCount(fp, str);
    // Print the searched word from the variable instead of repeating the
    // literal, so the message always names the word that was counted.
    printf("The word '%s' occurs %d times in the file\n", str, count);

    fclose(fp);

    return 0;
}

If you want to ignore punctuation, here is a more elaborate version:

// Counts the words in fp equal to searchWord (case-sensitive), where a
// word is a maximal sequence of ASCII letters and anything else is
// treated as a separator.
//
// fp:         stream to read from.
// searchWord: NUL-terminated word to look for.
//
// Returns the number of matching words.
int searchAndCount(FILE *fp, const char *searchWord) {
    char word[4096];
    int matches = 0;
    int haveWord = 1;

    while (haveWord) {
        // skip separators: any sequence of non-letter characters
        fscanf(fp, "%*[^a-zA-Z]");
        // read the next word, at most 4095 letters
        haveWord = (fscanf(fp, "%4095[a-zA-Z]", word) == 1);
        // count it when it is the target word
        if (haveWord && strcmp(word, searchWord) == 0) {
            matches++;
        }
    }
    return matches;
}

Upvotes: 1

chux
chux

Reputation: 154156

Instead of strcmp(), which matches only when the entire line of input is exactly the search word, use strstr() to look for multiple occurrences within a line.

Code below uses s++ to count 4 when input line is "ababa aba abax\n" and key is "aba". If the count should have been 1, research strtok().

// Count every occurrence of searchWord as a substring of each line read
// from fp. strstr() returns a pointer to the next match at or after s,
// or NULL when the rest of the line contains no further match.
size_t len = strlen(searchWord);
while (fgets(buffer, sizeof buffer, fp)) { 
  char *s = buffer;
  while ((s = strstr(s, searchWord)) != NULL) {
    count++;

    // Use s++ to search for keys within matches (overlapping matches count).
    s++; // Resume searching at the next character.

    // OR, to continue at the end of the match (non-overlapping matches),
    // replace the s++ above with:
    // s += len;
  }
}
return count;

Upvotes: 3

Related Questions