Breno Santos
Breno Santos

Reputation: 174

Counting different words from a text input

I want to write a program that reads a text file and reports the number of different words and how many times each of them appears in the text. I'm trying to do that, but I'm having a problem comparing the word read from the input with the words already read. My code adds a word to the array of words if it is new; if it isn't, it increments that word's count by 1. But when I compare the words, it doesn't report them as equal even when they are. For example, if the text file contains:

test test test.
test test test.

("test." is different from "test"). The program reports 7 different words: 3 of them are NULL, 3 are "test" and 1 is "test.". It should report 2 different words, with a count of 4 for "test" and a count of 2 for "test.".

Can anybody see what is wrong with my code?

/* Capacity limits: number of table slots, and bytes per stored word
 * (the terminating '\0' has to fit inside that size as well). */
#define MAX_PALAVRAS    1024
#define MAX_TAM_PALAVRA 32

/* One slot of the word table. */
struct ocorrencia_ {
    char palavra[MAX_TAM_PALAVRA];  /* the word, as a C string */
    int  pos;                       /* set to -1 by main; not read in the code shown */
    int  num_ocorrencias;           /* occurrence counter for this word */
};

typedef struct ocorrencia_ ocorrencia;

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>


 /*
  * Reads whitespace-separated tokens from "input.txt" and tries to tally
  * the distinct words in palavras[]. After every read it prints the word
  * stored in the slot it just counted, and at the end it prints the number
  * of distinct words. argc and argv are not used.
  */
 int main (int argc, char * argv[]){
    ocorrencia palavras[MAX_PALAVRAS];
    /* palavra_atual and nova_palavra are initialised once here; nothing
     * inside the read loop below sets them back to 0 / true. */
    int i,palavras_diferentes=0,palavra_atual=0;
    char aux[MAX_TAM_PALAVRA];
    bool nova_palavra=true;
    /* Reset pos and the counter of every slot; the palavra buffers are
     * left uninitialised. */
    for (i=0;i<MAX_PALAVRAS;i++){
        palavras[i].pos=-1;
        palavras[i].num_ocorrencias=0;
    }
    /* The result of fopen is used without a NULL check. */
    FILE * D = fopen("input.txt","r");
    /* feof() only becomes true after a read has already hit end-of-file,
     * so the body runs one more time after the last word was read. */
    while (!feof(D)){
        /* aux2 is declared but never used. */
        char aux2[MAX_TAM_PALAVRA];
        /* The return value is ignored, so a failed read leaves aux as it
         * was. "%s" has no field width: a token of MAX_TAM_PALAVRA or more
         * characters writes past the end of aux. */
        fscanf(D,"%s",aux);
        for (i=0;i<palavras_diferentes;i++){
            /* The slot compared is palavras[palavras_diferentes], i.e. the
             * index is the distinct-word count, not the loop variable i. */
            if (strcmp(palavras[palavras_diferentes].palavra,aux)==0){
                nova_palavra=false;
                break;
            }
            /* Advanced once per non-matching iteration, on top of whatever
             * value it had from previous words. */
            palavra_atual++;
        }
        if (nova_palavra){
            strcpy(palavras[palavra_atual].palavra,aux);
            palavras_diferentes++;
        }
        /* Count and echo the slot selected by palavra_atual. */
        palavras[palavra_atual].num_ocorrencias++;
        printf("%s\n",palavras[palavra_atual].palavra);
    }
    fclose (D);
    printf("diferent words=%i\n",palavras_diferentes);
    printf("success!\n");
    return (EXIT_SUCCESS);
}

Thanks for taking the time to read this and for trying to help!

Upvotes: 0

Views: 79

Answers (1)

francis
francis

Reputation: 9817

Following my comments, here are a few changes that may help you :

-Set palavra_atual to 0 and nova_palavra to true at the beginning of the while loop.

-Test the return of fscanf, add something like if(fscanf(D,"%s",aux)==1){...}

-Compare against every stored word by indexing with the loop variable i: if (strcmp(palavras[i].palavra,aux)==0)

Here goes the code :

/* Table capacity (distinct words) and per-word buffer size in bytes;
 * the buffer size includes room for the terminating '\0'. */
#define MAX_PALAVRAS    1024
#define MAX_TAM_PALAVRA 32

/* A single entry of the word-count table. */
struct ocorrencia_ {
    char palavra[MAX_TAM_PALAVRA];  /* stored word (NUL-terminated) */
    int  pos;                       /* initialised to -1 in main; otherwise unused here */
    int  num_ocorrencias;           /* how many times the word was seen */
};

typedef struct ocorrencia_ ocorrencia;

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>


/*
 * Counts the distinct whitespace-separated words found in "input.txt".
 *
 * For every word read, the table palavras[] is searched linearly; a word
 * not yet present is appended (and announced with "new word ..."), then the
 * occurrence counter of its slot is incremented and the word is echoed.
 * When the input is exhausted the number of distinct words is printed.
 *
 * Words are compared byte-for-byte, so "test" and "test." are different.
 * A token longer than MAX_TAM_PALAVRA-1 characters is read in pieces of at
 * most MAX_TAM_PALAVRA-1 characters, each counted as a separate word.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be opened, a
 * read error occurs, or more than MAX_PALAVRAS distinct words are found.
 * argc and argv are not used.
 */
int main (int argc, char * argv[]){
    (void)argc;
    (void)argv;

    ocorrencia palavras[MAX_PALAVRAS];
    int i,palavras_diferentes=0,palavra_atual=0;
    char aux[MAX_TAM_PALAVRA];
    char formato[16];
    bool nova_palavra=true;

    for (i=0;i<MAX_PALAVRAS;i++){
        palavras[i].palavra[0]='\0';
        palavras[i].pos=-1;
        palavras[i].num_ocorrencias=0;
    }

    FILE * D = fopen("input.txt","r");
    if (D==NULL){
        perror("input.txt");
        return (EXIT_FAILURE);
    }

    /* Build "%<N>s" with N = MAX_TAM_PALAVRA-1 so fscanf can never write
     * past the end of aux, whatever the buffer size macro is set to. */
    snprintf(formato,sizeof formato,"%%%ds",MAX_TAM_PALAVRA-1);

    /* Loop on the result of the read itself: fscanf returns 1 only when a
     * word was actually stored, so there is no extra pass at end-of-file. */
    while (fscanf(D,formato,aux)==1){
        /* Both must start fresh for every word. */
        palavra_atual=0;
        nova_palavra=true;

        for (i=0;i<palavras_diferentes;i++){
            if (strcmp(palavras[i].palavra,aux)==0){
                nova_palavra=false;
                break;
            }
            palavra_atual++;
        }

        if (nova_palavra){
            /* palavra_atual == palavras_diferentes here: refuse to write
             * beyond the last slot of the table. */
            if (palavras_diferentes>=MAX_PALAVRAS){
                fprintf(stderr,"too many different words (max %d)\n",MAX_PALAVRAS);
                fclose (D);
                return (EXIT_FAILURE);
            }
            printf("new word %d %s\n",palavra_atual,aux);
            strcpy(palavras[palavra_atual].palavra,aux);
            palavras_diferentes++;
        }

        palavras[palavra_atual].num_ocorrencias++;
        printf("%s\n",palavras[palavra_atual].palavra);
    }

    /* Distinguish a genuine read error from a normal end-of-file. */
    if (ferror(D)){
        perror("input.txt");
        fclose (D);
        return (EXIT_FAILURE);
    }

    fclose (D);
    printf("diferent words=%i\n",palavras_diferentes);
    printf("success!\n");
    return (EXIT_SUCCESS);
}

You may also be interested in ispunct() from <ctype.h>, which can be used to strip punctuation so that "test." and "test" count as the same word.

Upvotes: 1

Related Questions