OzzyOsbourne
OzzyOsbourne

Reputation: 45

Counting chars, words and lines in a file

I am trying to count the number of characters, words and lines in a file. The text file is:

The snail moves like a
Hovercraft, held up by a
Rubber cushion of itself,
Sharing its secret

And here is the code:

void count_elements(FILE* fileptr, char* filename, struct fileProps* properties) // counts chars, words and lines 
{
    fileptr = fopen(filename, "rb"); 
    int chars = 0, words = 0, lines = 0; 
    char ch;
    while ((ch = fgetc(fileptr)) != EOF  )
    {
        if(ch != ' ') chars++;
        if (ch == '\n') // check lines 
            lines++;
        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\0') // check words
            words++;
      
    
    }
    fclose(fileptr); 
    properties->char_count = chars;
    properties->line_count = lines; 
    properties->word_count = words;

}

But when I print the number of chars, words and lines, the outputs are 81, 18 and 5 respectively. What am I missing? (The read mode does not change anything; I tried "r" as well.)

Upvotes: 0

Views: 131

Answers (2)

Ted Lyngmo
Ted Lyngmo

Reputation: 117871

Your function takes both a FILE* and filename as arguments and one of them should be removed. I've removed filename so that the function can be used with any FILE*, like stdin.

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { /* type defining the struct for easier usage */
    uintmax_t char_count;
    uintmax_t word_count;
    uintmax_t line_count;
} fileProps;

/* a helper function to print the content of a fileProps */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    fprintf(fp,
            "chars %ju\n"
            "words %ju\n"
            "lines %ju\n",
            p->char_count, p->word_count, p->line_count);
    return fp;
}

void count_elements(FILE *fileptr, fileProps *properties) {
    if(!fileptr) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    char ch;
    while((ch = fgetc(fileptr)) != EOF) {
        ++properties->char_count; /* count all characters */

        /* use isspace() to check for whitespace characters */
        if(isspace((unsigned char)ch)) {
            ++properties->word_count;      
            if(ch == '\n') ++properties->line_count;
        }
    }
}

int main() {
    fileProps p;

    FILE *fp = fopen("the_file.txt", "r");
    if(fp) {
        count_elements(fp, &p);
        fclose(fp);

        fileProps_print(stdout, &p);
    }
}

Output for the file you showed in the question:

chars 93
words 17
lines 4

Edit: I just noticed your comment "trying to count only alphabetical letters as a char". For that you can use isalpha and replace the while loop with:

    while((ch = fgetc(fileptr)) != EOF) {
        if(isalpha((unsigned char)ch)) ++properties->char_count;
        else if(isspace((unsigned char)ch)) {
            ++properties->word_count;
            if(ch == '\n') ++properties->line_count;
        }
    }

Output with the modified version:

chars 74
words 17
lines 4

A version capable of reading "wide" characters (multibyte):

#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>

typedef struct {
    uintmax_t char_count;
    uintmax_t word_count;
    uintmax_t line_count;
} fileProps;

FILE* fileProps_print(FILE *fp, const fileProps *p) {
    fprintf(fp,
            "chars %ju\n"
            "words %ju\n"
            "lines %ju\n",
            p->char_count, p->word_count, p->line_count);
    return fp;
}

void count_elements(FILE *fileptr, fileProps *properties) {
    if(!fileptr) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    wint_t ch;
    while((ch = fgetwc(fileptr)) != WEOF) {
        if(iswalpha(ch)) ++properties->char_count;
        else if(iswspace(ch)) {
            ++properties->word_count;
            if(ch == '\n') ++properties->line_count;
        }
    }
}

int main() {
    setlocale(LC_ALL, "sv_SE.UTF-8");      // set your locale
    FILE *fp = fopen("the_file.txt", "r");
    if(fp) {
        fileProps p;
        count_elements(fp, &p);
        fclose(fp);
        fileProps_print(stdout, &p);
    }
}

If the_file.txt contains one line with öäü it'll report

chars 3
words 1
lines 1

and for your original file, it'd report the same as above.

Upvotes: 2

Nidhoegger
Nidhoegger

Reputation: 5232

The solution I whipped up gives me the same results as the gedit document statistics:

#include <stdio.h>

void count_elements(char* filename)
{
    // This can be a local variable as its not used externally. You do not have to put it into the functions signature.
    FILE *fileptr = fopen(filename, "rb"); 
    int chars = 0, words = 0, lines = 0; 
    int read;
    unsigned char last_char = ' '; // Save the last char to see if really a new word was there or multiple spaces
    while ((read = fgetc(fileptr)) != EOF) // Read is an int as fgetc returns an int, which is a unsigned char that got casted to int by the function (see manpage for fgetc)
    {
        unsigned char ch = (char)read; // This cast is safe, as it was already checked for EOF, so its an unsigned char.

        if (ch >= 33 && ch <= 126) // only do printable chars without spaces
        {
            ++chars;
        }
        else if (ch == '\n' || ch == '\t' || ch == '\0' || ch == ' ')
        {
            // Only if the last character was printable we count it as new word
            if (last_char >= 33 && last_char <= 126)
            {
                ++words;
            }
            if (ch == '\n')
            {
                ++lines;
            }
        }
        last_char = ch;     
    }
    fclose(fileptr); 
    
    printf("Chars: %d\n", chars);
    printf("Lines: %d\n", lines);
    printf("Words: %d\n", words);

}

int main()
{
    count_elements("test");
}

Please see the comments in the code for remarks and explanations. The code also ignores other control characters, such as the CR in Windows CRLF line endings, and counts only the LF as a line break.

Upvotes: 2

Related Questions