Reputation: 45
I am trying to count the number of characters, words, and lines in a file. The text file is:
The snail moves like a
Hovercraft, held up by a
Rubber cushion of itself,
Sharing its secret
And here is the code:
/*
 * Opens `filename`, walks it one character at a time and stores the resulting
 * character, word and line tallies in `properties`.
 * The incoming `fileptr` value is never used: it is overwritten by fopen() below.
 */
void count_elements(FILE* fileptr, char* filename, struct fileProps* properties) // counts chars, words and lines
{
// NOTE(review): the result of fopen() is not checked before it is passed to fgetc().
fileptr = fopen(filename, "rb");
int chars = 0, words = 0, lines = 0;
// NOTE(review): fgetc() returns an int; storing it in a char makes the EOF comparison unreliable.
char ch;
while ((ch = fgetc(fileptr)) != EOF )
{
// Every character except a plain space is counted, so '\n' and '\t' are included in chars.
if(ch != ' ') chars++;
if (ch == '\n') // check lines
lines++;
// Each separator character adds one word: consecutive separators are counted repeatedly,
// and a final word that is not followed by a separator is not counted.
if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\0') // check words
words++;
}
fclose(fileptr);
properties->char_count = chars;
properties->line_count = lines;
properties->word_count = words;
}
But when I print the number of chars, words, and lines, the outputs are 81, 18, and 5 respectively. What am I missing? (The read mode does not change anything; I tried "r" as well.)
Upvotes: 0
Views: 131
Reputation: 117871
Your function takes both a FILE* and a filename as arguments, and one of them should be removed. I've removed filename so that the function can be used with any FILE*, like stdin.
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
/* Tallies produced by count_elements() and printed by fileProps_print(). */
typedef struct { /* type defining the struct for easier usage */
uintmax_t char_count; /* characters counted; count_elements() decides which ones qualify */
uintmax_t word_count; /* incremented by count_elements() once per whitespace character */
uintmax_t line_count; /* number of '\n' characters seen */
} fileProps;
/*
 * Helper that writes the three counters stored in *p to the stream fp,
 * one labelled value per line in the order chars, words, lines.
 * Returns fp so the caller can keep using the same stream.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    fprintf(fp, "chars %ju\n", p->char_count);
    fprintf(fp, "words %ju\n", p->word_count);
    fprintf(fp, "lines %ju\n", p->line_count);
    return fp;
}
/*
 * Read fileptr until end-of-file and fill *properties with the tallies:
 *   char_count - every character read,
 *   word_count - one per whitespace character (as classified by isspace),
 *   line_count - one per '\n'.
 * The caller keeps ownership of the stream; it is neither opened nor closed here.
 * When fileptr is NULL the function returns without touching *properties.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(!fileptr) return;
    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;
    /* int, not char: fgetc() returns an unsigned char value converted to int,
       or EOF. A char cannot hold EOF distinctly from every byte value, so the
       loop could stop early on byte 0xFF or never stop at all. */
    int ch;
    while((ch = fgetc(fileptr)) != EOF) {
        ++properties->char_count; /* count all characters */
        /* use isspace() to check for whitespace characters; ch is already in
           the unsigned char range here, which is what <ctype.h> requires */
        if(isspace(ch)) {
            ++properties->word_count;
            if(ch == '\n') ++properties->line_count;
        }
    }
}
/*
 * Count the contents of "the_file.txt" and print the totals to stdout.
 * Returns 0 on success and 1 when the file cannot be opened.
 */
int main() {
    FILE *fp = fopen("the_file.txt", "r");
    if(!fp) {
        perror("the_file.txt"); /* report why instead of exiting silently */
        return 1;
    }
    fileProps p;
    count_elements(fp, &p);
    fclose(fp);
    fileProps_print(stdout, &p);
    return 0;
}
Output for the file you showed in the question:
chars 93
words 17
lines 4
Edit: I just noticed your comment "trying to count only alphabetical letters as a char". For that you can use isalpha and replace the while loop with:
/* Replacement loop: only alphabetic characters add to char_count, while
   whitespace still drives the word and line counts. Characters that are
   neither (digits, punctuation, ...) are not counted at all. */
/* NOTE(review): ch is declared outside this fragment; it should be an int so
   that the comparison with EOF is reliable - confirm at the declaration. */
while((ch = fgetc(fileptr)) != EOF) {
if(isalpha((unsigned char)ch)) ++properties->char_count;
else if(isspace((unsigned char)ch)) {
++properties->word_count; /* one per whitespace character */
if(ch == '\n') ++properties->line_count;
}
}
Output with the modified version:
chars 74
words 17
lines 4
A version capable of reading "wide" characters (multibyte):
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
/* Tallies produced by count_elements() and printed by fileProps_print(). */
typedef struct {
uintmax_t char_count; /* wide characters for which iswalpha() is true */
uintmax_t word_count; /* incremented once per non-alphabetic character for which iswspace() is true */
uintmax_t line_count; /* number of '\n' characters seen */
} fileProps;
/*
 * Print the counters stored in *p to fp as three lines labelled
 * chars, words and lines, then return fp to the caller.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    const uintmax_t chars = p->char_count;
    const uintmax_t words = p->word_count;
    const uintmax_t lines = p->line_count;

    fprintf(fp, "chars %ju\n" "words %ju\n" "lines %ju\n", chars, words, lines);
    return fp;
}
/*
 * Read wide characters from fileptr until fgetwc() returns WEOF and store
 * the tallies in *properties:
 *   char_count - characters for which iswalpha() is true,
 *   word_count - one per non-alphabetic character for which iswspace() is true,
 *   line_count - one per '\n'.
 * A NULL fileptr makes the function return without touching *properties.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(fileptr == NULL) {
        return;
    }

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    for(wint_t wc = fgetwc(fileptr); wc != WEOF; wc = fgetwc(fileptr)) {
        if(iswalpha(wc)) {
            ++properties->char_count;
            continue;
        }
        if(!iswspace(wc)) {
            continue; /* neither a letter nor whitespace: not counted */
        }
        ++properties->word_count;
        if(wc == '\n') {
            ++properties->line_count;
        }
    }
}
/*
 * Select a UTF-8 locale, count the contents of "the_file.txt" and print the
 * totals to stdout. Returns 0 on success and 1 when the file cannot be opened.
 */
int main() {
    /* set your locale; fgetwc() decodes the file's bytes according to it */
    if(!setlocale(LC_ALL, "sv_SE.UTF-8")) {
        /* setlocale() returns NULL when the locale is unavailable; keep going,
           but tell the user that multibyte input may not be decoded */
        fputs("warning: locale sv_SE.UTF-8 is not available\n", stderr);
    }
    FILE *fp = fopen("the_file.txt", "r");
    if(!fp) {
        perror("the_file.txt"); /* report why instead of exiting silently */
        return 1;
    }
    fileProps p;
    count_elements(fp, &p);
    fclose(fp);
    fileProps_print(stdout, &p);
    return 0;
}
If the_file.txt contains one line with öäü, it'll report
chars 3
words 1
lines 1
and for your original file, it'd report the same as above.
Upvotes: 2
Reputation: 5232
The solution I whipped up gives me the same results as the gedit document statistics:
#include <stdio.h>
// True for the printable, non-space ASCII characters '!' (33) through '~' (126).
static int is_visible_ascii(int c)
{
    return c >= 33 && c <= 126;
}

// Prints the number of visible characters, lines and words found in the file
// named by filename.
//   Chars: bytes in the range 33..126 (printable ASCII without the space).
//   Lines: number of '\n' bytes.
//   Words: runs of visible characters ended by ' ', '\t', '\n', '\0' or end-of-file.
// If the file cannot be opened, the reason is reported on stderr and nothing is counted.
void count_elements(char* filename)
{
    // This can be a local variable as it is not used externally; it does not have to be part of the function's signature.
    FILE *fileptr = fopen(filename, "rb");
    if (fileptr == NULL)
    {
        perror(filename); // fgetc() on a NULL stream would be undefined behaviour
        return;
    }

    int chars = 0, words = 0, lines = 0;
    int in_word = 0; // non-zero while visible characters have been seen since the last separator
    int read;        // int because fgetc() returns an unsigned char value converted to int, or EOF

    while ((read = fgetc(fileptr)) != EOF)
    {
        if (is_visible_ascii(read)) // only count printable chars without spaces
        {
            ++chars;
            in_word = 1;
        }
        else if (read == '\n' || read == '\t' || read == '\0' || read == ' ')
        {
            // A separator ends a word only if one was in progress, so multiple spaces do not add words
            if (in_word)
            {
                ++words;
                in_word = 0;
            }
            if (read == '\n')
            {
                ++lines;
            }
        }
        // Any other byte (e.g. the '\r' of a CRLF pair) is skipped and leaves in_word
        // untouched, so "word\r\n" still counts as one word.
    }

    // The last word may be ended by end-of-file rather than by a separator
    if (in_word)
    {
        ++words;
    }

    fclose(fileptr);
    printf("Chars: %d\n", chars);
    printf("Lines: %d\n", lines);
    printf("Words: %d\n", words);
}
// Entry point: print the statistics for the file named "test".
int main()
{
    count_elements("test");
    return 0;
}
Please see the comments in the code for remarks and explanations. The code also ignores other control characters, such as the CR in Windows CRLF line endings, and counts only the LF.
Upvotes: 2