Reputation: 1919
How to count words that only start with capitals?
See the example code from this demonstration:
https://codeforwin.org/2018/02/c-program-count-occurrences-of-all-words-a-file.html
Code example:
https://onlinegdb.com/HJgWn-K2E
/**
* C program to count occurrences of all words in a file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORDS 50
/*
 * Lowercase a NUL-terminated string in place.
 * Returns the same pointer that was passed in.
 */
char *strlwr(char *str)
{
    /* Step through the bytes as unsigned char so tolower() gets a valid value. */
    for (unsigned char *cur = (unsigned char *)str; *cur != '\0'; ++cur) {
        *cur = tolower(*cur);
    }
    return str;
}
/*
 * Ask for a file path, then count how often each distinct word occurs in
 * that file and print the totals.
 *
 * A word is a whitespace-delimited token. Each token is lowercased and one
 * trailing punctuation character is removed before it is compared, so
 * "Town" and "town." count as the same word. At most MAX_WORDS distinct
 * words are tracked; once the list is full, new words are ignored (with a
 * single warning) while words already in the list keep being counted.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if no path could be read
 * or the file cannot be opened.
 */
int main(void)
{
    FILE *fptr;
    char path[100];
    int i, index, found;
    int listFull = 0;       /* set once the "list is full" warning was printed */
    size_t len;

    // List of distinct words
    char words[MAX_WORDS][50];
    char word[50];

    // Count of distinct words, all starting at 0
    int count[MAX_WORDS] = {0};

    /* Input file path; the field width keeps the read inside path[100] */
    printf("Enter file path: ");
    if (scanf("%99s", path) != 1)
    {
        printf("Unable to read file path.\n");
        exit(EXIT_FAILURE);
    }

    /* Try to open file */
    fptr = fopen(path, "r");

    /* Exit if file not opened successfully */
    if (fptr == NULL)
    {
        printf("Unable to open file.\n");
        printf("Please check you have read privileges.\n");

        exit(EXIT_FAILURE);
    }

    index = 0;

    /* The field width keeps each token inside word[50]; comparing with 1
     * ends the loop on end-of-file as well as on a read error. */
    while (fscanf(fptr, "%49s", word) == 1)
    {
        // Convert word to lowercase
        strlwr(word);

        // Remove last punctuation character. The cast is required because
        // <ctype.h> functions must not be given a negative char value.
        len = strlen(word);
        if (len > 0 && ispunct((unsigned char)word[len - 1]))
            word[--len] = '\0';

        // The token was a lone punctuation mark: nothing left to count
        if (len == 0)
            continue;

        // Check if word exists in list of all distinct words
        found = -1;
        for (i = 0; i < index; i++)
        {
            if (strcmp(words[i], word) == 0)
            {
                found = i;
                break;
            }
        }

        if (found >= 0)
        {
            // Known word: increment its occurrence count
            count[found]++;
        }
        else if (index < MAX_WORDS)
        {
            // New word and there is still room: add it to the list
            strcpy(words[index], word);
            count[index]++;
            index++;
        }
        else if (!listFull)
        {
            // New word but the list is full: warn once instead of
            // writing past the end of words[] and count[]
            fprintf(stderr,
                    "Warning: more than %d distinct words; extra words are ignored.\n",
                    MAX_WORDS);
            listFull = 1;
        }
    }

    // Close file
    fclose(fptr);

    /*
     * Print occurrences of all words in file.
     */
    printf("\nOccurrences of all distinct words in file: \n");
    for (i = 0; i < index; i++)
    {
        /*
         * %-15s prints string in 15 character width.
         * - is used to print string left align inside
         * 15 character width space.
         */
        printf("%-15s %d\n", words[i], count[i]);
    }

    return 0;
}
In this code example, they make all words lower case letters then count all those words.
Instead: how do you add a word to the unique list only if it starts with a capital letter, and then count all the occurrences of that word?
Should you combine fscanf with a check such as if (isupper(word[0]))?
Test.txt file
Any girl jumped over one boy.
Some car skipped to some boy.
One town drove over the town.
Any town ran under some dog.
Some girl drove to a town.
The boy walked under any town.
A town jumped over any car.
Any boy jumped from a car.
A dog ran over a boy.
A girl ran to some car.
A car ran under the girl.
The car ran on any town.
One dog walked under any dog.
A car jumped on some town.
A boy ran to a boy.
The dog drove over a boy.
A boy jumped over the car.
Some car drove on some girl.
One boy drove under some girl.
A girl walked over some dog.
Expected Output:
Any 7
Some 3
One 4
The 6
A 8
Current Output:
any 7
girl 7
jumped 5
over 7
one 4
boy 10
some 10
car 9
skipped 1
to 4
town 8
drove 5
the 6
ran 6
under 5
dog 6
a 13
walked 3
from 1
on 3
Possible Solutions:
// skip the word unless its first character is a capital letter
// (cast needed: <ctype.h> functions must not receive a negative char value)
if (!isupper((unsigned char)word[0])) {
    continue;
}
Then create another for loop that counts how often those words appear in the file, whether they start with a lowercase or an uppercase letter.
Upvotes: 0
Views: 670
Reputation: 84579
You are working in the right direction; you just need to rearrange your code a bit. You could use separate arrays to keep track of each unique word, coordinating its index with the same index in an array holding the number of times the word occurs and in another array holding whether the word appears capitalized in your file -- but there is a better way.
Whenever you have to coordinate differing types of data as a single unit, you need to be thinking `struct`. With a `struct`, you can coordinate each unique word, whether it appears capitalized in the file, and the number of times it occurs (case-insensitive) as a single unit, e.g.
typedef struct { /* use a struct to hold, for one distinct word: */
char word[MAX_WORD]; /* the lowercase word, and */
int cap, count; /* whether it appears capitalized, and its count */
} words_t;
Now you can simply create an array of `words_t` and add each `word` (lowercase) as the word within the struct, capture whether it ever appears capitalized (`cap`), and keep the total `count` of the times it occurs.
This simplifies the logic in your code. Now you simply declare an array of `words_t`, e.g.
#define MAX_WORD 50 /* max word size */
#define MAX_WORDS 512 /* max number of words */
...
/* Array of structs, one per distinct word, initialized all zero: only the
 * first element's word is named in the initializer; every member and
 * element not listed there is implicitly zero-initialized. */
words_t words[MAX_WORDS] = {{ .word = "" }};
You ask for your filename -- validating every user input, e.g.
/* Input file path */
printf ("Enter file path: ");
/* scanf returns the number of items converted, so any result other than 1
 * means no path was read.
 * NOTE(review): "%s" has no field width, so input longer than the path
 * buffer would overflow it -- consider adding a width that matches the
 * buffer size. */
if (scanf ("%s", path) != 1) { /* validate every input */
fputs ("error: invalid file path or cancellation.\n", stderr);
return 1;
}
Now loop over your words -- while protecting your array bounds as well as reading each word:
while (index < MAX_WORDS && /* protect array bounds */
fscanf (fptr, "%s", word) == 1) { /* while valid word read */
Now the crux of the logic needed to store and keep track of which words have appeared capitalized comes into play. First you need a flag (alongside your `isunique` flag) that records whether the word appears capitalized, and you must capture that information before you convert the word to lowercase. You capture whether the word appears capitalized simply by testing the first character with `isupper()`, e.g.
int iscap = 0, isunique = 1; /* is capital, is unique flags */
/* Record capitalization now, before the word is lowercased. The cast is
 * needed because <ctype.h> functions must not receive a negative char value. */
if (isupper ((unsigned char)*word)) /* does the word start with a capital */
    iscap = 1;
Rather than just trimming a single punctuation, you can just as easily trim all punctuation before converting the word to lowercase, e.g.
/* remove all trailing punctuation characters */
len = strlen (word); /* get length */
/* loop only while len > 0; the cast keeps ispunct() away from negative
 * char values, which <ctype.h> functions must not receive */
while (len && ispunct ((unsigned char)word[len - 1]))
    word[--len] = 0; /* overwrite with NUL and shrink len */
strlwr (word); /* convert word to lowercase */
Now all that remains from a data storage standpoint is to loop to determine whether the word `isunique`. If it is not, simply set the `cap` flag if `iscap` is TRUE and increment the count. If it is unique, then after exiting the loop you copy the word to a new element in your array (using `memcpy` -- you already have the length, so there is no need to scan again) and you do the same thing with `cap` and `count`, incrementing `index` when done:
/* check if word exists in list of all distinct words */
for (i = 0; i < index; i++) {
if (strcmp(words[i].word, word) == 0) {
isunique = 0; /* set unique flag zero */
if (iscap) /* if capital flag set */
words[i].cap = iscap; /* set capital flag in struct (it is never cleared here) */
words[i].count++; /* increment word count */
break; /* bail - done */
}
}
if (isunique) { /* if unique, add to array, increment index */
memcpy (words[index].word, word, len + 1); /* have len; + 1 copies the terminating NUL too */
if (iscap) /* if cap flag set */
words[index].cap = iscap; /* set capital flag in struct */
words[index++].count++; /* increment count & index */
}
}
fclose (fptr); /* close file */
All that remains is looping over the elements stored in the array and checking the `.cap` member to determine whether the word appears capitalized before printing the word and the number of times it appears (note: the counts in your Expected Output: are wrong based on your example text):
/*
 * Print occurrences of every word that appeared capitalized in the file.
 */
puts ("\nOccurrences of all distinct words with Cap in file:");
for (i = 0; i < index; i++) {
    if (words[i].cap) {
        /* words are stored in lowercase; restore the leading capital on a
         * copy so the stored word is left untouched */
        strcpy (word, words[i].word);
        /* cast: <ctype.h> functions must not receive a negative char value */
        *word = toupper ((unsigned char)*word);
        /*
         * %-15s prints string in 15 character width.
         * - is used to print string left align inside
         * 15 character width space.
         */
        printf("%-15s %d\n", word, words[i].count);
    }
}
(note: the use of `puts` instead of `printf` for "Occurrences of..." as there is no conversion needed... a good compiler will optimize this for you)
Putting it all together, you could do:
/**
* C program to count occurrences of all words in a file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#define MAX_WORD 50 /* max word size */
#define MAX_WORDS 512 /* max number of words */
#ifndef PATH_MAX
#define PATH_MAX 2048 /* max path (defined for Linux in limits.h) */
#endif
/* One record per distinct word found in the file. */
typedef struct {
    char word[MAX_WORD];    /* the word, stored in lowercase */
    int cap;                /* nonzero once the word was seen capitalized */
    int count;              /* number of occurrences, ignoring case */
} words_t;
/*
 * Lowercase a NUL-terminated string in place and return the same pointer.
 *
 * Each byte is converted to unsigned char before it is handed to tolower():
 * where plain char is signed, bytes above 0x7F are negative, and passing a
 * negative value (other than EOF) to a <ctype.h> function is undefined
 * behavior.
 */
char *strlwr (char *str)
{
    for (char *p = str; *p; p++)
        *p = (char)tolower ((unsigned char)*p);
    return str;
}
/*
 * Ask for a file path, then report every word that appears capitalized at
 * least once in that file together with its total, case-insensitive count.
 *
 * A word is a whitespace-delimited token with all trailing punctuation
 * removed; tokens that consist of punctuation only are ignored. At most
 * MAX_WORDS distinct words are tracked: once the list is full, new words
 * are skipped (with a single warning) while words already in the list keep
 * being counted.
 *
 * Returns 0 on success and 1 if no path could be read; exits with
 * EXIT_FAILURE if the file cannot be opened.
 */
int main (void) {
    FILE *fptr;
    char path[PATH_MAX], word[MAX_WORD];
    char pathfmt[32], wordfmt[32];  /* width-limited "%<n>s" formats */
    size_t len, index = 0;
    int full_warned = 0;            /* "list is full" warning already shown */
    /* Array of struct of distinct words, initialized all zero */
    words_t words[MAX_WORDS] = {{ .word = "" }};

    /* Build "%<size-1>s" so neither read can overflow its buffer; the
     * widths follow the array sizes instead of being hard-coded. */
    snprintf (pathfmt, sizeof pathfmt, "%%%zus", sizeof path - 1);
    snprintf (wordfmt, sizeof wordfmt, "%%%zus", sizeof word - 1);

    /* Input file path */
    printf ("Enter file path: ");
    if (scanf (pathfmt, path) != 1) {   /* validate every input */
        fputs ("error: invalid file path or cancellation.\n", stderr);
        return 1;
    }

    fptr = fopen (path, "r");           /* open file */
    if (fptr == NULL) {                 /* validate file open */
        fputs ( "Unable to open file.\n"
                "Please check you have read privileges.\n", stderr);
        exit (EXIT_FAILURE);
    }

    /* Keep reading to end of file even after the list fills up, so that
     * words already in the list are still counted completely. */
    while (fscanf (fptr, wordfmt, word) == 1) {
        size_t i;
        /* Record capitalization before lowercasing. The cast is needed
         * because <ctype.h> functions must not receive negative values. */
        int iscap = isupper ((unsigned char)*word) != 0;

        /* remove all trailing punctuation characters */
        len = strlen (word);
        while (len && ispunct ((unsigned char)word[len - 1]))
            word[--len] = 0;
        if (len == 0)                   /* token was punctuation only */
            continue;

        strlwr (word);                  /* convert word to lowercase */

        /* look the word up; i == index afterwards means "not found" */
        for (i = 0; i < index; i++)
            if (strcmp (words[i].word, word) == 0)
                break;

        if (i == index) {               /* new word */
            if (index == MAX_WORDS) {   /* protect array bounds */
                if (!full_warned) {
                    fprintf (stderr, "warning: more than %d distinct words; "
                             "additional words are ignored.\n", MAX_WORDS);
                    full_warned = 1;
                }
                continue;
            }
            memcpy (words[index].word, word, len + 1);  /* len + NUL */
            index++;                    /* words[i] is now the new entry */
        }

        if (iscap)                      /* remember any capitalized use */
            words[i].cap = 1;
        words[i].count++;               /* increment word count */
    }
    fclose (fptr);                      /* close file */

    /*
     * Print occurrences of all words that appeared capitalized in file.
     */
    puts ("\nOccurrences of all distinct words with Cap in file:");
    for (size_t i = 0; i < index; i++) {
        if (words[i].cap) {
            /* stored lowercase; restore the leading capital on a copy */
            strcpy (word, words[i].word);
            *word = toupper ((unsigned char)*word);
            /*
             * %-15s prints string in 15 character width.
             * - is used to print string left align inside
             * 15 character width space.
             */
            printf("%-15s %d\n", word, words[i].count);
        }
    }

    return 0;
}
Example Use/Output
Using your posted input
$ ./bin/unique_words_with_cap
Enter file path: dat/girljumped.txt
Occurrences of all distinct words with Cap in file:
Any 7
One 4
Some 10
The 6
A 13
(note: "Some/some" appears 10 times and "A/a" appears 13 times instead of the 3/8 shown in your Expected Output:, which you can confirm by simple count)
Look things over and let me know if you have further questions.
Upvotes: 1