Reputation: 27
I'm trying to find all the words with capital letters in a string, but am unable to process my data structure. i seem to be able to print out fileContent, indicating that it is loading in successfully, but my second function is not working on the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* loadFile(char* fileName)
{
FILE *inputFile;
inputFile = fopen(fileName, "r");
//finds the end of the file
fseek(inputFile, 0, SEEK_END);
//stores the size of the file
int size = ftell(inputFile);
//Sets the scan to the start of the file
fseek(inputFile, 0, SEEK_SET);
char *documentStore = (char*)malloc(size);
int i = 0, c;
while((c = fgetc(inputFile)) != EOF)
{
documentStore[i] = c;
i++;
}
return documentStore;
}
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
int uniqueWordCount = 0;
int lengthWordStore = 10;
int i = 0;
int recording = 0;
char wordBuffer[50];
int wordBufferCount = 0;
int isWordPresent = 0;
while(fileContent[i] != EOF)
{
//To allocate more memory incase the structure is full
if(uniqueWordCount == lengthWordStore)
{
lengthWordStore += 10;
char** newWordStore = realloc(importantWords, lengthWordStore * sizeof(char*));
int* newFrequencyStore = realloc(frequencyWords, sizeof(int));
importantWords = newWordStore;
frequencyWords = newFrequencyStore;
}
printf("%s", wordBuffer);
//Conditions to fill if its a word
if(fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 0)
{
wordBuffer[0] = fileContent[i];
recording = 1;
}else if(fileContent[i] >= 'a' && fileContent[i] <= 'z' && recording == 1)
{
//each if is to check if the end of word is reached. Any character that is non alphabetical is considered end of word
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else if (fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 1)
{
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else {
//Adding a terminating character so that it strcpy only copies until that point
wordBuffer[wordBufferCount + 1] = '\0';
recording = 0;
//check to see if that word is in the array already, and if it is, it will just increment the frequency
for(int j = 0; j < uniqueWordCount; j++){
if(strcmp(wordBuffer, importantWords[j]) == 0)
{
frequencyWords[j] += 1;
isWordPresent = 1;
}
}
//if its not present, it should assign it to the structure
if(isWordPresent == 0)
{
char* wordStore = (char*)malloc(wordBufferCount * sizeof(char));
strcpy(wordStore, wordBuffer);
uniqueWordCount += 1;
importantWords[uniqueWordCount] = wordStore;
frequencyWords[uniqueWordCount] = 1;
}
}
i++;
}
}
int main() {
char fileName[50];
char *fileContent;
char **importantWords = (char**)malloc(10*sizeof(char**));
int *frequencyWords = (int*)malloc(10*sizeof(int));
printf("Please input the full file path: ");
scanf("%s", fileName);
fileContent = loadFile(fileName);
countImportantWords(fileContent, importantWords, frequencyWords);
int i = 0;
while(importantWords[i] != '\0')
{
printf("%s %d", importantWords[i], frequencyWords[i]);
i++;
}
return 0;
}
I've put in the full file so you can see how the structure was created incase that it is the issue, but ideally what would happen is that the final loop would print out all the words that are important and they're frequency. Currently i'm getting exit code 11, which i'm not sure what it means, but may be worth mentioning. I'd really appreciate any help :)
Upvotes: 0
Views: 542
Reputation: 3613
You can simplify the process dramatically but utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise to your discretion.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char* readfile(char* filename){
char* data = NULL;
FILE* file = fopen(filename, "r");
if(file == NULL){
return NULL;
}
fseek(file, 0, SEEK_END);
long size = ftell(file)+1;
fseek(file, 0, SEEK_SET);
data = (char*)malloc(size);
if(data == NULL){
return NULL;
}
fgets(data, (int)size, file);
return data;
}
typedef struct uppercase_t{
char** word;
int count;
}uppercase;
void copy(uppercase* u,char* token){
size_t length = strlen(token);
u->word[u->count] = (char*)malloc(length+1);
if(u->word[u->count] == NULL){
return;
}
strcpy(u->word[u->count], token);
++u->count;
}
void createuppercasedata(uppercase* u, char* data){
const char delimeter[] = " ";
char* token = strtok(data, delimeter);
if(token == NULL){
return;
}
u->word = (char**)malloc(u->count+1);
if(u->word == NULL){
return;
}
if(isupper(token[0])){
copy(u,token);
}
while(token != NULL){
token = strtok(0, delimeter);
if(token != NULL)
if(isupper(token[0])) {
char** reallocated = (char**)realloc(u->word, u->count+1);
if(reallocated == NULL){
return;
}
u->word = reallocated;
copy(u, token);
}
}
}
void destroyuppercasedata(uppercase* u){
for(int index = 0; index < u->count; ++index){
free(u->word[index]);
}
free(u->word);
}
int main(){
char filename[] = "textfile";
char* data = readfile(filename);
if(data == NULL){
return -1;
}
uppercase u = {0};
createuppercasedata(&u, data);
printf("found %i uppercase words\n",u.count);
for(int index = 0; index < u.count; ++index){
printf("%s\n", u.word[index]);
}
destroyuppercasedata(&u);
free(data);
}
The code will allocate a new pointer for each uppercase and memory for the word to be copied too. It will free all the memory it allocated in the structure with destroyuppercasedata
and it will free the initial data that was read from file. Error checking and memory management in C is really important. So utilise those properly.
This was the test file I used. textfile
How many Uppercase words can Be Found In this text File the answer should be Seven
And this was the output to the terminal: How Uppercase Be Found In File Seven
Upvotes: 1