nutellasandwich
nutellasandwich

Reputation: 27

How to find words with capital letters in a char array using C?

I'm trying to find all the words with capital letters in a string, but I am unable to process my data structure. I seem to be able to print out fileContent, indicating that the file is loading successfully, but my second function is not working on the file contents.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reads the whole of the file named by fileName into a freshly allocated,
 * NUL-terminated buffer.
 *
 * Returns the buffer on success; the caller owns it and must free() it.
 * Returns NULL if the file cannot be opened, its size cannot be determined,
 * or memory cannot be allocated.
 */
char* loadFile(char* fileName)
{
    FILE *inputFile = fopen(fileName, "r");

    if(inputFile == NULL)
    {
        return NULL;
    }

    char *documentStore = NULL;

    //finds the end of the file
    if(fseek(inputFile, 0, SEEK_END) == 0)
    {
        //stores the size of the file (ftell returns long, -1 on failure)
        long size = ftell(inputFile);

        //Sets the scan to the start of the file
        if(size >= 0 && fseek(inputFile, 0, SEEK_SET) == 0)
        {
            //one extra byte so the content can be used as a C string
            documentStore = (char*)malloc((size_t)size + 1);

            if(documentStore != NULL)
            {
                //In text mode fewer than size bytes may be delivered
                //(e.g. newline translation), so terminate at the count
                //actually read rather than at size.
                size_t length = fread(documentStore, 1, (size_t)size, inputFile);
                documentStore[length] = '\0';
            }
        }
    }

    fclose(inputFile);

    return documentStore;
}

/*
 * Collects the capitalised words found in fileContent and counts how often
 * each one occurs.
 *
 * A word starts at the first uppercase letter (A-Z) seen while no word is
 * being recorded and continues over any following ASCII letters; any other
 * character (or the end of the string) ends it.
 *
 * fileContent     NUL-terminated text to scan (not modified).
 * importantWords  caller-provided array with room for at least 10 pointers.
 *                 On return it holds heap-allocated copies of the unique
 *                 words, in order of first appearance, followed by a NULL
 *                 sentinel. The caller must free() each stored string.
 * frequencyWords  caller-provided array with room for at least 10 ints;
 *                 frequencyWords[k] is the occurrence count of
 *                 importantWords[k].
 *
 * Both arrays are received by value, so this function cannot grow them in a
 * way the caller would see (a realloc here could even free the caller's
 * block). The capacity is therefore fixed: at most 9 unique words are stored
 * (one slot is kept for the NULL sentinel); further new words are ignored,
 * while repeats of already stored words are still counted. Words longer than
 * 49 characters are truncated to 49.
 */
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
    enum { WORD_STORE_SLOTS = 10, WORD_BUFFER_SIZE = 50 };

    int uniqueWordCount = 0;
    int recording = 0;
    char wordBuffer[WORD_BUFFER_SIZE];
    int wordBufferCount = 0;

    if(importantWords == NULL || frequencyWords == NULL)
    {
        return;
    }

    //The list is always NULL-terminated so the caller can find its end
    importantWords[0] = NULL;

    if(fileContent == NULL)
    {
        return;
    }

    for(size_t i = 0; ; i++)
    {
        char current = fileContent[i];
        int isUpper = (current >= 'A' && current <= 'Z');
        int isLetter = isUpper || (current >= 'a' && current <= 'z');

        if(recording == 0 && isUpper)
        {
            //An uppercase letter starts a new word
            wordBuffer[0] = current;
            wordBufferCount = 1;
            recording = 1;
        }
        else if(recording == 1 && isLetter)
        {
            //Keep one byte free for the terminator; extra letters are dropped
            if(wordBufferCount < WORD_BUFFER_SIZE - 1)
            {
                wordBuffer[wordBufferCount] = current;
                wordBufferCount += 1;
            }
        }
        else if(recording == 1)
        {
            //Any non-alphabetical character (including the final '\0')
            //ends the word currently being recorded
            int isWordPresent = 0;

            wordBuffer[wordBufferCount] = '\0';
            recording = 0;

            //check to see if that word is in the array already, and if it is, just increment the frequency
            for(int j = 0; j < uniqueWordCount; j++)
            {
                if(strcmp(wordBuffer, importantWords[j]) == 0)
                {
                    frequencyWords[j] += 1;
                    isWordPresent = 1;
                    break;
                }
            }

            //if it is not present and there is still room, store a copy
            if(isWordPresent == 0 && uniqueWordCount < WORD_STORE_SLOTS - 1)
            {
                //+1 for the terminating '\0'
                char* wordStore = (char*)malloc((size_t)wordBufferCount + 1);

                if(wordStore != NULL)
                {
                    strcpy(wordStore, wordBuffer);
                    importantWords[uniqueWordCount] = wordStore;
                    frequencyWords[uniqueWordCount] = 1;
                    uniqueWordCount += 1;
                    importantWords[uniqueWordCount] = NULL;
                }
            }

            wordBufferCount = 0;
        }

        //Stop only after the terminator has been processed, so a word that
        //runs up to the end of the text is still committed
        if(current == '\0')
        {
            break;
        }
    }
}



/*
 * Asks for a file path, loads that file, collects its capitalised words via
 * countImportantWords and prints each word with its frequency, one per line.
 *
 * Returns 0 on success and 1 if input, allocation or file loading fails.
 */
int main() {
    enum { WORD_STORE_SLOTS = 10 };

    char fileName[50];
    char *fileContent;

    //calloc zero-fills the arrays, so every unused slot is a NULL pointer
    //and the print loop below never reads uninitialised memory
    char **importantWords = (char**)calloc(WORD_STORE_SLOTS, sizeof(char*));
    int *frequencyWords = (int*)calloc(WORD_STORE_SLOTS, sizeof(int));

    if(importantWords == NULL || frequencyWords == NULL)
    {
        free(importantWords);
        free(frequencyWords);
        return 1;
    }

    printf("Please input the full file path: ");

    //The field width keeps scanf from writing past the 50-byte buffer
    if(scanf("%49s", fileName) != 1)
    {
        free(importantWords);
        free(frequencyWords);
        return 1;
    }

    fileContent = loadFile(fileName);

    if(fileContent == NULL)
    {
        fprintf(stderr, "Could not read file: %s\n", fileName);
        free(importantWords);
        free(frequencyWords);
        return 1;
    }

    countImportantWords(fileContent, importantWords, frequencyWords);

    //Stop at the first empty slot, but never walk past the allocation
    for(int i = 0; i < WORD_STORE_SLOTS && importantWords[i] != NULL; i++)
    {
        printf("%s %d\n", importantWords[i], frequencyWords[i]);
        free(importantWords[i]);
    }

    free(importantWords);
    free(frequencyWords);
    free(fileContent);

    return 0;
}

I've put in the full file so you can see how the structure was created, in case that is the issue. Ideally, the final loop would print out all the important words and their frequencies. Currently I'm getting exit code 11; I'm not sure what it means, but it may be worth mentioning. I'd really appreciate any help :)

Upvotes: 0

Views: 542

Answers (1)

Tofu
Tofu

Reputation: 3613

You can simplify the process dramatically by utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise at your discretion.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Reads the entire file named by filename into a newly allocated,
 * NUL-terminated buffer. Line breaks are kept in the buffer as-is.
 *
 * Returns the buffer on success (the caller must free() it), or NULL if the
 * file cannot be opened, its size cannot be determined, or allocation fails.
 * The file is closed on every path.
 */
char* readfile(char* filename){
    char* data = NULL;

    FILE* file = fopen(filename, "r");

    if(file == NULL){
        return NULL;
    }

    if(fseek(file, 0, SEEK_END) == 0){

        /* ftell reports -1 on failure, so check before sizing the buffer. */
        long size = ftell(file);

        if(size >= 0 && fseek(file, 0, SEEK_SET) == 0){

            /* One extra byte for the terminating '\0'. */
            data = (char*)malloc((size_t)size + 1);

            if(data != NULL){
                /* fread takes the whole file; fgets would stop at the first
                   newline. Text mode may deliver fewer bytes than size, so
                   terminate at the number actually read. */
                size_t length = fread(data, 1, (size_t)size, file);
                data[length] = '\0';
            }
        }
    }

    fclose(file);

    return data;
}
/* Collection of the words gathered by createuppercasedata. */
typedef struct uppercase_t {
    char **word;  /* array of separately allocated word strings */
    int count;    /* number of strings currently stored in word */
} uppercase;

/*
 * Stores a heap-allocated duplicate of token in the slot u->word[u->count]
 * and then increments u->count.
 *
 * The caller must have made sure that slot exists. If the allocation fails
 * the slot is left holding NULL and u->count is not changed.
 */
void copy(uppercase* u,char* token){

    /* Bytes needed for the text plus its terminating '\0'. */
    const size_t bytes = strlen(token) + 1;

    char* duplicate = (char*)malloc(bytes);

    u->word[u->count] = duplicate;

    if(duplicate == NULL){
        return;
    }

    memcpy(duplicate, token, bytes);

    u->count += 1;
}

/*
 * Splits data on spaces and appends a copy of every token whose first
 * character is an uppercase letter to u.
 *
 * u     collection to fill; it is expected to start out zero-initialised
 *       (count == 0). Release it afterwards with destroyuppercasedata.
 * data  text to tokenise. strtok writes '\0' over the delimiters, so the
 *       buffer is modified in place.
 *
 * If growing the word array fails, the function returns early and u keeps
 * the words collected so far.
 */
void createuppercasedata(uppercase* u, char* data){
    const char delimeter[] = " ";

    /* With no stored words there is no array yet; starting from NULL lets
       realloc below perform the first allocation as well. */
    if(u->count == 0){
        u->word = NULL;
    }

    for(char* token = strtok(data, delimeter); token != NULL; token = strtok(NULL, delimeter)){

        /* <ctype.h> functions need a value representable as unsigned char;
           a plain char may be negative for non-ASCII bytes. */
        if(!isupper((unsigned char)token[0])){
            continue;
        }

        /* Make room for one more pointer. The size has to be given in
           bytes, i.e. element count times the size of a char pointer. */
        char** reallocated = (char**)realloc(u->word, ((size_t)u->count + 1) * sizeof(char*));

        if(reallocated == NULL){
            return;
        }

        u->word = reallocated;

        copy(u, token);
    }
}
/*
 * Frees every word stored in u and then the word array itself.
 * The fields of u are left as they were, so u must not be used again
 * without being reinitialised.
 */
void destroyuppercasedata(uppercase* u){

    char** cursor = u->word;
    int remaining = u->count;

    /* Release the individual strings first, front to back. */
    while(remaining > 0){
        free(*cursor);
        ++cursor;
        --remaining;
    }

    free(u->word);
}
/*
 * Reads the file "textfile", collects its uppercase-initial words, prints
 * how many were found followed by each word on its own line, then releases
 * all memory. Returns -1 if the file could not be read, 0 otherwise.
 */
int main(){

    char filename[] = "textfile";
    uppercase u = {0};

    char* data = readfile(filename);

    if(!data){
        return -1;
    }

    createuppercasedata(&u, data);

    printf("found %i uppercase words\n",u.count);

    int index = 0;

    while(index < u.count){
        printf("%s\n", u.word[index]);
        index++;
    }

    /* Words first, then the file buffer they were copied from. */
    destroyuppercasedata(&u);
    free(data);

    return 0;
}

The code will allocate a new pointer for each uppercase word, plus memory for the word to be copied to. It frees all the memory it allocated in the structure with destroyuppercasedata, and it frees the initial data that was read from the file. Error checking and memory management are really important in C, so make sure you use them properly.

This was the test file I used. textfile

How many Uppercase words can Be Found In this text File the answer should be Seven

And this was the output to the terminal: How Uppercase Be Found In File Seven

Upvotes: 1

Related Questions