YPD
YPD

Reputation: 207

Problem with pointers to three different linked list

I'm having problem with pointers in a C program that count the occurrences of a string or more in a bunch of file. The program take in input a file which contains the paths of the files in which search the occurrences. All the files that i will mention are contained in the same folder of the project, whose name is "find". In my case, the input file is "path.txt":

C:\Users\Utente\Desktop\find\try.txt
C:\Users\Utente\Desktop\find\try1.txt

The try.txt content is:

abc
abc
abc
ac
ac
ac
ac

The try1.txt content is:

ac
ac
ac
ac
abc
abc
abc

My program is composed by 4 files, two header-files and two source files:

find.c:

#include "find.h"

int main(int argc, char * argv[]){

FILE *fInput = NULL;  
FILE *fp = NULL;
char *line1; 
char *line2;
int endOfLineDetected = 0;
size_t nrOfCharRead = 0;
char ch;

fWord *w = NULL;
fWord *start = NULL;
fWord *tail = NULL;

fPath *head = NULL;
fPath *current = NULL;


fInput = fopen(argv[1], "r"); //the file that contains the path of the file in which search.

if(fInput == NULL){
    fprintf(stderr, "Cannot open %s, exiting. . .\n", argv[1]);
    exit(1);
}

while(!endOfLineDetected){ //read line by line the input file in order to save the path in a structure
    line1 = getLineOfAnySize(fInput,128,&endOfLineDetected,&nrOfCharRead);
    fPath *node = malloc (sizeof(fPath));
    node->path = line1;
    node->fileOccurrences = 0;
    node->position = NULL;
    node->next = NULL;

    if(head == NULL){
        current = head = node;
    }else{
        current = current->next = node;
    }
}

fclose(fInput);

//create a linked list of the type fWord, one structure for each word.
do{
    fWord *app = malloc(sizeof(fWord));
    printf("Insert the word to search: ");
    scanf("%s", app->word);
    app->totalOccurences = 0;
    app->p = head;
    app->next = NULL;

    if(start == NULL){
        tail = start = app;
    }else{
        tail = tail->next = app;
    }
    printf("Do you want to insert another word? (Y/N): ");
    scanf(" %c", &ch);
}while(ch == 'y' || ch == 'Y');

w = start; //pointer back to the top of the fWord structure

//traverse all the structure and execute the algorithm
while(w != NULL){
    while(w->p != NULL){
        fp = fopen(w->p->path, "r");
        if(fp == NULL){
            fprintf(stderr, "Cannot open %s, exiting. . .\n", w->p->path);
            exit(1);
        }

        int countLine = 0;
        w->p->fileOccurrences = 0;
        endOfLineDetected = 0;

        while(!endOfLineDetected){
            line2 = getLineOfAnySize(fp,128,&endOfLineDetected,&nrOfCharRead);
            int n = strlen(line2);
            int m = strlen(w->word);
            w->p->fileOccurrences = w->p->fileOccurrences + KMP(line2, w->word, n, m, countLine, w->p);
            countLine = countLine + 1;
        }   

        w->totalOccurences = w->totalOccurences + w->p->fileOccurrences;
        w->p->position = getHead(); // //pointer back to the top of the  fPosition structure
        w->p = w->p->next;
        fclose(fp);
    }
    w->p = head; //pointer back to the top of the fPath structure
}

w = start; //pointer back to the top of the fWord structure

//traverse all the structure and print out the occurrences and their position
while(w != NULL){
    w->p = head;
    printf("WORD %s \r\n", w->word);
    printf("TOTAL %d \r\n", w->totalOccurences);
    while(w->p != NULL){
        printf("FILE %s \r\n", w->p->path);
        printf("OCCURENCES %d   \r\n", w->p->fileOccurrences);
        while (w->p->position != NULL){
            printf("%d %d\r\n", w->p->position->line, w->p->position->character);
            w->p->position = w->p->position->next;
        }
        w->p = w->p->next;
    }
    w = w->next;
}

printf("\r\n"); //the file ends with an empty line

return 0;
}

//method used for read line by line a file
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead){ 
char *line;       // buffer for our string
int ch;           // we will read line character by character
size_t len = 0;   // number of characters read (character counter)
size_t lineSize = typicalSize;  // initial size of the buffer allocated for the line
*nrOfCharRead = 0;

if(!fp) return NULL; // protection

// allocating the buffer
line = realloc(NULL, sizeof(char)*lineSize); // expected size of the line is up to typicalSize

if (!line) return line; // protection, if we fail to allocate the memory we will return NULL

while (1) { // loop forever     
    ch = fgetc(fp);       // getting character by character from file

    if (ch == '\n') break; // end of line detected - breaking the loop 
    if( ch == EOF)  {
        *endOfLineDetected = 1;
        break; // end of file detected - breaking the loop
     }

    line[len++] = ch;     // store the character in the line buffer, increase character counter

    if (len == lineSize){ // we reached the end of line buffer (no more room)

        lineSize = lineSize + 64; // we have to increase the line size 
        line = realloc(line, sizeof(char)*(lineSize)); // line buffer has new size now

        if (!line) return line; // if we fail to allocate memory we will return NULL
    }
    if( (len == 0) && *endOfLineDetected){ // empty file
        *endOfLineDetected = 1;
        break; 
    } 
}


line[len++] ='\0';  // ending the string (notice there is no '\n' in the string)
*nrOfCharRead = len;

return line;       // return the string
}

find.h:

#include "kmp.h"

char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead);

kmp.c:

#include "kmp.h"

fPosition *head = NULL;
fPosition *current = NULL;

// Function to implement KMP algorithm
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app){

int count = 0;

// next[i] stores the index of next best partial match
int next[n + 1];

for (int i = 0; i < n + 1; i++)
    next[i] = 0;

for (int i = 1; i < n; i++){
    int j = next[i + 1];

    while (j > 0 && Y[j] != Y[i])
        j = next[j];

    if (j > 0 || Y[j] == Y[i])
        next[i + 1] = j + 1;
}

for (int i = 0, j = 0; i < m; i++){
    if (*(X + i) == *(Y + j)){
        if (++j == n){
            count = count + 1; //count the occurrences of the string in this file   
            fPosition *node = malloc (sizeof(fPosition));
            node->line = line; //the current line
            node->character = i - j + 1; //the shift in which occurs
            node->next = NULL;

            if(head == NULL){
                current = head = node;
            }else{
                current = current->next = node;
            }

            app->position = current;

        }
    }
    else if (j > 0) {
        j = next[j];
        i--;    // since i will be incremented in next iteration
    }
}

    return count; //return the number of occurences found
}

//take the pointer back to the top of fPosition
fPosition * getHead(){ 
     fPosition *app = head;
     head = NULL;
     return app;
}

kmp.h:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

struct filePath{
char *path; //the file path
struct filePath *next;
};

struct OccurrencesPosition{
int line; //line in which an occurrence is founded
int character; //shift at which the occurrences comes

struct filePath pathInfo;

struct OccurrencesPosition *next; //pointer to the next occurrences
};

struct fileWord{
char word[50]; //the string to search
int totalOccurences; //the total occurrences of the string

int fileOccurrences; //the occurrences of each file
struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions

struct fileWord *next; //pointer to the next word
};

typedef struct filePath fPath; 
typedef struct fileWord fWord;
typedef struct OccurrencesPosition fPosition;

fPosition * getHead();


int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app);

The problem is that when i run my program passing in input "abc" and "ac" it returns wrong value. More precisely, returns the value corresponding to "ac" in both cases. Here's the execution:

PS C:\Users\Utente\Desktop\find> gcc find.c kmp.c -o "find.exe"
PS C:\Users\Utente\Desktop\find> .\find.exe "path.txt"
Insert the word to search: abc
Do you want to insert another word? (Y/N): Y
Insert the word to search: ac
Do you want to insert another word? (Y/N): N
WORD abc 
TOTAL 6 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4    
3 0
4 0
5 0
6 0
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4    
0 0
1 0
2 0
3 0
WORD ac 
TOTAL 8 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4    
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4

As you can see, the WORD and TOTAL are correct in both cases, but the occurrences not. They correspond to "ac" in both cases. The correct output should be:

WORD abc 
TOTAL 6 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 3    
0 0
0 1
0 2
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 3   
4 0
5 0
6 0
WORD ac 
TOTAL 8 
FILE C:\Users\Utente\Desktop\find\try.txt 
OCCURENCES 4
3 0
4 0
5 0
6 0    
FILE C:\Users\Utente\Desktop\find\try1.txt 
OCCURENCES 4
0 0
1 0
2 0
3 0

I think that the problem is with the fPosition pointers. Thanks to anyone who helps.

Upvotes: 1

Views: 92

Answers (1)

kiran Biradar
kiran Biradar

Reputation: 12732

You have design issue.

The problem is occurrences info you are maintaining as part of filePath list.

struct filePath{
    char *path; //the file path
    int fileOccurrences; //the occurrences of each file
    struct OccurrencesPosition *position;  // here *****************
    struct filePath *next;
};

And file path info you are maintaining as part of fileWord list.

struct fileWord{
    char word[50]; //the string to search
    int totalOccurences; //the total occurrences of the string
    struct filePath *p; //pointer to the linked list of all the files
    struct fileWord *next; //pointer to the next word
};

Since you only have one file path list, each word in fileWord list is actually pointing to same filepath list.


Every word is pointing to same file path list

fWord *app = malloc(sizeof(fWord));
printf("Insert the word to search: ");
scanf("%s", app->word);

app->p = head; //here

and you are updating the position info inside the filepath for every word.

      w->p->position = getHead(); // //pointer back to the top of the  fPosition structure

Thus filePath list is holding position info only for the latest word you search.


Update:

Your design should look as below.

struct filePath{
    char *path; //the file path
    struct filePath *next;
};

struct OccurrencesPosition{
    int line; //line in which an occurrences is founded
    int character; //shift at which the occurrences comes

    struct filePath pathInfo;

    struct OccurrencesPosition *next; //pointer to the next occurrences
};

struct fileWord{
    char word[50]; //the string to search
    int totalOccurences; //the total occurrences of the string

    int fileOccurrences; //the occurrences of each file
    struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions

    struct fileWord *next; //pointer to the next word
};

Upvotes: 2

Related Questions