Reputation: 207
I'm having problem with pointers in a C program that count the occurrences of a string or more in a bunch of file. The program take in input a file which contains the paths of the files in which search the occurrences. All the files that i will mention are contained in the same folder of the project, whose name is "find". In my case, the input file is "path.txt":
C:\Users\Utente\Desktop\find\try.txt
C:\Users\Utente\Desktop\find\try1.txt
The try.txt content is:
abc
abc
abc
ac
ac
ac
ac
The try1.txt content is:
ac
ac
ac
ac
abc
abc
abc
My program is composed by 4 files, two header-files and two source files:
find.c:
#include "find.h"
int main(int argc, char * argv[]){
FILE *fInput = NULL;
FILE *fp = NULL;
char *line1;
char *line2;
int endOfLineDetected = 0;
size_t nrOfCharRead = 0;
char ch;
fWord *w = NULL;
fWord *start = NULL;
fWord *tail = NULL;
fPath *head = NULL;
fPath *current = NULL;
fInput = fopen(argv[1], "r"); //the file that contains the path of the file in which search.
if(fInput == NULL){
fprintf(stderr, "Cannot open %s, exiting. . .\n", argv[1]);
exit(1);
}
while(!endOfLineDetected){ //read line by line the input file in order to save the path in a structure
line1 = getLineOfAnySize(fInput,128,&endOfLineDetected,&nrOfCharRead);
fPath *node = malloc (sizeof(fPath));
node->path = line1;
node->fileOccurrences = 0;
node->position = NULL;
node->next = NULL;
if(head == NULL){
current = head = node;
}else{
current = current->next = node;
}
}
fclose(fInput);
//create a linked list of the type fWord, one structure for each word.
do{
fWord *app = malloc(sizeof(fWord));
printf("Insert the word to search: ");
scanf("%s", app->word);
app->totalOccurences = 0;
app->p = head;
app->next = NULL;
if(start == NULL){
tail = start = app;
}else{
tail = tail->next = app;
}
printf("Do you want to insert another word? (Y/N): ");
scanf(" %c", &ch);
}while(ch == 'y' || ch == 'Y');
w = start; //pointer back to the top of the fWord structure
//traverse all the structure and execute the algorithm
while(w != NULL){
while(w->p != NULL){
fp = fopen(w->p->path, "r");
if(fp == NULL){
fprintf(stderr, "Cannot open %s, exiting. . .\n", w->p->path);
exit(1);
}
int countLine = 0;
w->p->fileOccurrences = 0;
endOfLineDetected = 0;
while(!endOfLineDetected){
line2 = getLineOfAnySize(fp,128,&endOfLineDetected,&nrOfCharRead);
int n = strlen(line2);
int m = strlen(w->word);
w->p->fileOccurrences = w->p->fileOccurrences + KMP(line2, w->word, n, m, countLine, w->p);
countLine = countLine + 1;
}
w->totalOccurences = w->totalOccurences + w->p->fileOccurrences;
w->p->position = getHead(); // //pointer back to the top of the fPosition structure
w->p = w->p->next;
fclose(fp);
}
w->p = head; //pointer back to the top of the fPath structure
}
w = start; //pointer back to the top of the fWord structure
//traverse all the structure and print out the occurrences and their position
while(w != NULL){
w->p = head;
printf("WORD %s \r\n", w->word);
printf("TOTAL %d \r\n", w->totalOccurences);
while(w->p != NULL){
printf("FILE %s \r\n", w->p->path);
printf("OCCURENCES %d \r\n", w->p->fileOccurrences);
while (w->p->position != NULL){
printf("%d %d\r\n", w->p->position->line, w->p->position->character);
w->p->position = w->p->position->next;
}
w->p = w->p->next;
}
w = w->next;
}
printf("\r\n"); //the file ends with an empty line
return 0;
}
//method used for read line by line a file
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead){
char *line; // buffer for our string
int ch; // we will read line character by character
size_t len = 0; // number of characters read (character counter)
size_t lineSize = typicalSize; // initial size of the buffer allocated for the line
*nrOfCharRead = 0;
if(!fp) return NULL; // protection
// allocating the buffer
line = realloc(NULL, sizeof(char)*lineSize); // expected size of the line is up to typicalSize
if (!line) return line; // protection, if we fail to allocate the memory we will return NULL
while (1) { // loop forever
ch = fgetc(fp); // getting character by character from file
if (ch == '\n') break; // end of line detected - breaking the loop
if( ch == EOF) {
*endOfLineDetected = 1;
break; // end of file detected - breaking the loop
}
line[len++] = ch; // store the character in the line buffer, increase character counter
if (len == lineSize){ // we reached the end of line buffer (no more room)
lineSize = lineSize + 64; // we have to increase the line size
line = realloc(line, sizeof(char)*(lineSize)); // line buffer has new size now
if (!line) return line; // if we fail to allocate memory we will return NULL
}
if( (len == 0) && *endOfLineDetected){ // empty file
*endOfLineDetected = 1;
break;
}
}
line[len++] ='\0'; // ending the string (notice there is no '\n' in the string)
*nrOfCharRead = len;
return line; // return the string
}
find.h:
#include "kmp.h"
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead);
kmp.c:
#include "kmp.h"
fPosition *head = NULL;
fPosition *current = NULL;
// Function to implement KMP algorithm
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app){
int count = 0;
// next[i] stores the index of next best partial match
int next[n + 1];
for (int i = 0; i < n + 1; i++)
next[i] = 0;
for (int i = 1; i < n; i++){
int j = next[i + 1];
while (j > 0 && Y[j] != Y[i])
j = next[j];
if (j > 0 || Y[j] == Y[i])
next[i + 1] = j + 1;
}
for (int i = 0, j = 0; i < m; i++){
if (*(X + i) == *(Y + j)){
if (++j == n){
count = count + 1; //count the occurrences of the string in this file
fPosition *node = malloc (sizeof(fPosition));
node->line = line; //the current line
node->character = i - j + 1; //the shift in which occurs
node->next = NULL;
if(head == NULL){
current = head = node;
}else{
current = current->next = node;
}
app->position = current;
}
}
else if (j > 0) {
j = next[j];
i--; // since i will be incremented in next iteration
}
}
return count; //return the number of occurences found
}
//take the pointer back to the top of fPosition
fPosition * getHead(){
fPosition *app = head;
head = NULL;
return app;
}
kmp.h:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
struct filePath{
char *path; //the file path
struct filePath *next;
};
struct OccurrencesPosition{
int line; //line in which an occurrence is founded
int character; //shift at which the occurrences comes
struct filePath pathInfo;
struct OccurrencesPosition *next; //pointer to the next occurrences
};
struct fileWord{
char word[50]; //the string to search
int totalOccurences; //the total occurrences of the string
int fileOccurrences; //the occurrences of each file
struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions
struct fileWord *next; //pointer to the next word
};
typedef struct filePath fPath;
typedef struct fileWord fWord;
typedef struct OccurrencesPosition fPosition;
fPosition * getHead();
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app);
The problem is that when i run my program passing in input "abc" and "ac" it returns wrong value. More precisely, returns the value corresponding to "ac" in both cases. Here's the execution:
PS C:\Users\Utente\Desktop\find> gcc find.c kmp.c -o "find.exe"
PS C:\Users\Utente\Desktop\find> .\find.exe "path.txt"
Insert the word to search: abc
Do you want to insert another word? (Y/N): Y
Insert the word to search: ac
Do you want to insert another word? (Y/N): N
WORD abc
TOTAL 6
FILE C:\Users\Utente\Desktop\find\try.txt
OCCURENCES 4
3 0
4 0
5 0
6 0
FILE C:\Users\Utente\Desktop\find\try1.txt
OCCURENCES 4
0 0
1 0
2 0
3 0
WORD ac
TOTAL 8
FILE C:\Users\Utente\Desktop\find\try.txt
OCCURENCES 4
FILE C:\Users\Utente\Desktop\find\try1.txt
OCCURENCES 4
As you can see, the WORD and TOTAL are correct in both cases, but the occurrences not. They correspond to "ac" in both cases. The correct output should be:
WORD abc
TOTAL 6
FILE C:\Users\Utente\Desktop\find\try.txt
OCCURENCES 3
0 0
0 1
0 2
FILE C:\Users\Utente\Desktop\find\try1.txt
OCCURENCES 3
4 0
5 0
6 0
WORD ac
TOTAL 8
FILE C:\Users\Utente\Desktop\find\try.txt
OCCURENCES 4
3 0
4 0
5 0
6 0
FILE C:\Users\Utente\Desktop\find\try1.txt
OCCURENCES 4
0 0
1 0
2 0
3 0
I think that the problem is with the fPosition pointers. Thanks to anyone who helps.
Upvotes: 1
Views: 92
Reputation: 12732
You have design issue.
The problem is occurrences info you are maintaining as part of filePath
list.
struct filePath{
char *path; //the file path
int fileOccurrences; //the occurrences of each file
struct OccurrencesPosition *position; // here *****************
struct filePath *next;
};
And file path info you are maintaining as part of fileWord
list.
struct fileWord{
char word[50]; //the string to search
int totalOccurences; //the total occurrences of the string
struct filePath *p; //pointer to the linked list of all the files
struct fileWord *next; //pointer to the next word
};
Since you only have one file path list, each word in fileWord
list is actually pointing to same filepath
list.
Every word is pointing to same file path list
fWord *app = malloc(sizeof(fWord));
printf("Insert the word to search: ");
scanf("%s", app->word);
app->p = head; //here
and you are updating the position info inside the filepath
for every word.
w->p->position = getHead(); // //pointer back to the top of the fPosition structure
Thus filePath
list is holding position info only for the latest word you search.
Update:
Your design should look as below.
struct filePath{
char *path; //the file path
struct filePath *next;
};
struct OccurrencesPosition{
int line; //line in which an occurrences is founded
int character; //shift at which the occurrences comes
struct filePath pathInfo;
struct OccurrencesPosition *next; //pointer to the next occurrences
};
struct fileWord{
char word[50]; //the string to search
int totalOccurences; //the total occurrences of the string
int fileOccurrences; //the occurrences of each file
struct OccurrencesPosition *position; //pointer to the linked list which tracks all the occurrences and their positions
struct fileWord *next; //pointer to the next word
};
Upvotes: 2