Reputation: 115
I want to read some text files under different categories and build a linked list which consist of different words from these text files and total number of times this words occur in text files. Linked list should be in alphabetical order. But when I run the code just three different words printed hundreds of times and their occur values are always 1. There is no problem reading all words. I tested it by adding printf statements in while loops and it prints all words correctly. I guess insert function has some problems.
#include <stdio.h>
#include <stdlib.h >
#include <locale.h>
typedef struct Term {
char * termName;
int occur;
struct Term * next;
} term;
term * insert(term * root, char * word);
int main (void) {
setlocale(LC_ALL, "Turkish");
FILE *fPtr;
int counter = 1;
char path[50];
snprintf(path, sizeof(path), "dataset\\econ\\%d.txt", counter);
term * terms;
terms = NULL;
while (fPtr = fopen(path, "r")) {
while(!feof(fPtr)) {
char word[20];
fscanf(fPtr, "%s", &word);
terms = insert(terms, word);
}
fclose(fPtr);
counter++;
snprintf(path, sizeof(path), "dataset\\econ\\%d.txt", counter);
}
counter = 1;
snprintf(path, sizeof(path), "dataset\\health\\%d.txt", counter);
while (fPtr = fopen(path, "r")) {
while(!feof(fPtr)) {
char word[20];
fscanf(fPtr, "%s", &word);
terms = insert(terms, word);
}
fclose(fPtr);
counter++;
snprintf(path, sizeof(path), "dataset\\health\\%d.txt", counter);
}
counter = 1;
snprintf(path, sizeof(path), "dataset\\magazin\\%d.txt", counter);
while (fPtr = fopen(path, "r")) {
while(!feof(fPtr)) {
char word[20];
fscanf(fPtr, "%s", &word);
terms = insert(terms, word);
}
fclose(fPtr);
counter++;
snprintf(path, sizeof(path), "dataset\\magazin\\%d.txt", counter);
}
fclose(fPtr);
while (terms -> next != NULL) {
printf("%s: %d\n", terms -> termName, terms -> occur);
terms = terms -> next;
}
}
term * insert(term * root, char * word) {
if (root == NULL) {
root = (term *)malloc(sizeof(term));
root -> next = NULL;
root -> termName = word;
root -> occur = 1;
return root;
} else if((strcmp(root-> termName, word)) < 0) {
term * temp = (term *)malloc(sizeof(term));
temp -> termName = word;
temp -> occur = 1;
temp -> next = root;
return temp;
} else {
term * iter = root;
while ((iter -> next != NULL) && (strcmp(iter -> termName, word) >
0)) {
iter = iter -> next;
if (strcmp(iter -> termName, word) == 0) {
iter -> occur += 1;
return root;
}
}
term * temp = (term *)malloc(sizeof(term));
temp -> next = iter -> next;
iter -> next = temp;
temp -> termName = word;
temp -> occur = 1;
return root;
}
}
Upvotes: 1
Views: 69
Reputation: 8142
This line (and the others like it) are the problem.
temp -> termName = word;
You're assigning whatever word
is pointing to which is the array you declare here:
char word[20];
Which is limited in scope to the inner most while
loop. Once the loop finishes, the memory used by the array is fair game and could be used by another variable which means your code is being hit hard by undefined behaviour. That you're getting any recognisable words at the end is down to pure luck.
So make a copy of it using either of these methods
temp -> termName = strdup(word);
or
temp -> termName = malloc(strlen(word)+1); // Always remember to allocate enough space for the NUL terminating character
strcpy(temp->termName, word);
And don't forget to free it too.
The reason why you're also not seeing the count go up, which in theory it should do since the word
passed in will be the same string as in the list node, is because your check to see if the word already exists in the list is wrong.
The check to see if a string is identical is right, but you'll never trigger it because the while
loop only checks that the string is after the current node.
while ((iter -> next != NULL) && (strcmp(iter -> termName, word) > 0)) {
It'd make more sense to move this block of code outside of the while
loop
if (strcmp(iter -> termName, word) == 0) {
iter -> occur += 1;
return root;
}
Looking at it in more detail, the whole else
part of your insert
routine can't work as it currently is written.
Imagine if you were to insert into it the following strings: "A" "C" and "E". Your code will add them in reverse order, so you'd get "E", "C", "A" in your output.
If you then try adding "D", it'll put it after "C". It'll start by comparing it with "D" and strcmp
will return a positive number. Then it'll compare it with "C" and the loop will stop as that returns a negative number. You then add "D" after "C".
As per the previous block if((strcmp(root-> termName, word)) < 0)
, when strcmp
returns a negative value you want to insert the new node before the one you're comparing. But you can't do that as you don't know what the previous node was.
So by combining those two bits of code, add in some tracking of the of the previous node and a few other tweaks, your insert
function becomes:
term * insert(term * root, char * word) {
if (root == NULL) {
root = (term *)malloc(sizeof(term));
root -> next = NULL;
root -> termName = strdup(word);
root -> occur = 1;
return root;
} else {
term * iter = root, *last = NULL;
while ((iter != NULL) && (strcmp(iter -> termName, word) > 0)) {
last = iter;
iter = iter -> next;
}
if ((iter)&&(strcmp(iter -> termName, word) == 0)) {
iter -> occur += 1;
return root;
} else if (last == NULL) {
term * temp = (term *)malloc(sizeof(term));
temp -> termName = strdup(word);
temp -> occur = 1;
temp -> next = root;
return temp;
} else {
term * temp = (term *)malloc(sizeof(term));
temp -> next = last -> next;
last -> next = temp;
temp -> termName = strdup(word);
temp -> occur = 1;
return root;
}
}
}
So it is now checking to find the node where it is no longer alphabetically before the current node. If the node has the same word, then update the occur
. If we've not yet set last
that means we're at the beginning, so prepend the new node at the beginning and return it. Lastly we know that the last
node is after this word and the iter
node is before it (or doesn't exist), so we insert the new word in between.
Upvotes: 2