maosi100
maosi100

Reputation: 25

Filling a hash table with an linked list

I'm working on creating a hash table (array) with linked list collision control for a dictionary of words. The dictionary of words is first loaded into a linked list named "list" using the data structure below. However I have issues in creating the collision control if I have multiple entries per hash code in the array: The linked list is created ad infinitum for multiple entries..

That's the structure I'm using for the entries.

typedef struct node
{
    char word[LENGTH + 1];
    int hash;
    struct node *next;
}
node;

And here's the part to fill the hash table:

    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash) // No entry in the array yet
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else                                     // If array is already filled
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }

Hope you can help, thanks!

See full code here:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

const unsigned int N = 4000;

typedef struct node
{
    char word[LENGTH + 1];
    int hash;
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    node *list = NULL;
    char word[LENGTH + 1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        if (list == NULL)
        {
            node *n = malloc(sizeof(node));
            if (n == NULL)
            {
                return false;
            }
            strcpy(n->word, word);
            n->hash = hash(word);
            n->next = NULL;
            list = n;
            wordcount++;
        }
        else
        {
            node *n = malloc(sizeof(node));
            if (n == NULL)
            {
                return false;
            }
            strcpy(n->word, word);
            n->hash = hash(word);
            n->next = list;
            list = n;
            wordcount++;
        }
    }
    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash)
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }
    // test function to print
    printf("%s\n%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word, table[196].next->next->next->word);
    return true;
    fclose(input);
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i++)
    {
        if (word[i] != '\'')
        {
            hash += ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}

The dictionaries/medium file contains the following items currently:

maxim
maxim
Nina
maxim
Lukas
Nina

That's also why I am using array element 196 to print out which is the hash code for 'Nina'.

Edit: thanks to the replies so far I have removed redundancies and tried to fill the array as the list is created. However I still have an issue which I think is due to self-referencing when an array spot is already filled. Really lost how to resolve that. New code is here:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

const unsigned int N = 4000;

typedef struct node
{
    char word[LENGTH + 1];
    int hash;
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    node *list = NULL;
    char word[LENGTH + 1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        node *n = malloc(sizeof(node));
        if (n == NULL)
        {
            return false;
        }
        strcpy(n->word, word);
        n->hash = hash(word);
        if (table[n->hash].hash != n->hash)
        {
            n->next = list;
            table[n->hash] = *n;
        }
        else
        {
            n->next = &table[n->hash];
            table[n->hash] = *n;
        }
        free(n);
        wordcount++;
    }

    // test function to print
    printf("%s\n%s\n%s\n", table[196].word, table[196].next->word, table[196].next->next->word);
    return true;
    fclose(input);
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i++)
    {
        if (word[i] != '\'')
        {
            hash += ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}

Upvotes: 0

Views: 1176

Answers (1)

Ian Abbott
Ian Abbott

Reputation: 17403

This part of the code is wrong:

    while (list->next != NULL)
    {
        if (table[list->hash].hash != list->hash)
        {
            node *tmp = list->next;
            table[list->hash] = *list;
            table[list->hash].next = NULL;
            list = tmp;
        }
        else
        {
            node *tmp = list->next;
            list->next = &table[list->hash];
            table[list->hash] = *list;
            list = tmp;
        }
    }

In particular, the else block is wrong, and the loop ignores the final element on the list. The loop should be something like this:

    while (list != NULL)
    {
        node *next = list->next;
        node copy = table[list->hash];
        list->next = copy.next;
        table[list->hash] = *list;
        *list = copy;
        list = next;
    }

The original "empty" node elements in the table will end up being copied to the last element of each list in the hash table, which is probably not very useful. (The proposed redesign later in this answer turns the hash table into an array of pointers and does not store any "empty" node contents.)

Another problem is that the input file is never closed because these lines are in the wrong order:

    return true;
    fclose(input);

The fclose call is never reached.

The way that the length of the array table is specified is not defined by C:

const unsigned int N = 4000;
/*...*/
node table[N];

The length needs to be an integer constant expression. That could be fixed by replacing const unsigned int N = 4000; with a macro definition: #define N 4000.


The code design can by simplified by changing node table[N]; to node *table[N];, i.e. by turning it into an array of pointers. Also, there is no need to store the hash in the node. The hash is just an index into the hashtable. Unused entries in the hash table will be empty lists represented by null pointers. (node *table[N] will be implicitly initialized to contain null pointers.)

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

#define LENGTH 45

#define N 4000

typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

bool load(const char *dictionary);
unsigned int hash(const char *word);

node *table[N];

int main(void)
{
    char *dict = "dictionaries/medium";
    bool loaded = load(dict);
    if (!loaded)
    {
        printf("could not load dictionary\n");
        return 1;
    }
    printf("worked\n");
    return 0;
}

bool load(const char *dictionary)
{
    FILE *input = fopen(dictionary, "r");
    if (input == NULL)
    {
        return false;
    }

    char word[LENGTH + 1];
    int wordcount = 0;

    while(fscanf(input, "%s", word) != EOF)
    {
        int wordhash = hash(word);
        node *n = malloc(sizeof(node));
        if (n == NULL)
        {
            return false;
        }
        strcpy(n->word, word);
        n->next = table[wordhash];
        table[wordhash] = n;
        wordcount++;
    }
    fclose(input);
    return true;
}

unsigned int hash(const char *word)
{
    int hash = 0;

    for (int i = 0; i < strlen(word); i++)
    {
        if (word[i] != '\'')
        {
            hash += ((toupper(word[i]) - 64));
        }
        if (i % 2 == 0)
        {
            hash = (hash * 3);
        }
    }

    hash = hash % N;
    return hash;
}

Upvotes: 1

Related Questions