Frank du Plessis
Frank du Plessis

Reputation: 73

need help rehashing a hashtable in c

I want to rehash a hashtable by allocating space for a new table, traverse the old table, and for each element, compute a new hash value and then link it into the new table. I have linked lists as entries into the hashtable thus the second for loop whilst traversing the old hashtable. I also want to free the old table, but first get the elements into the new table correctly.

I need help, where am I going wrong in traversing the old table? Also can I just point the original ht to the newht at the end? I need to free the old table(prevtable) afterwards also, which I will figure out later.

typedef struct hashtable {
    htentry_ptr *table;         /*<< a pointer to the underlying table        */
    unsigned int size;          /*<< the current size of the underlying table */
    unsigned int num_entries;   /*<< the current number of entries            */
    float max_loadfactor;       /*<< the maximum load factor before the
                                 *   underlying table is resized              */
    unsigned short idx;         /*<< the index into the delta array           */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison
                                         *   function                         */
} hashtable_t;

The rehash function looks like this

static void rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable;
    /* store reference to the old table */
    prevtable->table = ht->table;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;
    printf("\nrehashing\n");
    ht->size = getsize(prevtable);
    printf("\nnew table size %d\n", ht->size);
    ht->table = calloc(ht->size , sizeof(htentry_t));
    for (i = 0; i < prevtable->size; i++) {
        for (p = prevtable->table[i]; p; p = p->next_ptr) {
            newidx = ht->hash(p->key, ht->size);
            if(ht->table[newidx]){
                htentry_ptr next;
                htentry_ptr prev = NULL;
                next = ht->table[newidx];
                printf("\ncollision adding to linked list\n");
                while (next) {
                    prev = next;
                    next = next->next_ptr;
                }
                prev->next_ptr = p;
                p->next_ptr = NULL;
            } else {
                ht->table[newidx] = p; 
                ht->table[newidx]->next_ptr = NULL;
                ht->num_entries++;
            }
        }
    }
}

inserting into the hashtable. When the table gets too dense the rehash function is called at the end of the insert.

int ht_insert(hashtab_ptr ht, void *key, void *value)
{
/* key is the id of the variable like num1 and value is number 
index = value 
*/

unsigned int N = ht->size;
unsigned int ne;
float current_loadfactor;
int k;
htentry_ptr p;
p = calloc(1,sizeof(htentry_t));
p->key = key;
p->value = value;
k = ht->hash(key, ht->size);
if(ht->table[k]){
    htentry_ptr next;
    htentry_ptr prev = NULL;
    /* theres already something in the index*/
    next = ht->table[k];
    printf("\ncollision adding to linked list");
    while (next) {
        prev = next;
        next = next->next_ptr;
    }
    ht->num_entries++;
    prev->next_ptr = p;
    p->next_ptr = NULL;
} else {
    ht->table[k] = p;
    ht->table[k]->next_ptr = NULL; 
    ht->num_entries++;
}
ne = ht->num_entries;
current_loadfactor = ne / N;
if (current_loadfactor > ht->max_loadfactor) {
    rehash(ht);
}

Upvotes: 0

Views: 5227

Answers (3)

Frank du Plessis
Frank du Plessis

Reputation: 73

I think is might be the solution, but im not 100% sure.

static void rehash(hashtab_ptr ht)
{
        unsigned int old_size, new_size;
        unsigned int newindex;
        unsigned int i;
        htentry_ptr q, p;
        htentry_ptr *new_table;
        old_size = ht->size;
        /*gets new size in prime table */
        new_size = getsize(ht);
        new_table = malloc(sizeof(htentry_t) * new_size);
        /* nullify the new table */
        for (i = 0; i < new_size; i++) {
            new_table[i] = NULL;
        }
        printf("\n*****rehashing******\n");
        ht->size = new_size; 
        printf("%s %d\n", "new size:", new_size);
        for (i = 0; i < old_size; i++) {
            p = ht->table[i];
            while (p) {
                q = p->next_ptr;
                newindex = ht->hash(p->key, new_size);
                /*
                temp = malloc(sizeof(htentry_t));
                temp->key = p->key;
                temp->value = p->value;
                temp->next_ptr = new_table[ht->hash(temp->key, next_size)];
                new_table[ht->hash(temp->key, next_size)] = temp;
                */
                if (new_table[newindex]) {
                    p->next_ptr = new_table[newindex];
                    new_table[newindex] = p;    
                } else {
                    new_table[newindex] = p;
                    new_table[newindex]->next_ptr = NULL;
                }
                p = q;
            }
        }

    free(ht->table);
    ht->table = new_table;  
}

Upvotes: 0

John Hammond
John Hammond

Reputation: 477

Also can I just point the original ht to the newht at the end?

No.

The pointer ht is a copy on the local function stack. Changing the value with ht = newht; just changes the copy.

The easiest solution would be to let your rehash() function return the pointer to the new hashtable.

static hashtab_ptr rehash(hashtab_ptr ht)
{
  [...]
  return newht;
}

Then you can call it like:

current_ht = rehash(current_ht);

The second solution would be to change the prototype to pass a double pointer:

static void rehash(hashtab_ptr *ht)
{
  [...]
  *ht = newht;
}

This means that you need to change the use of ht everywhere in your rehash() function to reflect that it's a double pointer now.

The third solution would be to not create a new hashtable_t, but just create a new htentry_ptr *table area and update the values in ht; This would be my favorite solution in a code review.

I need help, where am I going wrong in traversing the old table?

  while (next)
  {
    prev = next;
    next = next->next_ptr;
    newht->num_entries++;
  }

The newht->num_entries++; is at the wrong place. When you look for the end of the linked list, the elements that are already there don't increase the size of your hashtable. You can move the expression newht->num_entries++; out of both if/else - your table increases by one no matter if there is a collision or not.

Second, at the end of the linked list loop it will look like this:

prev = [last_element of linked list];
next = null;
prev->next_ptr = old_element;

But.. where does old_element->next_ptr point to? There is no guarantee that it is null. So you need to add p->next_ptr = NULL; so that an element that wasn't formerly at the end of the collision and is now at the end of the collision properly ends the linked list.

The problem is you can't just do p->next_ptr = NULL; because then your loop thinks it's at the end. Your concept is screwed when a linked list element in the middle of the linked list gets reassigned to a new index in the new hashtable. The element can't have the correct value for the old and the new table in next_ptr at the same time.

So, there are two solutions:
a) Go backwards through your collision list, but as this is a single linked list as it seems, this is a very painful process of putting elements on a stack.
b) Rehash the table by creating new elements instead of trying to reuse the old elements.

EDIT:

Okay, with the insert function, the rehash function can look like this (quick & dirty):

static hashtab_ptr rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable = ht;
    hashtab_ptr newht;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;

    printf("\nrehashing");

    newht->idx  = prevtable->idx + 1;
    newht->size = getsize(prevtable);
    newht->num_entries = 0;
    newht->hash = prevtable->hash;
    newht->cmp  = prevtable->cmp;

    newht->max_loadfactor = prevtable->max_loadfactor;

    newht->table = calloc(newht->size , sizeof(htentry_t));

    for (i = 0; i < ht->size; i++) {
      for (p = ht->table[i]; p; p = p->next_ptr) {
        ht_insert(newht, p->key, p->value);
    }

    return newht;
   }

Then you should have a function to free a hashtable, so you end up using it:

if (current_loadfactor > ht->max_loadfactor) {
    hashtab_ptr tempht = ht;
    ht = rehash(ht);
    ht_delete(tempht);
}

Upvotes: 1

wildplasser
wildplasser

Reputation: 44250

This is intended to show that:

  • you only need to reallocate the table[] member, not the envelope
  • pointers to pointers can simplify things
  • when moving an element from the old table to the new one, you should take care not to damage its next pointer

[Note: I removed the typedefines, because I hate them ...]


#include <stdio.h>
#include <stdlib.h>

struct hashentry {
    struct hashentry *next;
    char *key;
    void *payload;
    };
struct hashtable {
    struct hashentry **table;   /*<< a pointer to array of pointers        */
    unsigned int size;          /*<< current size */
    unsigned int num_entries;   /*<< current number of entries    */
    float max_loadfactor;
    /* unsigned short idx;      the index into the delta array(Quoi?) */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison function     */
    };

static void rehash(struct hashtable *ht);
// The rehash function could look like this
static void rehash(struct hashtable *ht)
{
    struct hashentry **newtab;
    struct hashentry **pp, **qq, *this;
    unsigned int newsize, oldidx, newidx;

    newsize = ht->size * 2; /* or something like (max_loadfactor*num_entries), rounded up */
    fprintf(stderr, "new table size %u\n", newsize);
    newtab = malloc(newsize * sizeof *newtab );

    for (newidx=0; newidx < newsize; newidx++) {
        newtab[newidx] = NULL;
        }

    for (oldidx = 0; oldidx < ht->size; oldidx++) {
        for (pp = &ht->table[oldidx]; *pp; ) {
            this = *pp;
            *pp = this->next; /* this is important ! */
            this->next = NULL; /* ... because ... */

            newidx = ht->hash(this->key, newsize);
            for(qq = &newtab[newidx]; *qq; qq = &(*qq)->next) {
                /* You could count the number of "collisions" here */
                }

            *qq = this;
        }
    }
    free(ht->table);
    ht->table = newtab;
    ht->size = newsize;
    /* The rest of the fields does not need to change */
}

Upvotes: 0

Related Questions