Reputation: 2345
I am trying to play around with C data structure (hash table). I am not using any pre-built hashtable library (like STL), because I want to have a better understanding on how it works. So here I create a hash table, containing list of elements, where each element contains a key and a string element data (array of chars), and the length of the string element data.
My implementation works, but after discussing it with one of my colleagues I was told that my implementation was not efficient, especially in the context that: my implementation was not cache aware, causing my hashtable lookup to be in-efficient. I didn't really understand his explanation.
So I would like to know, what does a cache aware locality implementation actually means?
How can I make my hash table implementation become more efficient by using that cache aware locality property when doing a lookup? Is there a better way to build the structure for this, and a better way of organizing the elements (doing a lookup)?
Here is what I have done so far:
HASHTBL.h
struct hashelt {
struct hashelt* he_next; /* One way linked list pointer */
char* he_key; /* Pointer to element's key */
char* he_data; /* Pointer to element's data */
int he_data_len; /* Length of element's data */
};
typedef struct hashelt hashelt;
struct hashtbl {
hashelt** ht_links; /* Array of pointers to elemsnts */
int ht_prime; /* Length of hash table - it is a prime number */
};
typedef struct hashtbl hashtbl;
int prime_check( int num );
hashtbl* hashtbl_init( int tbl_size );
int hashtbl_key_convert( hashtbl* htable, char* hkey );
hashelt* hashtbl_lookup( hashtbl* htable, char* hkey, char* hdata );
int hashtbl_add( hashtbl* htable, char* hkey, char* hdata );
void hashtbl_print( hashtbl* htable );
HASHTBL.c
/*
* prime_check
* Check if num is a prime or not.
* num Number to use as an upper bound.
*
* The functions returns 0 for a prime and 1 otherwise.
*/
int prime_check( int num ) {
/* Local declarations */
int i; /* Loop counter */
/* Search through the first sqrt(num) integers */
for( i = 2; i*i < num; i++ )
if ( (num % i) == 0 )
return 1;
/* It is a prime */
return 0;
}
/*
* hashtbl_init
* Create and initialize a hash table.
* The input variable are
* tbl_size Suggested size of the table, which should be a prime or 0.
* The functions returns a pointer to a hashtbl or NULL for failure.
* The hash table is the size of the largest prime found for the suggested
* table size of the default size if 0 is given as a suggestion.
*/
hashtbl* hashtbl_init( int tbl_size ) {
/* Local declarations */
hashtbl* hp; /* Hash table pointer */
int i; /* Loop counter */
/* Initial values */
hp = NULL;
/* Determine the actual table size */
if ( tbl_size <= 0 )
tbl_size = DEFAULT_HASHTBL_LENGTH;
else if ( prime_check( tbl_size ) )
return NULL;
if ( tbl_size <= 0 )
return NULL;
/* Allocate the hash table */
if( ( hp = (hashtbl*)malloc( sizeof(hashtbl) ) ) == NULL )
return hp;
/* Allocate the linked list vector */
if( ( hp->ht_links = (hashelt**)malloc( tbl_size * sizeof(hashelt*) ) ) == NULL ) {
free( hp );
return NULL;
}
/* Fill in the hash table with no entries */
for( i = 0 ; i < tbl_size; i++ )
hp->ht_links[i] = NULL;
/* Fill in the hash table size */
hp->ht_prime = tbl_size;
/* All done */
return hp;
}
/*
* hashtbl_key_convert
* Make an index into the key chain in the hash table from a key:
* kindex = sum of the characters in hkey modulo ht_prime
* The input variable are
* htable A pointer to a hash table.
* hkey A pointer to a a key.
* The functions returns an index into the key chain.
*/
int hashtbl_key_convert( hashtbl* htable, char* hkey ) {
/* Local declarations */
int i; /* Loop counter */
int klen; /* Length of the key */
int kindex; /* Key index to return */
/* Compute the key index */
klen = strlen( hkey );
for( kindex = 0, i = 0 ; i < klen; i++ )
kindex += hkey[i];
kindex = kindex % htable->ht_prime;
/* All done */
return kindex;
}
/*
* hashtbl_lookup
* Lookup a (key,data) in a hash table.
* The input variable are
* htable A pointer to a hash table.
* hkey A key.
* hdata A pointer to the data.
* The functions returns 0 for found and 1 for not found.
*/
hashelt* hashtbl_lookup( hashtbl* htable, char* hkey, char* hdata ) {
/* Local declarations */
hashelt* hep; /* Hash element pointer */
int i; /* Loop counter */
int key; /* Hash table key index */
/* Get key index from hkey */
key = hashtbl_key_convert( htable, hkey );
/* Search through the hash table, including collicions */
for( hep = htable->ht_links[key] ; hep != NULL ; hep = hep->he_next )
if ( strcmp( hep->he_key, hkey ) == 0 )
return hep;
/* Not found */
return NULL;
}
/*
* hashtbl_add
* Add a (key,data) to a hash table.
* The input variable are
* htable A pointer to a hash table.
* hkey A key.
* hdata A pointer to the data.
* The functions returns 0 for success and 1-4 for failure.
*
*/
int hashtbl_add( hashtbl* htable, char* hkey, char* hdata ) {
/* Local declarations */
hashelt* hep; /* Element in linked list */
hashelt* newelt; /* New element in hash table */
int i; /* Loop counter */
int dlen; /* Length of data */
int key; /* Hash table key index */
/* Get key index from hkey */
key = hashtbl_key_convert( htable, hkey );
/* Check if the (key,data) is already in the hash table */
if ( ( hep = hashtbl_lookup( htable, hkey, hdata ) ) != NULL )
if ( strcmp( hep->he_data, hdata ) == 0 )
return 1;
/* Add the data */
dlen = strlen( hdata );
hep = htable->ht_links[key];
if ( ( newelt = (hashelt*)malloc( sizeof( hashelt ) ) ) == NULL ) {
fprintf( stderr, "hashtbl_add: Cannot allocate new hash element\n" );
return 3;
}
newelt->he_next = hep;
newelt->he_data_len = dlen;
newelt->he_key = (char*)malloc( (strlen(hkey)+1) * sizeof(char) );
newelt->he_data = (char*)malloc( (dlen+1) * sizeof(char) );
if ( newelt->he_key == NULL || newelt->he_data == NULL ) {
fprintf( stderr, "hashtbl_add: Cannot allocate new hash element contents\n" );
return 4;
}
strcpy( newelt->he_key, hkey );
strcpy( newelt->he_data, hdata );
htable->ht_links[key] = newelt;
/* All done */
return 0;
}
/*
* hashtbl_print
* Print an entire hash table.
* The input variable are
* htable A pointer to a hash table.
* The functions returns 0 for success and 1-4 for failure.
*/
void hashtbl_print( hashtbl* htable ) {
/* Local declarations */
hashelt* hep; /* Element in linked list */
int i; /* Loop counter */
int l; /* Link count */
/* Initial printing */
printf( "\nHash table contents\n" );
/* Print a has tbale out, one key bucket at a time */
for( i = 0 ; i < htable->ht_prime ; i++ ) {
printf( "\nkey index %i\n", i );
hep = htable->ht_links[i];
if ( hep == NULL )
printf( " No entries in the linked list\n" );
else {
l = 0;
while( hep != NULL ) {
printf( " Element %d\n", l );
printf( " key: %s\n", hep->he_key );
printf( " data: %s\n", hep->he_data );
printf( " data_len: %d\n", hep->he_data_len );
hep = hep->he_next;
}
}
}
/* All done */
}
MAIN FILE
void make_string( char* buffer, int blen ) {
/* Local declarations */
char viable_chars[] = { "abcdefghijklmnopqrstuvwxyz-0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ" };
char* bp; /* Pointer into buffer */
int i; /* Loop counter */
int c; /* Index into viable_chars */
static int vc_len = 0; /* Length of viable_chars */
/* Do once only */
if ( vc_len == 0 )
vc_len = strlen( viable_chars );
/* Do the fill operation on a subset of buffer */
blen = rand() % blen;
if ( blen == 0 ) blen++;
for( bp = buffer, i = 0; i < blen ; i++ ) {
c = rand() % vc_len;
*bp++ = viable_chars[c];
}
*bp++ = '\0';
}
int main( int argc, char** argv ) {
/* Local declarations */
hashtbl* htable; /* Hash table pointer */
char* kstrings; /* Pointer to key vector */
char* dstrings; /* Pointer to data vector */
int tblsize = 0; /* Hash table size */
int nkeys = 0; /* Number of keys */
int dlen = 0; /* Maximum data length for (keys,data) */
int i; /* Loop counter */
double t1; /* Time keeper */
double t2; /* Time keeper */
double mrun(); /* Get timing information */
#ifdef GOOD_SEED
/* Get a good random number seed */
struct timeval t1; /* holder for time of day (seconds, microseconds) */
gettimeofday( &t1, NULL );
srand( t1.tv_usec * t1.tv_sec );
#endif
/* Get hash table size */
printf( "Hash table size (pick a prime or bound for one): " );
scanf( "%d", &tblsize );
fflush( stdin );
/* Initialize hash table */
if( ( htable = hashtbl_init( tblsize ) ) == NULL ) {
fprintf( stderr, "Oops... hashtbl_init returned NULL\n" );
return 1;
}
printf( "Actual size of hash table is %d\n", htable->ht_prime );
/* Now fill it up */
while ( nkeys <= 0 ) {
printf( "How many keys do you want? " );
scanf( "%d", &nkeys );
fflush( stdin );
}
while ( dlen <= 0 ) {
printf( "What is the maximum character string length? " );
scanf( "%d", &dlen );
fflush( stdin );
}
/* Allocate vectors for (key,data) */
kstrings = (char*)malloc( (dlen+1) * sizeof( char ) );
dstrings = (char*)malloc( (dlen+1) * sizeof( char ) );
if ( kstrings == NULL || dstrings == NULL ) {
fprintf( stderr, "main: Could not allocate string vectors for (key,data)\n" );
return 2;
}
/* Now fill it up */
for( i = 0 ; i < nkeys ; i++ ) {
make_string( kstrings, dlen );
make_string( dstrings, dlen );
hashtbl_add( htable, kstrings, dstrings );
}
/* Print it out */
hashtbl_print( htable );
/* All done */
return 0;
}
Upvotes: 3
Views: 2666
Reputation: 23560
The locality would be better if you changed
struct hashtbl {
hashelt** ht_links; /* Array of pointers to elemsnts
...
to be:
struct hashtbl {
...
hashelt* elements; /* Array of elements (as last entry of struct)
Think about how the hash-table looks like, in your version the table consists of pointers to elements, in my modification the table actually contains those structs packed one after the other!. There is of course a disadvange to it: An empty hash bin does not only contain a null-pointer (like in your version) but a whole sizeof(hashelt)
. Also, you have to make sure to allocate the hashtbl-struct not with a size of sizeof(hashtbl)
because your hashtbl contains the hashelt's: you have to allocate ht_prime*sizeof(hashelt)+sizeof(int)
(the first summand is for the ht_prime hashelt's you contain and the second summand is for storing the ht_prime itself).
Upvotes: 1