Tweak the hash computation using an application-defined "seed" value.

This commit is contained in:
LoRd_MuldeR 2022-12-01 23:20:25 +01:00
parent 3c513a66fc
commit 0e9434d939
9 changed files with 66 additions and 33 deletions

View File

@ -1,7 +1,7 @@
Introduction
============
**LibHashSet** is a *hash set* and *hash map* implementation for C99. It uses open addressing and double hashing.
**LibHashSet** is a [*hash set*](https://en.wikipedia.org/wiki/Hash_table) and [*hash map*](https://en.wikipedia.org/wiki/Hash_table) implementation for C99. It uses open addressing and double hashing.
At this time, the *only* types of elements supported are `uint16_t`, `uint32_t` and `uint64_t`.
@ -15,15 +15,18 @@ Here is a simple example of how to use LibHashSet in your application:
```C
#include <hash_set.h>
#include <time.h>
#include <stdio.h>
#define SEED ((uint64_t)time(NULL))
int main(void)
{
uint64_t item;
uintptr_t cursor = 0U;
/* create new hash set instance */
hash_set64_t* const hash_set = hash_set_create64(0U, -1.0);
hash_set64_t* const hash_set = hash_set_create64(0U, -1.0, SEED);
if (!hash_set)
{
fputs("Allocation has failed!\n", stderr);
@ -125,7 +128,8 @@ Allocates a new hash set instance. The new hash set instance is empty initially.
```C
hash_set_t *hash_set_create(
const size_t initial_capacity,
const double load_factor
const double load_factor,
const uint64_t seed
);
```
@ -137,6 +141,9 @@ hash_set_t *hash_set_create(
* `load_factor`
The load factor to be applied to the hash set. The given load factor will be clipped to the **0.125** to **1.0** range. Generally, the default load factor (0.8) offers a good trade-off between performance and memory usage. Higher load factors decrease the memory overhead, but also may increase the time required for insert, lookup and remove operations. If this parameter is less than or equal to *zero*, the *default* load factor is used.
* `seed`
The "seed" value that is used to tweak the internal hash computation. The application should set this parameter to a value that is hard to predict and unlikely to repeat (e.g., a reading taken from a high-resolution timer).
#### Return value
On success, this function returns a pointer to a new hash set instance. On error, a `NULL` pointer is returned.
@ -468,7 +475,8 @@ Allocates a new hash map instance. The new hash map instance is empty initially.
```C
hash_map_t *hash_map_create(
const size_t initial_capacity,
const double load_factor
const double load_factor,
const uint64_t seed
);
```
@ -480,6 +488,9 @@ hash_map_t *hash_map_create(
* `load_factor`
The load factor to be applied to the hash map. The given load factor will be clipped to the **0.125** to **1.0** range. Generally, the default load factor (0.8) offers a good trade-off between performance and memory usage. Higher load factors decrease the memory overhead, but also may increase the time required for insert, lookup and remove operations. If this parameter is less than or equal to *zero*, the *default* load factor is used.
* `seed`
The "seed" value that is used to tweak the internal hash computation. The application should set this parameter to a value that is hard to predict and unlikely to repeat (e.g., a reading taken from a high-resolution timer).
#### Return value
On success, this function returns a pointer to a new hash map instance. On error, a `NULL` pointer is returned.

View File

@ -6,8 +6,11 @@
#include <hash_map.h>
#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#include "input.h"
#define SEED ((uint64_t)time(NULL))
/* ========================================================================= */
/* MAIN */
/* ========================================================================= */
@ -23,7 +26,7 @@ int main(void)
HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE);
/* create new hash map instance */
hash_map = hash_map_create64(0U, -1.0);
hash_map = hash_map_create64(0U, -1.0, SEED);
if (!hash_map)
{
fputs("Allocation has failed!\n", stderr);

View File

@ -6,8 +6,11 @@
#include <hash_set.h>
#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#include "input.h"
#define SEED ((uint64_t)time(NULL))
/* ========================================================================= */
/* MAIN */
/* ========================================================================= */
@ -23,7 +26,7 @@ int main(void)
HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE);
/* create new hash set instance */
hash_set = hash_set_create64(0U, -1.0);
hash_set = hash_set_create64(0U, -1.0, SEED);
if (!hash_set)
{
fputs("Allocation has failed!\n", stderr);

View File

@ -64,9 +64,9 @@ typedef int (*hash_map_callback64_t)(const size_t index, const char status, cons
/* Functions */
/* ------------------------------------------------- */
HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API void hash_map_destroy16(hash_map16_t *const instance);
HASHSET_API void hash_map_destroy32(hash_map32_t *const instance);

View File

@ -64,9 +64,9 @@ typedef int (*hash_set_callback64_t)(const size_t index, const char status, cons
/* Functions */
/* ------------------------------------------------- */
HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor);
HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor, const uint64_t seed);
HASHSET_API void hash_set_destroy16(hash_set16_t *const instance);
HASHSET_API void hash_set_destroy32(hash_set32_t *const instance);

View File

@ -14,6 +14,8 @@
#define DECLARE(X) CONCAT(X,NAME_SUFFIX)
static const uint64_t SEED_VALUE = UINT64_C(0x1C066DD8B2C5E0C4);
/* ------------------------------------------------- */
/* Data types */
/* ------------------------------------------------- */
@ -30,6 +32,7 @@ struct DECLARE(_hash_map)
{
double load_factor;
size_t valid, deleted, limit;
value_t tweak;
hash_data_t data;
};
@ -93,13 +96,14 @@ static INLINE void free_data(hash_data_t *const data)
#define INDEX(X) ((size_t)((X) % data->capacity))
static INLINE bool_t find_slot(const hash_data_t *const data, const value_t key, size_t *const index_out, bool_t *const reused_out)
static INLINE bool_t find_slot(const hash_data_t *const data, const value_t tweak, const value_t key, size_t *const index_out, bool_t *const reused_out)
{
uint64_t loop = 0U;
bool_t is_saved = FALSE;
size_t index;
const value_t base = key + tweak;
for (index = INDEX(hash_compute(loop, key)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, key)))
for (index = INDEX(hash_compute(loop, base)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, base)))
{
if (get_flag(data->deleted, index))
{
@ -179,7 +183,7 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c
if (IS_VALID(instance->data, k))
{
const value_t key = instance->data.keys[k], value = instance->data.values[k];
if (find_slot(&temp, key, &index, NULL))
if (find_slot(&temp, instance->tweak, key, &index, NULL))
{
free_data(&temp);
return EFAULT; /*this should never happen!*/
@ -200,9 +204,9 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c
/* PUBLIC FUNCTIONS */
/* ========================================================================= */
hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor)
hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed)
{
hash_map_t* instance = (hash_map_t*) calloc(1U, sizeof(hash_map_t));
hash_map_t *instance = (hash_map_t*) calloc(1U, sizeof(hash_map_t));
if (!instance)
{
return NULL;
@ -215,6 +219,7 @@ hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double
}
instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR;
instance->tweak = (value_t) hash_compute(seed, SEED_VALUE);
instance->limit = compute_limit(instance->data.capacity, instance->load_factor);
return instance;
@ -240,7 +245,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key,
return EINVAL;
}
if (find_slot(&instance->data, key, &index, &slot_reused))
if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused))
{
instance->data.values[index] = value;
return EEXIST;
@ -253,7 +258,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key,
{
return error;
}
if (find_slot(&instance->data, key, &index, &slot_reused))
if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused))
{
return EFAULT;
}
@ -277,7 +282,7 @@ errno_t DECLARE(hash_map_contains)(const hash_map_t *const instance, const value
return EINVAL;
}
return (instance->valid && find_slot(&instance->data, key, NULL, NULL)) ? 0 : ENOENT;
return (instance->valid && find_slot(&instance->data, instance->tweak, key, NULL, NULL)) ? 0 : ENOENT;
}
errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t key, value_t *const value)
@ -289,7 +294,7 @@ errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t ke
return EINVAL;
}
if (!find_slot(&instance->data, key, &index, NULL))
if (!find_slot(&instance->data, instance->tweak, key, &index, NULL))
{
return ENOENT;
}
@ -307,7 +312,7 @@ errno_t DECLARE(hash_map_remove)(hash_map_t *const instance, const value_t key)
return EINVAL;
}
if ((!instance->valid) || (!find_slot(&instance->data, key, &index, NULL)))
if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, key, &index, NULL)))
{
return ENOENT;
}

View File

@ -14,6 +14,8 @@
#define DECLARE(X) CONCAT(X,NAME_SUFFIX)
static const uint64_t SEED_VALUE = UINT64_C(0xFE8BD3EF0C09CA67);
/* ------------------------------------------------- */
/* Data types */
/* ------------------------------------------------- */
@ -30,6 +32,7 @@ struct DECLARE(_hash_set)
{
double load_factor;
size_t valid, deleted, limit;
value_t tweak;
hash_data_t data;
};
@ -83,13 +86,14 @@ static INLINE void free_data(hash_data_t *const data)
#define INDEX(X) ((size_t)((X) % data->capacity))
static INLINE bool_t find_slot(const hash_data_t *const data, const value_t item, size_t *const index_out, bool_t *const reused_out)
static INLINE bool_t find_slot(const hash_data_t *const data, const value_t tweak, const value_t item, size_t *const index_out, bool_t *const reused_out)
{
uint64_t loop = 0U;
bool_t is_saved = FALSE;
size_t index;
const value_t base = item + tweak;
for (index = INDEX(hash_compute(loop, item)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, item)))
for (index = INDEX(hash_compute(loop, base)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, base)))
{
if (get_flag(data->deleted, index))
{
@ -168,7 +172,7 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c
if (IS_VALID(instance->data, k))
{
const value_t item = instance->data.items[k];
if (find_slot(&temp, item, &index, NULL))
if (find_slot(&temp, instance->tweak, item, &index, NULL))
{
free_data(&temp);
return EFAULT; /*this should never happen!*/
@ -189,9 +193,9 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c
/* PUBLIC FUNCTIONS */
/* ========================================================================= */
hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor)
hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed)
{
hash_set_t* instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t));
hash_set_t *instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t));
if (!instance)
{
return NULL;
@ -204,6 +208,7 @@ hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double
}
instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR;
instance->tweak = (value_t) hash_compute(seed, SEED_VALUE);
instance->limit = compute_limit(instance->data.capacity, instance->load_factor);
return instance;
@ -229,7 +234,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item)
return EINVAL;
}
if (find_slot(&instance->data, item, &index, &slot_reused))
if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused))
{
return EEXIST;
}
@ -241,7 +246,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item)
{
return error;
}
if (find_slot(&instance->data, item, &index, &slot_reused))
if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused))
{
return EFAULT;
}
@ -265,7 +270,7 @@ errno_t DECLARE(hash_set_contains)(const hash_set_t *const instance, const value
return EINVAL;
}
return (instance->valid && find_slot(&instance->data, item, NULL, NULL)) ? 0 : ENOENT;
return (instance->valid && find_slot(&instance->data, instance->tweak, item, NULL, NULL)) ? 0 : ENOENT;
}
errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item)
@ -277,7 +282,7 @@ errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item)
return EINVAL;
}
if ((!instance->valid) || (!find_slot(&instance->data, item, &index, NULL)))
if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, item, &index, NULL)))
{
return ENOENT;
}

View File

@ -6,6 +6,7 @@
#include "tests.h"
#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#define RUN_TEST_CASE(X) do \
{ \
@ -16,6 +17,8 @@
} \
while(0)
#define SEED ((uint64_t)time(NULL))
/* ========================================================================= */
/* MAIN */
/* ========================================================================= */
@ -27,7 +30,7 @@ int main(void)
printf("LibHashSet Hash-Map Test v%" PRIu16 ".%" PRIu16 ".%" PRIu16 " [%s]\n\n",
HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE);
hash_set = hash_map_create64(0U, -1.0);
hash_set = hash_map_create64(0U, -1.0, SEED);
if (!hash_set)
{
puts("Allocation has failed!");

View File

@ -6,6 +6,7 @@
#include "tests.h"
#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#define RUN_TEST_CASE(X) do \
{ \
@ -16,6 +17,8 @@
} \
while(0)
#define SEED ((uint64_t)time(NULL))
/* ========================================================================= */
/* MAIN */
/* ========================================================================= */
@ -27,7 +30,7 @@ int main(void)
printf("LibHashSet Hash-Set Test v%" PRIu16 ".%" PRIu16 ".%" PRIu16 " [%s]\n\n",
HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE);
hash_set = hash_set_create64(0U, -1.0);
hash_set = hash_set_create64(0U, -1.0, SEED);
if (!hash_set)
{
puts("Allocation has failed!");