From 0e9434d9398ede04152ad7f74e69f1cbd7434242 Mon Sep 17 00:00:00 2001 From: LoRd_MuldeR Date: Thu, 1 Dec 2022 23:20:25 +0100 Subject: [PATCH] Tweak the hash computation using an application-defined "seed" value. --- README.md | 19 +++++++++++++++---- example/hash-map/src/main.c | 5 ++++- example/hash-set/src/main.c | 5 ++++- libhashset/include/hash_map.h | 6 +++--- libhashset/include/hash_set.h | 6 +++--- libhashset/src/generic_hash_map.h | 25 +++++++++++++++---------- libhashset/src/generic_hash_set.h | 23 ++++++++++++++--------- test/hash-map/src/main.c | 5 ++++- test/hash-set/src/main.c | 5 ++++- 9 files changed, 66 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 4a12080..b24017f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Introduction ============ -**LibHashSet** is a *hash set* and *hash map* implementation for C99. It uses open addressing and double hashing. +**LibHashSet** is a [*hash set*](https://en.wikipedia.org/wiki/Hash_table) and [*hash map*](https://en.wikipedia.org/wiki/Hash_table) implementation for C99. It uses open addressing and double hashing. At this time, the *only* types of elements supported are `uint16_t`, `uint32_t` and `uint64_t`. @@ -15,15 +15,18 @@ Here is a simple example of how to use LibHashSet in your application: ```C #include +#include #include +#define SEED ((uint64_t)time(NULL)) + int main(void) { uint64_t item; uintptr_t cursor = 0U; /* create new hash set instance */ - hash_set64_t* const hash_set = hash_set_create64(0U, -1.0); + hash_set64_t* const hash_set = hash_set_create64(0U, -1.0, SEED); if (!hash_set) { fputs("Allocation has failed!\n", stderr); @@ -125,7 +128,8 @@ Allocates a new hash set instance. The new hash set instance is empty initially. 
```C hash_set_t *hash_set_create( const size_t initial_capacity, - const double load_factor + const double load_factor, + const uint64_t seed ); ``` @@ -137,6 +141,9 @@ hash_set_t *hash_set_create( * `load_factor` The load factor to be applied to the hash set. The given load factor will be clipped to the **0.1** to **1.0** range. Generally, the default load factor (0.8) offers a good trade-off between performance and memory usage. Higher load factors decrease the memory overhead, but also may increase the time required for insert, lookup and remove operations. If this parameter is less than or equal to *zero*, the *default* load factor is used. +* `seed` + The "seed" value that is used to tweak the internal hash computation. The application should set this parameter to a value that is hard to predict and unlikely to repeat (e.g., the current value of a high-resolution timer). + #### Return value On success, this function returns a pointer to a new hash set instance. On error, a `NULL` pointer is returned. @@ -468,7 +475,8 @@ Allocates a new hash map instance. The new hash map instance is empty initially. ```C hash_map_t *hash_map_create( const size_t initial_capacity, - const double load_factor + const double load_factor, + const uint64_t seed ); ``` @@ -480,6 +488,9 @@ hash_map_t *hash_map_create( * `load_factor` The load factor to be applied to the hash map. The given load factor will be clipped to the **0.1** to **1.0** range. Generally, the default load factor (0.8) offers a good trade-off between performance and memory usage. Higher load factors decrease the memory overhead, but also may increase the time required for insert, lookup and remove operations. If this parameter is less than or equal to *zero*, the *default* load factor is used. +* `seed` + The "seed" value that is used to tweak the internal hash computation. 
The application should set this parameter to a value that is hard to predict and unlikely to repeat (e.g., the current value of a high-resolution timer). + #### Return value On success, this function returns a pointer to a new hash map instance. On error, a `NULL` pointer is returned. diff --git a/example/hash-map/src/main.c b/example/hash-map/src/main.c index d7065ca..b71ee9d 100644 --- a/example/hash-map/src/main.c +++ b/example/hash-map/src/main.c @@ -6,8 +6,11 @@ #include #include #include +#include #include "input.h" +#define SEED ((uint64_t)time(NULL)) + /* ========================================================================= */ /* MAIN */ /* ========================================================================= */ @@ -23,7 +26,7 @@ int main(void) HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE); /* create new hash map instance */ - hash_map = hash_map_create64(0U, -1.0); + hash_map = hash_map_create64(0U, -1.0, SEED); if (!hash_map) { fputs("Allocation has failed!\n", stderr); diff --git a/example/hash-set/src/main.c b/example/hash-set/src/main.c index 5885847..a6adf77 100644 --- a/example/hash-set/src/main.c +++ b/example/hash-set/src/main.c @@ -6,8 +6,11 @@ #include #include #include +#include #include "input.h" +#define SEED ((uint64_t)time(NULL)) + /* ========================================================================= */ /* MAIN */ /* ========================================================================= */ @@ -23,7 +26,7 @@ int main(void) HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE); /* create new hash set instance */ - hash_set = hash_set_create64(0U, -1.0); + hash_set = hash_set_create64(0U, -1.0, SEED); if (!hash_set) { fputs("Allocation has failed!\n", stderr); diff --git a/libhashset/include/hash_map.h b/libhashset/include/hash_map.h index 16700c2..4c3b25f 100644 --- a/libhashset/include/hash_map.h +++ b/libhashset/include/hash_map.h @@ -64,9 
+64,9 @@ typedef int (*hash_map_callback64_t)(const size_t index, const char status, cons /* Functions */ /* ------------------------------------------------- */ -HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor); -HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor); -HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor); +HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor, const uint64_t seed); HASHSET_API void hash_map_destroy16(hash_map16_t *const instance); HASHSET_API void hash_map_destroy32(hash_map32_t *const instance); diff --git a/libhashset/include/hash_set.h b/libhashset/include/hash_set.h index 1d62606..cc30194 100644 --- a/libhashset/include/hash_set.h +++ b/libhashset/include/hash_set.h @@ -64,9 +64,9 @@ typedef int (*hash_set_callback64_t)(const size_t index, const char status, cons /* Functions */ /* ------------------------------------------------- */ -HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor); -HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor); -HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor); +HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor, const uint64_t 
seed); HASHSET_API void hash_set_destroy16(hash_set16_t *const instance); HASHSET_API void hash_set_destroy32(hash_set32_t *const instance); diff --git a/libhashset/src/generic_hash_map.h b/libhashset/src/generic_hash_map.h index 240ee99..7b9ef39 100644 --- a/libhashset/src/generic_hash_map.h +++ b/libhashset/src/generic_hash_map.h @@ -14,6 +14,8 @@ #define DECLARE(X) CONCAT(X,NAME_SUFFIX) +static const uint64_t SEED_VALUE = UINT64_C(0x1C066DD8B2C5E0C4); + /* ------------------------------------------------- */ /* Data types */ /* ------------------------------------------------- */ @@ -30,6 +32,7 @@ struct DECLARE(_hash_map) { double load_factor; size_t valid, deleted, limit; + value_t tweak; hash_data_t data; }; @@ -93,13 +96,14 @@ static INLINE void free_data(hash_data_t *const data) #define INDEX(X) ((size_t)((X) % data->capacity)) -static INLINE bool_t find_slot(const hash_data_t *const data, const value_t key, size_t *const index_out, bool_t *const reused_out) +static INLINE bool_t find_slot(const hash_data_t *const data, const value_t tweak, const value_t key, size_t *const index_out, bool_t *const reused_out) { uint64_t loop = 0U; bool_t is_saved = FALSE; size_t index; + const value_t base = key + tweak; - for (index = INDEX(hash_compute(loop, key)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, key))) + for (index = INDEX(hash_compute(loop, base)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, base))) { if (get_flag(data->deleted, index)) { @@ -179,7 +183,7 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c if (IS_VALID(instance->data, k)) { const value_t key = instance->data.keys[k], value = instance->data.values[k]; - if (find_slot(&temp, key, &index, NULL)) + if (find_slot(&temp, instance->tweak, key, &index, NULL)) { free_data(&temp); return EFAULT; /*this should never happen!*/ @@ -200,9 +204,9 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c /* 
PUBLIC FUNCTIONS */ /* ========================================================================= */ -hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor) +hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed) { - hash_map_t* instance = (hash_map_t*) calloc(1U, sizeof(hash_map_t)); + hash_map_t *instance = (hash_map_t*) calloc(1U, sizeof(hash_map_t)); if (!instance) { return NULL; @@ -215,6 +219,7 @@ hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double } instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR; + instance->tweak = (value_t) hash_compute(seed, SEED_VALUE); instance->limit = compute_limit(instance->data.capacity, instance->load_factor); return instance; @@ -240,7 +245,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key, return EINVAL; } - if (find_slot(&instance->data, key, &index, &slot_reused)) + if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused)) { instance->data.values[index] = value; return EEXIST; @@ -253,7 +258,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key, { return error; } - if (find_slot(&instance->data, key, &index, &slot_reused)) + if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused)) { return EFAULT; } @@ -277,7 +282,7 @@ errno_t DECLARE(hash_map_contains)(const hash_map_t *const instance, const value return EINVAL; } - return (instance->valid && find_slot(&instance->data, key, NULL, NULL)) ? 0 : ENOENT; + return (instance->valid && find_slot(&instance->data, instance->tweak, key, NULL, NULL)) ? 
0 : ENOENT; } errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t key, value_t *const value) @@ -289,7 +294,7 @@ errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t ke return EINVAL; } - if (!find_slot(&instance->data, key, &index, NULL)) + if (!find_slot(&instance->data, instance->tweak, key, &index, NULL)) { return ENOENT; } @@ -307,7 +312,7 @@ errno_t DECLARE(hash_map_remove)(hash_map_t *const instance, const value_t key) return EINVAL; } - if ((!instance->valid) || (!find_slot(&instance->data, key, &index, NULL))) + if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, key, &index, NULL))) { return ENOENT; } diff --git a/libhashset/src/generic_hash_set.h b/libhashset/src/generic_hash_set.h index 0dc566c..3ad1597 100644 --- a/libhashset/src/generic_hash_set.h +++ b/libhashset/src/generic_hash_set.h @@ -14,6 +14,8 @@ #define DECLARE(X) CONCAT(X,NAME_SUFFIX) +static const uint64_t SEED_VALUE = UINT64_C(0xFE8BD3EF0C09CA67); + /* ------------------------------------------------- */ /* Data types */ /* ------------------------------------------------- */ @@ -30,6 +32,7 @@ struct DECLARE(_hash_set) { double load_factor; size_t valid, deleted, limit; + value_t tweak; hash_data_t data; }; @@ -83,13 +86,14 @@ static INLINE void free_data(hash_data_t *const data) #define INDEX(X) ((size_t)((X) % data->capacity)) -static INLINE bool_t find_slot(const hash_data_t *const data, const value_t item, size_t *const index_out, bool_t *const reused_out) +static INLINE bool_t find_slot(const hash_data_t *const data, const value_t tweak, const value_t item, size_t *const index_out, bool_t *const reused_out) { uint64_t loop = 0U; bool_t is_saved = FALSE; size_t index; + const value_t base = item + tweak; - for (index = INDEX(hash_compute(loop, item)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, item))) + for (index = INDEX(hash_compute(loop, base)); get_flag(data->used, index); index = 
INDEX(hash_compute(++loop, base))) { if (get_flag(data->deleted, index)) { @@ -168,7 +172,7 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c if (IS_VALID(instance->data, k)) { const value_t item = instance->data.items[k]; - if (find_slot(&temp, item, &index, NULL)) + if (find_slot(&temp, instance->tweak, item, &index, NULL)) { free_data(&temp); return EFAULT; /*this should never happen!*/ @@ -189,9 +193,9 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c /* PUBLIC FUNCTIONS */ /* ========================================================================= */ -hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor) +hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed) { - hash_set_t* instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t)); + hash_set_t *instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t)); if (!instance) { return NULL; @@ -204,6 +208,7 @@ hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double } instance->load_factor = (load_factor > DBL_EPSILON) ? 
BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR; + instance->tweak = (value_t) hash_compute(seed, SEED_VALUE); instance->limit = compute_limit(instance->data.capacity, instance->load_factor); return instance; @@ -229,7 +234,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item) return EINVAL; } - if (find_slot(&instance->data, item, &index, &slot_reused)) + if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused)) { return EEXIST; } @@ -241,7 +246,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item) { return error; } - if (find_slot(&instance->data, item, &index, &slot_reused)) + if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused)) { return EFAULT; } @@ -265,7 +270,7 @@ errno_t DECLARE(hash_set_contains)(const hash_set_t *const instance, const value return EINVAL; } - return (instance->valid && find_slot(&instance->data, item, NULL, NULL)) ? 0 : ENOENT; + return (instance->valid && find_slot(&instance->data, instance->tweak, item, NULL, NULL)) ? 
0 : ENOENT; } errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item) @@ -277,7 +282,7 @@ errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item) return EINVAL; } - if ((!instance->valid) || (!find_slot(&instance->data, item, &index, NULL))) + if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, item, &index, NULL))) { return ENOENT; } diff --git a/test/hash-map/src/main.c b/test/hash-map/src/main.c index 8748dcb..8319ed0 100644 --- a/test/hash-map/src/main.c +++ b/test/hash-map/src/main.c @@ -6,6 +6,7 @@ #include "tests.h" #include #include +#include #define RUN_TEST_CASE(X) do \ { \ @@ -16,6 +17,8 @@ } \ while(0) +#define SEED ((uint64_t)time(NULL)) + /* ========================================================================= */ /* MAIN */ /* ========================================================================= */ @@ -27,7 +30,7 @@ int main(void) printf("LibHashSet Hash-Map Test v%" PRIu16 ".%" PRIu16 ".%" PRIu16 " [%s]\n\n", HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE); - hash_set = hash_map_create64(0U, -1.0); + hash_set = hash_map_create64(0U, -1.0, SEED); if (!hash_set) { puts("Allocation has failed!"); diff --git a/test/hash-set/src/main.c b/test/hash-set/src/main.c index 623c084..9b6f2ef 100644 --- a/test/hash-set/src/main.c +++ b/test/hash-set/src/main.c @@ -6,6 +6,7 @@ #include "tests.h" #include #include +#include #define RUN_TEST_CASE(X) do \ { \ @@ -16,6 +17,8 @@ } \ while(0) +#define SEED ((uint64_t)time(NULL)) + /* ========================================================================= */ /* MAIN */ /* ========================================================================= */ @@ -27,7 +30,7 @@ int main(void) printf("LibHashSet Hash-Set Test v%" PRIu16 ".%" PRIu16 ".%" PRIu16 " [%s]\n\n", HASHSET_VERSION_MAJOR, HASHSET_VERSION_MINOR, HASHSET_VERSION_PATCH, HASHSET_BUILD_DATE); - hash_set = hash_set_create64(0U, -1.0); + hash_set = 
hash_set_create64(0U, -1.0, SEED); if (!hash_set) { puts("Allocation has failed!");