From 466a502adfb7538d460b49385626770e942d5e48 Mon Sep 17 00:00:00 2001 From: LoRd_MuldeR Date: Fri, 2 Dec 2022 14:50:37 +0100 Subject: [PATCH] Optimized hash computation. --- libhashset/include/hash_map.h | 6 +++--- libhashset/include/hash_set.h | 6 +++--- libhashset/src/common.h | 8 +++++++- libhashset/src/generic_hash_map.h | 23 ++++++++++++----------- libhashset/src/generic_hash_set.h | 21 +++++++++++---------- 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/libhashset/include/hash_map.h b/libhashset/include/hash_map.h index eab4d6a..4c3b25f 100644 --- a/libhashset/include/hash_map.h +++ b/libhashset/include/hash_map.h @@ -64,9 +64,9 @@ typedef int (*hash_map_callback64_t)(const size_t index, const char status, cons /* Functions */ /* ------------------------------------------------- */ -HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor, const uint32_t seed); -HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor, const uint32_t seed); -HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor, const uint32_t seed); +HASHSET_API hash_map16_t *hash_map_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_map32_t *hash_map_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_map64_t *hash_map_create64(const size_t initial_capacity, const double load_factor, const uint64_t seed); HASHSET_API void hash_map_destroy16(hash_map16_t *const instance); HASHSET_API void hash_map_destroy32(hash_map32_t *const instance); diff --git a/libhashset/include/hash_set.h b/libhashset/include/hash_set.h index 7a18df3..cc30194 100644 --- a/libhashset/include/hash_set.h +++ b/libhashset/include/hash_set.h @@ -64,9 +64,9 @@ typedef int (*hash_set_callback64_t)(const size_t index, const char status, cons /* Functions */ /* ------------------------------------------------- */ -HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor, const uint32_t seed); -HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor, const uint32_t seed); -HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor, const uint32_t seed); +HASHSET_API hash_set16_t *hash_set_create16(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_set32_t *hash_set_create32(const size_t initial_capacity, const double load_factor, const uint64_t seed); +HASHSET_API hash_set64_t *hash_set_create64(const size_t initial_capacity, const double load_factor, const uint64_t seed); HASHSET_API void hash_set_destroy16(hash_set16_t *const instance); HASHSET_API void hash_set_destroy32(hash_set32_t *const instance); diff --git a/libhashset/src/common.h b/libhashset/src/common.h index 1a90de3..674cb85 100644 --- a/libhashset/src/common.h +++ b/libhashset/src/common.h @@ -116,9 +116,15 @@ static FORCE_INLINE void hash_update(uint64_t *const hash, uint64_t value) while (value >>= CHAR_BIT); } -static INLINE uint64_t hash_compute(const uint64_t i, const uint64_t value) +static INLINE uint64_t hash_initialize(const uint64_t seed) { uint64_t hash = UINT64_C(14695981039346656037); + hash_update(&hash, seed); + return hash; +} + +static INLINE uint64_t hash_compute(uint64_t hash, const uint64_t i, const uint64_t value) +{ hash_update(&hash, i); hash_update(&hash, value); return hash; diff --git a/libhashset/src/generic_hash_map.h b/libhashset/src/generic_hash_map.h index 99a293a..06b3b6a 100644 --- a/libhashset/src/generic_hash_map.h +++ b/libhashset/src/generic_hash_map.h @@ -30,7 +30,7 @@ struct DECLARE(_hash_map) { double load_factor; size_t valid, deleted, limit; - uint32_t tweak; + uint64_t basis; hash_data_t data; }; @@ -94,12 +94,13 @@ static INLINE void free_data(hash_data_t *const data) #define INDEX(X) ((size_t)((X) % data->capacity)) -static INLINE bool_t find_slot(const hash_data_t *const data, uint64_t tweak, const value_t key, size_t *const index_out, bool_t *const reused_out) +static INLINE bool_t find_slot(const hash_data_t *const data, const uint64_t basis, const value_t key, size_t *const index_out, bool_t *const reused_out) { size_t index; bool_t is_saved = FALSE; + uint64_t loop = 0U; - for (index = INDEX(hash_compute(tweak, key)); get_flag(data->used, index); index = INDEX(hash_compute(++tweak, key))) + for (index = INDEX(hash_compute(basis, loop, key)); get_flag(data->used, index); index = INDEX(hash_compute(basis, ++loop, key))) { if (get_flag(data->deleted, index)) { @@ -179,7 +180,7 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c if (IS_VALID(instance->data, k)) { const value_t key = instance->data.keys[k], value = instance->data.values[k]; - if (find_slot(&temp, instance->tweak, key, &index, NULL)) + if (find_slot(&temp, instance->basis, key, &index, NULL)) { free_data(&temp); return EFAULT; /*this should never happen!*/ @@ -200,7 +201,7 @@ static INLINE errno_t rebuild_map(hash_map_t *const instance, const size_t new_c /* PUBLIC FUNCTIONS */ /* ========================================================================= */ -hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor, const uint32_t seed) +hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed) { hash_map_t *instance = (hash_map_t*) calloc(1U, sizeof(hash_map_t)); if (!instance) @@ -215,7 +216,7 @@ hash_map_t *DECLARE(hash_map_create)(const size_t initial_capacity, const double } instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR; - instance->tweak = (seed ^ SEED) & UINT32_C(0x7FFFFFFF); + instance->basis = hash_initialize(seed); instance->limit = compute_limit(instance->data.capacity, instance->load_factor); return instance; @@ -241,7 +242,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key, return EINVAL; } - if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused)) + if (find_slot(&instance->data, instance->basis, key, &index, &slot_reused)) { instance->data.values[index] = value; return EEXIST; @@ -254,7 +255,7 @@ errno_t DECLARE(hash_map_insert)(hash_map_t *const instance, const value_t key, { return error; } - if (find_slot(&instance->data, instance->tweak, key, &index, &slot_reused)) + if (find_slot(&instance->data, instance->basis, key, &index, &slot_reused)) { return EFAULT; } @@ -278,7 +279,7 @@ errno_t DECLARE(hash_map_contains)(const hash_map_t *const instance, const value return EINVAL; } - return (instance->valid && find_slot(&instance->data, instance->tweak, key, NULL, NULL)) ? 0 : ENOENT; + return (instance->valid && find_slot(&instance->data, instance->basis, key, NULL, NULL)) ? 0 : ENOENT; } errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t key, value_t *const value) @@ -290,7 +291,7 @@ errno_t DECLARE(hash_map_get)(const hash_map_t *const instance, const value_t ke return EINVAL; } - if (!find_slot(&instance->data, instance->tweak, key, &index, NULL)) + if (!find_slot(&instance->data, instance->basis, key, &index, NULL)) { return ENOENT; } @@ -308,7 +309,7 @@ errno_t DECLARE(hash_map_remove)(hash_map_t *const instance, const value_t key) return EINVAL; } - if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, key, &index, NULL))) + if ((!instance->valid) || (!find_slot(&instance->data, instance->basis, key, &index, NULL))) { return ENOENT; } diff --git a/libhashset/src/generic_hash_set.h b/libhashset/src/generic_hash_set.h index facc196..4a2e147 100644 --- a/libhashset/src/generic_hash_set.h +++ b/libhashset/src/generic_hash_set.h @@ -30,7 +30,7 @@ struct DECLARE(_hash_set) { double load_factor; size_t valid, deleted, limit; - uint32_t tweak; + uint64_t basis; hash_data_t data; }; @@ -84,12 +84,13 @@ static INLINE void free_data(hash_data_t *const data) #define INDEX(X) ((size_t)((X) % data->capacity)) -static INLINE bool_t find_slot(const hash_data_t *const data, uint64_t tweak, const value_t item, size_t *const index_out, bool_t *const reused_out) +static INLINE bool_t find_slot(const hash_data_t *const data, const uint64_t basis, const value_t item, size_t *const index_out, bool_t *const reused_out) { size_t index; bool_t is_saved = FALSE; + uint64_t loop = 0U; - for (index = INDEX(hash_compute(tweak, item)); get_flag(data->used, index); index = INDEX(hash_compute(++tweak, item))) + for (index = INDEX(hash_compute(basis, loop, item)); get_flag(data->used, index); index = INDEX(hash_compute(basis, ++loop, item))) { if (get_flag(data->deleted, index)) { @@ -168,7 +169,7 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c if (IS_VALID(instance->data, k)) { const value_t item = instance->data.items[k]; - if (find_slot(&temp, instance->tweak, item, &index, NULL)) + if (find_slot(&temp, instance->basis, item, &index, NULL)) { free_data(&temp); return EFAULT; /*this should never happen!*/ @@ -189,7 +190,7 @@ static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_c /* PUBLIC FUNCTIONS */ /* ========================================================================= */ -hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor, const uint32_t seed) +hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor, const uint64_t seed) { hash_set_t *instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t)); if (!instance) @@ -204,7 +205,7 @@ hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double } instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR; - instance->tweak = (seed ^ SEED) & UINT32_C(0x7FFFFFFF); + instance->basis = hash_initialize(seed); instance->limit = compute_limit(instance->data.capacity, instance->load_factor); return instance; @@ -230,7 +231,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item) return EINVAL; } - if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused)) + if (find_slot(&instance->data, instance->basis, item, &index, &slot_reused)) { return EEXIST; } @@ -242,7 +243,7 @@ errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item) { return error; } - if (find_slot(&instance->data, instance->tweak, item, &index, &slot_reused)) + if (find_slot(&instance->data, instance->basis, item, &index, &slot_reused)) { return EFAULT; } @@ -266,7 +267,7 @@ errno_t DECLARE(hash_set_contains)(const hash_set_t *const instance, const value return EINVAL; } - return (instance->valid && find_slot(&instance->data, instance->tweak, item, NULL, NULL)) ? 0 : ENOENT; + return (instance->valid && find_slot(&instance->data, instance->basis, item, NULL, NULL)) ? 0 : ENOENT; } errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item) @@ -278,7 +279,7 @@ errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item) return EINVAL; } - if ((!instance->valid) || (!find_slot(&instance->data, instance->tweak, item, &index, NULL))) + if ((!instance->valid) || (!find_slot(&instance->data, instance->basis, item, &index, NULL))) { return ENOENT; }