From caef45b17781d4b56b191ee06b731c7776b1de0d Mon Sep 17 00:00:00 2001 From: LoRd_MuldeR Date: Tue, 22 Nov 2022 23:30:57 +0100 Subject: [PATCH] Various improvements. --- hashset/src/main.c | 212 +++++++++++++++++++++++++++++--------- libhashset/src/hash_set.c | 58 ++++++----- 2 files changed, 198 insertions(+), 72 deletions(-) diff --git a/hashset/src/main.c b/hashset/src/main.c index fff4008..ed2fb88 100644 --- a/hashset/src/main.c +++ b/hashset/src/main.c @@ -3,41 +3,75 @@ /* This work has been released under the CC0 1.0 Universal license! */ /******************************************************************************/ -#define _CRT_RAND_S 1 +#include "hash_set.h" #include -#include #include #include -#include "hash_set.h" +#ifndef _WIN32 +# include +#endif -static uint64_t next_rand(void) +#if defined(__GNUC__) +# define INLINE __inline__ +#elif defined(_MSC_VER) +# define INLINE __inline +#else +# define INLINE +#endif + +/* ========================================================================= */ +/* Utilities */ +/* ========================================================================= */ + +#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0U])) + +#ifdef _WIN32 +unsigned char __stdcall SystemFunction036(void *buffer, unsigned long length); +static int getentropy(void* const buffer, const size_t length) { - uint32_t a, b; - if (rand_s(&a) || rand_s(&b)) - { - abort(); - } - return (((uint64_t)a) << 32) | b; + return SystemFunction036(buffer, (unsigned long)length) ? 
0 : (-1); } -#define MAXIMUM 393216U +#endif -int main() +static INLINE uint64_t random_uint64(void) { - uint64_t rnd; - size_t capacity, valid, deleted, limit; - uint8_t spinner; - clock_t last_update; + static size_t offset = SIZE_MAX; + static uint64_t buffer[16U]; - hash_set_t *const hash_set = hash_set_create(0U, -1.0, HASHSET_OPT_FAILFAST); - if (!hash_set) + if (offset >= ARRAY_SIZE(buffer)) { - puts("Allocation has failed!"); - return EXIT_FAILURE; + offset = 0U; + if (getentropy(&buffer, sizeof(buffer)) < 0) + { + abort(); + } } - for (size_t r = 0U; r < 5U; ++r) + return buffer[offset++]; +} + +#define PRINT_SET_INFO() do \ +{\ + if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) \ + { \ + printf("capacity: %010zu, valid: %010zu, deleted: %010zu, limit: %010zu\n", capacity, valid, deleted, limit); \ + } \ +} \ +while(0) + +/* ========================================================================= */ +/* TEST #1 */ +/* ========================================================================= */ + +#define MAXIMUM 425984U + +static int test_function_1(hash_set_t *const hash_set) +{ + size_t capacity, valid, deleted, limit; + + for (size_t r = 0U; r < 3U; ++r) { for (uint64_t i = 0; i < MAXIMUM; ++i) { @@ -50,10 +84,7 @@ int main() return EXIT_FAILURE; } } - if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) - { - printf("%010zu, %010zu, %010zu, %010zu\n", capacity, valid, deleted, limit); - } + PRINT_SET_INFO(); } if (hash_set_size(hash_set) != MAXIMUM - 4U) @@ -102,10 +133,7 @@ int main() return EXIT_FAILURE; } } - if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) - { - printf("%010zu, %010zu, %010zu, %010zu\n", capacity, valid, deleted, limit); - } + PRINT_SET_INFO(); } if (hash_set_size(hash_set) != 1U) @@ -142,20 +170,27 @@ int main() return EXIT_FAILURE; } - if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) - { - printf("%010zu, %010zu, %010zu, %010zu\n", capacity, valid, deleted, 
limit); - } - - puts("-----"); + PRINT_SET_INFO(); + puts("--------"); } - spinner = 0U; - last_update = clock(); + return EXIT_SUCCESS; +} + +/* ========================================================================= */ +/* TEST #2 */ +/* ========================================================================= */ + +static int test_function_2(hash_set_t *const hash_set) +{ + size_t capacity, valid, deleted, limit; + uint8_t spinner = 0U; + clock_t last_update = clock(); for (;;) { - const errno_t error = hash_set_insert(hash_set, rnd = next_rand() & 0x3FFFFFFFFFFFFFFllu); + const uint64_t rnd = random_uint64() & UINT64_C(0x3FFFFFFFFFFFFFF); + const errno_t error = hash_set_insert(hash_set, rnd); if (error) { if (error != EEXIST) @@ -165,31 +200,112 @@ int main() } else { + PRINT_SET_INFO(); printf("Collision detected! [%016llX]\n", rnd); break; } } if (!(++spinner & 0x7F)) { - const clock_t now = clock(); - if ((now < last_update) || (now >= last_update + CLOCKS_PER_SEC)) + const clock_t clock_now = clock(); + if ((clock_now < last_update) || (clock_now >= last_update + CLOCKS_PER_SEC)) { - if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) - { - printf("%010zu, %010zu, %010zu, %010zu\n", capacity, valid, deleted, limit); - } - last_update = now; + PRINT_SET_INFO(); + last_update = clock_now; } } } - if (!hash_set_info(hash_set, &capacity, &valid, &deleted, &limit)) + PRINT_SET_INFO(); + + if (hash_set_clear(hash_set)) { - printf("%010zu, %010zu, %010zu, %010zu\n", capacity, valid, deleted, limit); + puts("Clear operation has failed!"); + return EXIT_FAILURE; + } + + if (hash_set_shrink(hash_set)) + { + puts("Shrink operation has failed!"); + return EXIT_FAILURE; + } + + PRINT_SET_INFO(); + puts("--------"); + + return EXIT_SUCCESS; +} + +/* ========================================================================= */ +/* TEST #3 */ +/* ========================================================================= */ + +static int 
test_function_3(hash_set_t *const hash_set) +{ + size_t capacity, valid, deleted, limit; + uint8_t spinner = 0U; + clock_t last_update = clock(); + + for (uint64_t value = 0U; value < ((uint64_t)INT32_MAX); ++value) + { + const errno_t error = hash_set_insert(hash_set, value); + if (error) + { + PRINT_SET_INFO(); + printf("Insert operation has failed! (error: %d)\n", error); + return EXIT_FAILURE; + } + if (!(++spinner & 0x7F)) + { + const clock_t clock_now = clock(); + if ((clock_now < last_update) || (clock_now >= last_update + CLOCKS_PER_SEC)) + { + PRINT_SET_INFO(); + last_update = clock_now; + } + } + } + + PRINT_SET_INFO(); + puts("--------"); + + return EXIT_SUCCESS; +} + +/* ========================================================================= */ +/* MAIN */ +/* ========================================================================= */ + +int main() +{ + hash_set_t *const hash_set = hash_set_create(0U, -1.0, 0U /*HASHSET_OPT_FAILFAST*/); + if (!hash_set) + { + puts("Allocation has failed!"); + return EXIT_FAILURE; + } + + if (test_function_1(hash_set) != EXIT_SUCCESS) + { + goto failure; + } + + if (test_function_2(hash_set) != EXIT_SUCCESS) + { + goto failure; + } + + if (test_function_3(hash_set) != EXIT_SUCCESS) + { + goto failure; } hash_set_destroy(hash_set); puts("Test completed successfully."); - return EXIT_SUCCESS; + return EXIT_SUCCESS; /* success path must return here; falling through would destroy the set twice and report failure */ +failure: /* shared cleanup, reached only via goto when one of the tests fails */ + hash_set_destroy(hash_set); + puts("Something went wrong !!!"); + return EXIT_FAILURE; } diff --git a/libhashset/src/hash_set.c b/libhashset/src/hash_set.c index e24ef2d..e44e76d 100644 --- a/libhashset/src/hash_set.c +++ b/libhashset/src/hash_set.c @@ -8,6 +8,7 @@ /* CRT */ #include #include +#include typedef int bool_t; #define TRUE 1 @@ -40,6 +41,7 @@ struct _hash_set static const size_t MINIMUM_CAPACITY = 128U; static const size_t DEFAULT_CAPACITY = 16384U; +static const double DEFAULT_LOADFCTR = 0.8125; /* ========================================================================= */ /* PRIVATE FUNCTIONS */ @@ 
-55,6 +57,12 @@ static const size_t DEFAULT_CAPACITY = 16384U; /* Math */ /* ------------------------------------------------- */ +static FORCE_INLINE size_t safe_mult(const size_t a, const size_t b) +{ + const size_t result = a * b; + return ((a == 0U) || (result / a == b)) ? result : SIZE_MAX; +} + static FORCE_INLINE size_t div_ceil(const size_t value, const size_t divisor) { return (value / divisor) + ((value % divisor != 0U) ? 1U : 0U); @@ -65,11 +73,6 @@ static FORCE_INLINE size_t round_sz(double d) return (d >= 0.0) ? ((d + 0.5 >= ((double)SIZE_MAX)) ? SIZE_MAX : ((size_t)(d + 0.5))) : 0U; } -static FORCE_INLINE size_t safe_mult2(const size_t value) -{ - return (value < (SIZE_MAX >> 1)) ? (value << 1) : SIZE_MAX; -} - static FORCE_INLINE size_t safe_add(const size_t a, const size_t b) { return ((SIZE_MAX - a) > b) ? (a + b) : SIZE_MAX; @@ -91,7 +94,7 @@ static FORCE_INLINE size_t next_pow2(const size_t target) while (result < target) { - result = safe_mult2(result); + result = safe_mult(result, 2U); } return result; @@ -101,19 +104,21 @@ static FORCE_INLINE size_t next_pow2(const size_t target) /* Hash function */ /* ------------------------------------------------- */ -#define HASH_OFFSET UINT64_C(14695981039346656037) - -#define INDEX(X,Y) ((size_t)((X) % (Y))) - -static FORCE_INLINE uint64_t hash_compute(uint64_t hash, uint64_t value) +static FORCE_INLINE void hash_update(uint64_t *const hash, uint64_t value) { do { - hash ^= value & 0xFF; - hash *= UINT64_C(1099511628211); + *hash ^= value & 0xFF; + *hash *= UINT64_C(1099511628211); } while (value >>= CHAR_BIT); +} +static INLINE uint64_t hash_compute(const uint64_t i, const uint64_t value) +{ + uint64_t hash = UINT64_C(14695981039346656037); + hash_update(&hash, i); + hash_update(&hash, value); return hash; } @@ -121,9 +126,14 @@ static FORCE_INLINE uint64_t hash_compute(uint64_t hash, uint64_t value) /* Allocation */ /* ------------------------------------------------- */ +static INLINE void 
zero_memory(void* const addr, const size_t count, const size_t size) +{ + memset(addr, 0, safe_mult(count, size)); +} + static INLINE bool_t alloc_data(struct _hash_set_data *const data, const size_t capacity) { - memset(data, 0, sizeof(struct _hash_set_data)); + zero_memory(data, 1U, sizeof(struct _hash_set_data)); data->values = (uint64_t*) calloc(capacity, sizeof(uint64_t)); if (!data->values) @@ -184,15 +194,15 @@ static INLINE void clear_flag(uint8_t* const flags, const size_t index) /* Set functions */ /* ------------------------------------------------- */ +#define INDEX(X) ((size_t)((X) % data->capacity)) + static INLINE bool_t find_slot(const struct _hash_set_data *const data, const uint64_t value, size_t *const index_out) { + uint64_t loop = 0U; size_t index; bool_t index_saved = FALSE; - uint64_t tweak = 0U; - const uint64_t hash = hash_compute(HASH_OFFSET, value); - - for (index = INDEX(hash, data->capacity); get_flag(data->used, index); index = INDEX(hash_compute(hash, tweak++), data->capacity)) + for (index = INDEX(hash_compute(loop, value)); get_flag(data->used, index); index = INDEX(hash_compute(++loop, value))) { if (!get_flag(data->deleted, index)) { @@ -295,7 +305,7 @@ hash_set_t *hash_set_create(const size_t initial_capacity, const double load_fac return NULL; } - instance->load_factor = (load_factor > 0.0) ? BOUND(0.125, load_factor, 1.0) : 0.8; + instance->load_factor = (load_factor > DBL_EPSILON) ? 
BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR; instance->options = options; instance->limit = round_sz(instance->data.capacity * instance->load_factor); @@ -307,7 +317,7 @@ void hash_set_destroy(hash_set_t *instance) if (instance) { free_data(&instance->data); - memset(instance, 0, sizeof(hash_set_t)); + zero_memory(instance, 1U, sizeof(hash_set_t)); SAFE_FREE(instance); } } @@ -344,7 +354,7 @@ errno_t hash_set_insert(hash_set_t *const instance, const uint64_t value) } else { - const errno_t error = rebuild_set(instance, safe_mult2(instance->data.capacity)); + const errno_t error = rebuild_set(instance, safe_mult(instance->data.capacity, 2U)); if (error) { if (error == ENOMEM) @@ -414,10 +424,10 @@ errno_t hash_set_clear(hash_set_t *const instance) return EAGAIN; } - memset(instance->data.used, 0, sizeof(uint8_t) * div_ceil(instance->data.capacity, 8U)); - memset(instance->data.deleted, 0, sizeof(uint8_t) * div_ceil(instance->data.capacity, 8U)); - instance->valid = instance->deleted = 0U; + zero_memory(instance->data.used, div_ceil(instance->data.capacity, 8U), sizeof(uint8_t)); + zero_memory(instance->data.deleted, div_ceil(instance->data.capacity, 8U), sizeof(uint8_t)); + instance->valid = instance->deleted = 0U; return 0; }