LibHashSet/libhashset/src/generic_hash_set.h

403 lines
9.1 KiB
C
Raw Normal View History

2022-11-19 17:34:56 +01:00
/******************************************************************************/
/* HashSet for C99, by LoRd_MuldeR <MuldeR2@GMX.de> */
/* This work has been released under the CC0 1.0 Universal license! */
/******************************************************************************/
2022-11-29 21:22:23 +01:00
#ifndef _LIBHASHSET_GENERIC_SET_INCLUDED
#define _LIBHASHSET_GENERIC_SET_INCLUDED
2022-11-19 17:34:56 +01:00
2022-11-28 02:26:52 +01:00
#include "common.h"
#ifndef NAME_SUFFIX
#error NAME_SUFFIX must be defined!
#endif
2022-11-28 02:26:52 +01:00
/* Builds a per-instantiation identifier from X and NAME_SUFFIX via CONCAT
 * (presumably token pasting; CONCAT is defined outside this file). */
#define DECLARE(X) CONCAT(X,NAME_SUFFIX)
2022-11-20 18:02:12 +01:00
/* ------------------------------------------------- */
/* Data types */
2022-11-20 18:02:12 +01:00
/* ------------------------------------------------- */
2022-11-19 17:34:56 +01:00
2022-11-28 02:26:52 +01:00
typedef struct DECLARE(_hash_set_data)
2022-11-21 15:02:07 +01:00
{
value_t *items;
uint8_t *used, *deleted;
size_t capacity;
2022-11-28 02:26:52 +01:00
}
hash_data_t;
2022-11-25 02:53:34 +01:00
2022-11-28 02:26:52 +01:00
struct DECLARE(_hash_set)
2022-11-19 17:34:56 +01:00
{
double load_factor;
size_t valid, deleted, limit;
2022-12-02 00:38:43 +01:00
uint32_t tweak;
2022-11-28 02:26:52 +01:00
hash_data_t data;
};
2022-11-19 17:34:56 +01:00
2022-11-20 18:02:12 +01:00
/* ------------------------------------------------- */
/* Allocation */
/* ------------------------------------------------- */
2022-11-28 02:26:52 +01:00
/*
 * Allocates zero-initialised storage for 'capacity' slots: the item array and
 * the 'used' / 'deleted' bitmaps (one bit per slot).
 *
 * 'data' is wiped first. Returns TRUE on success; on failure everything that
 * was already allocated is released again and FALSE is returned.
 */
static INLINE bool_t alloc_data(hash_data_t *const data, const size_t capacity)
{
    const size_t flag_bytes = div_ceil(capacity, 8U);

    zero_memory(data, 1U, sizeof(hash_data_t));

    data->items = (value_t*) calloc(capacity, sizeof(value_t));
    if (data->items) {
        data->used = (uint8_t*) calloc(flag_bytes, sizeof(uint8_t));
        if (data->used) {
            data->deleted = (uint8_t*) calloc(flag_bytes, sizeof(uint8_t));
            if (data->deleted) {
                data->capacity = capacity;
                return TRUE;
            }
            /* third allocation failed: undo the second one */
            SAFE_FREE(data->used);
        }
        /* second or third allocation failed: undo the first one */
        SAFE_FREE(data->items);
    }

    return FALSE;
}
2022-11-28 02:26:52 +01:00
/*
 * Releases the three buffers owned by 'data' and resets its capacity to zero.
 * A NULL 'data' pointer is accepted and ignored.
 */
static INLINE void free_data(hash_data_t *const data)
{
    if (!data) {
        return;
    }

    SAFE_FREE(data->items);
    SAFE_FREE(data->used);
    SAFE_FREE(data->deleted);
    data->capacity = 0U;
}
2022-11-20 18:02:12 +01:00
/* ------------------------------------------------- */
/* Set functions */
/* ------------------------------------------------- */
2022-11-22 23:30:57 +01:00
#define INDEX(X) ((size_t)((X) % data->capacity))
2022-12-02 00:38:43 +01:00
static INLINE bool_t find_slot(const hash_data_t *const data, uint64_t tweak, const value_t item, size_t *const index_out, bool_t *const reused_out)
2022-11-19 17:34:56 +01:00
{
size_t index;
2022-12-02 00:38:43 +01:00
bool_t is_saved = FALSE;
2022-11-22 17:00:43 +01:00
2022-12-02 00:38:43 +01:00
for (index = INDEX(hash_compute(tweak, item)); get_flag(data->used, index); index = INDEX(hash_compute(++tweak, item)))
2022-11-19 17:34:56 +01:00
{
2022-11-24 15:45:12 +01:00
if (get_flag(data->deleted, index))
2022-11-19 17:34:56 +01:00
{
2022-11-24 15:45:12 +01:00
if (!is_saved)
2022-11-19 22:34:33 +01:00
{
2022-11-24 15:45:12 +01:00
SAFE_SET(index_out, index);
SAFE_SET(reused_out, TRUE);
is_saved = TRUE;
2022-11-19 22:34:33 +01:00
}
2022-11-19 17:34:56 +01:00
}
2022-11-24 15:45:12 +01:00
else
2022-11-19 17:34:56 +01:00
{
2022-11-29 21:22:23 +01:00
if (data->items[index] == item)
2022-11-24 15:45:12 +01:00
{
SAFE_SET(index_out, index);
SAFE_SET(reused_out, FALSE);
return TRUE;
}
2022-11-19 17:34:56 +01:00
}
}
2022-11-24 15:45:12 +01:00
if (!is_saved)
2022-11-19 22:34:33 +01:00
{
2022-11-24 15:45:12 +01:00
SAFE_SET(index_out, index);
SAFE_SET(reused_out, FALSE);
2022-11-19 22:34:33 +01:00
}
2022-11-20 14:21:55 +01:00
return FALSE;
2022-11-19 17:34:56 +01:00
}
/*
 * Stores 'item' in slot 'index' and updates the slot flags.
 *
 * With 'reusing' set, the slot must already be marked used and its deleted
 * flag is cleared; otherwise the slot must not be marked deleted and its used
 * flag is set. The preconditions are checked with assert() only.
 */
static INLINE void put_item(hash_data_t *const data, const size_t index, const value_t item, const bool_t reusing)
{
    data->items[index] = item;

    if (!reusing) {
        assert(!get_flag(data->deleted, index));
        set_flag(data->used, index);
        return;
    }

    assert(get_flag(data->used, index));
    clear_flag(data->deleted, index);
}
2022-11-27 21:01:46 +01:00
/*
 * Derives the fill limit for a table of 'capacity' slots: the rounded product
 * capacity * load_factor, lowered step by step (safe_decr) until it is below
 * 'capacity'. For a capacity of zero the rounded product is returned as is.
 */
static INLINE size_t compute_limit(const size_t capacity, const double load_factor)
{
    size_t threshold = round_sz(capacity * load_factor);

    if (capacity) {
        while (threshold >= capacity) {
            threshold = safe_decr(threshold);
        }
    }

    return threshold;
}
/*
 * Re-hashes all valid items into a freshly allocated table with
 * 'new_capacity' slots and installs it in place of the current one. Deleted
 * markers are not carried over, so the deleted counter is reset to zero and
 * the fill limit is recomputed for the new capacity.
 *
 * Returns 0 on success, EINVAL if 'new_capacity' would leave no spare slot,
 * ENOMEM if the new table cannot be allocated, or EFAULT if an item turns up
 * twice while re-hashing. On every error path the instance is left unchanged.
 */
static INLINE errno_t rebuild_set(hash_set_t *const instance, const size_t new_capacity)
{
    size_t k, index;
    hash_data_t temp;

    /*
     * The new table needs at least one slot more than there are valid items:
     * find_slot() stops probing only at an unused slot, so a completely full
     * table would make a lookup of an absent item spin forever. This test
     * also rejects a capacity of zero, for which INDEX() would compute a
     * remainder by zero.
     */
    if (new_capacity <= instance->valid)
    {
        return EINVAL;
    }

    if (!alloc_data(&temp, new_capacity))
    {
        return ENOMEM;
    }

    for (k = 0U; k < instance->data.capacity; ++k)
    {
        if (IS_VALID(instance->data, k))
        {
            const value_t item = instance->data.items[k];
            if (find_slot(&temp, instance->tweak, item, &index, NULL))
            {
                free_data(&temp);
                return EFAULT; /*this should never happen!*/
            }
            /* the new table has no deleted slots, so 'index' is an unused slot */
            put_item(&temp, index, item, FALSE);
        }
    }

    /* swap in the new storage; 'temp' hands its buffers over to the instance */
    free_data(&instance->data);
    instance->data = temp;
    instance->limit = compute_limit(instance->data.capacity, instance->load_factor);
    instance->deleted = 0U;

    return 0;
}
/* ========================================================================= */
/* PUBLIC FUNCTIONS */
/* ========================================================================= */
2022-12-02 00:38:43 +01:00
hash_set_t *DECLARE(hash_set_create)(const size_t initial_capacity, const double load_factor, const uint32_t seed)
2022-11-19 17:34:56 +01:00
{
hash_set_t *instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t));
2022-11-19 17:34:56 +01:00
if (!instance)
{
return NULL;
}
if (!alloc_data(&instance->data, (initial_capacity > 0U) ? next_pow2(initial_capacity) : DEFAULT_CAPACITY))
2022-11-19 17:34:56 +01:00
{
SAFE_FREE(instance);
2022-11-19 17:34:56 +01:00
return NULL;
}
2022-11-22 23:30:57 +01:00
instance->load_factor = (load_factor > DBL_EPSILON) ? BOUND(0.125, load_factor, 1.0) : DEFAULT_LOADFCTR;
2022-12-02 00:38:43 +01:00
instance->tweak = (seed ^ SEED) & UINT32_C(0x7FFFFFFF);
2022-11-24 15:45:12 +01:00
instance->limit = compute_limit(instance->data.capacity, instance->load_factor);
2022-11-19 20:28:13 +01:00
2022-11-19 17:34:56 +01:00
return instance;
}
2022-11-28 02:26:52 +01:00
/*
 * Destroys a hash set: releases the table storage, wipes the instance
 * structure and frees it. A NULL 'instance' is accepted and ignored.
 */
void DECLARE(hash_set_destroy)(hash_set_t *instance)
{
    if (!instance) {
        return;
    }

    free_data(&instance->data);
    zero_memory(instance, 1U, sizeof(hash_set_t));
    SAFE_FREE(instance);
}
/*
 * Adds 'item' to the set.
 *
 * If the item would go into a previously unused slot and valid + deleted has
 * reached the fill limit, the table is first rebuilt with twice the capacity
 * (safe_times2) and the target slot is looked up again.
 *
 * Returns 0 on success, EINVAL for an invalid instance, EEXIST if the item is
 * already present, the error reported by rebuild_set() if growing fails, or
 * EFAULT if the item unexpectedly shows up after the rebuild.
 */
errno_t DECLARE(hash_set_insert)(hash_set_t *const instance, const value_t item)
{
    size_t slot;
    bool_t reuse_deleted;

    if ((!instance) || (!instance->data.items)) {
        return EINVAL;
    }

    if (find_slot(&instance->data, instance->tweak, item, &slot, &reuse_deleted)) {
        return EEXIST;
    }

    /* re-using a deleted slot does not raise the fill level, so no growth is needed */
    if (!reuse_deleted) {
        if (safe_add(instance->valid, instance->deleted) >= instance->limit) {
            const errno_t error = rebuild_set(instance, safe_times2(instance->data.capacity));
            if (error) {
                return error;
            }
            /* the table was replaced, so the target slot must be recomputed */
            if (find_slot(&instance->data, instance->tweak, item, &slot, &reuse_deleted)) {
                return EFAULT;
            }
        }
    }

    put_item(&instance->data, slot, item, reuse_deleted);
    instance->valid = safe_incr(instance->valid);

    if (reuse_deleted) {
        instance->deleted = safe_decr(instance->deleted);
    }

    return 0;
}
errno_t DECLARE(hash_set_contains)(const hash_set_t *const instance, const value_t item)
2022-11-19 17:34:56 +01:00
{
2022-11-29 21:22:23 +01:00
if ((!instance) || (!instance->data.items))
2022-11-19 17:34:56 +01:00
{
2022-11-19 20:28:13 +01:00
return EINVAL;
2022-11-19 17:34:56 +01:00
}
return (instance->valid && find_slot(&instance->data, instance->tweak, item, NULL, NULL)) ? 0 : ENOENT;
2022-11-19 17:34:56 +01:00
}
/*
 * Removes 'item' from the set by flagging its slot as deleted.
 *
 * When the last item is removed, the result of hash_set_clear() is returned.
 * Otherwise, once more than half of the fill limit consists of deleted slots,
 * the table is rebuilt (never with a larger capacity than the current one) to
 * drop the deleted markers. An ENOMEM from that rebuild is ignored.
 *
 * Returns 0 on success, EINVAL for an invalid instance, ENOENT if the item is
 * not in the set, or another error reported by the clear/rebuild step.
 */
errno_t DECLARE(hash_set_remove)(hash_set_t *const instance, const value_t item)
{
    size_t slot;

    if ((!instance) || (!instance->data.items)) {
        return EINVAL;
    }

    if (!instance->valid) {
        return ENOENT;
    }

    if (!find_slot(&instance->data, instance->tweak, item, &slot, NULL)) {
        return ENOENT;
    }

    set_flag(instance->data.deleted, slot);
    instance->deleted = safe_incr(instance->deleted);
    instance->valid = safe_decr(instance->valid);

    if (!instance->valid) {
        return DECLARE(hash_set_clear)(instance);
    }

    if (instance->deleted > (instance->limit / 2U)) {
        const size_t min_capacity = next_pow2(round_sz(safe_incr(instance->valid) / instance->load_factor));
        const size_t target_capacity = (instance->data.capacity > min_capacity) ? min_capacity : instance->data.capacity;
        const errno_t error = rebuild_set(instance, target_capacity);
        if (error && (error != ENOMEM)) {
            return error;
        }
    }

    return 0;
}
2022-11-28 02:26:52 +01:00
/*
 * Removes all items from the set.
 *
 * Both counters and both flag bitmaps are reset. If the table is larger than
 * MINIMUM_CAPACITY it is then rebuilt at MINIMUM_CAPACITY; an ENOMEM from
 * that rebuild is ignored.
 *
 * Returns 0 on success, EINVAL for an invalid instance, EAGAIN if there was
 * nothing to clear (no valid and no deleted entries), or another error
 * reported by the rebuild step.
 */
errno_t DECLARE(hash_set_clear)(hash_set_t *const instance)
{
    size_t flag_bytes;

    if ((!instance) || (!instance->data.items)) {
        return EINVAL;
    }

    if (!(instance->valid || instance->deleted)) {
        return EAGAIN;
    }

    flag_bytes = div_ceil(instance->data.capacity, 8U);
    instance->valid = instance->deleted = 0U;
    zero_memory(instance->data.used, flag_bytes, sizeof(uint8_t));
    zero_memory(instance->data.deleted, flag_bytes, sizeof(uint8_t));

    if (instance->data.capacity > MINIMUM_CAPACITY) {
        const errno_t error = rebuild_set(instance, MINIMUM_CAPACITY);
        if (error && (error != ENOMEM)) {
            return error;
        }
    }

    return 0;
}
/*
 * Fetches the next valid item, scanning the slots from index *cursor upwards.
 *
 * On a hit, the item is stored through 'item' (via SAFE_SET), *cursor is
 * advanced past the slot and 0 is returned. When no valid slot remains,
 * *cursor is set to SIZE_MAX and ENOENT is returned. EINVAL is returned for
 * an invalid instance, a NULL cursor, or a cursor that is already SIZE_MAX.
 */
errno_t DECLARE(hash_set_iterate)(const hash_set_t *const instance, size_t *const cursor, value_t *const item)
{
    size_t pos;

    if ((!instance) || (!cursor) || (*cursor >= SIZE_MAX) || (!instance->data.items)) {
        return EINVAL;
    }

    pos = *cursor;
    while (pos < instance->data.capacity) {
        if (IS_VALID(instance->data, pos)) {
            SAFE_SET(item, instance->data.items[pos]);
            *cursor = pos + 1U;
            return 0;
        }
        ++pos;
    }

    /* exhausted: park the cursor so that further calls are rejected */
    *cursor = SIZE_MAX;
    return ENOENT;
}
2022-11-28 02:26:52 +01:00
size_t DECLARE(hash_set_size)(const hash_set_t *const instance)
2022-11-19 17:34:56 +01:00
{
2022-11-21 15:02:07 +01:00
return instance ? instance->valid : 0U;
}
2022-11-28 02:26:52 +01:00
errno_t DECLARE(hash_set_info)(const hash_set_t *const instance, size_t *const capacity, size_t *const valid, size_t *const deleted, size_t *const limit)
2022-11-21 15:02:07 +01:00
{
2022-11-29 21:22:23 +01:00
if ((!instance) || (!instance->data.items))
2022-11-21 15:02:07 +01:00
{
return EINVAL;
}
2022-12-01 16:53:36 +01:00
SAFE_SET(capacity, instance->data.capacity);
SAFE_SET(valid, instance->valid);
SAFE_SET(deleted, instance->deleted);
SAFE_SET(limit, instance->limit);
2022-11-21 15:02:07 +01:00
return 0;
2022-11-19 17:34:56 +01:00
}
/*
 * Walks every slot of the table and hands it to 'callback' together with its
 * index and a state character: 'u' for an unused slot, 'v' for a valid item
 * and 'd' for a deleted one. The stored value is passed for every slot,
 * whatever its state.
 *
 * Returns 0 after all slots were visited, ECANCELED as soon as the callback
 * returns zero, or EINVAL for an invalid instance or a NULL callback.
 */
HASHSET_API errno_t DECLARE(hash_set_dump)(const hash_set_t *const instance, const hash_set_callback_t callback)
{
    size_t pos;

    if ((!instance) || (!instance->data.items) || (!callback)) {
        return EINVAL;
    }

    for (pos = 0U; pos < instance->data.capacity; ++pos) {
        char state = 'u';
        if (get_flag(instance->data.used, pos)) {
            state = get_flag(instance->data.deleted, pos) ? 'd' : 'v';
        }
        if (!callback(pos, state, instance->data.items[pos])) {
            return ECANCELED;
        }
    }

    return 0;
}
2022-11-29 21:22:23 +01:00
#endif /*_LIBHASHSET_GENERIC_SET_INCLUDED*/