299 lines
6.2 KiB
C
Raw Normal View History

2022-11-19 17:34:56 +01:00
/******************************************************************************/
/* HashSet for C99, by LoRd_MuldeR <MuldeR2@GMX.de> */
/* This work has been released under the CC0 1.0 Universal license! */
/******************************************************************************/
#include "hash_set.h"
/* CRT */
#include <string.h>
2022-11-19 20:28:13 +01:00
#include <errno.h>
#include <stdbool.h>
2022-11-19 17:34:56 +01:00
/* INLINE: compiler-specific spelling of the "inline" hint. */
/* It expands to nothing on compilers that are not recognized below. */
#ifdef __GNUC__
#  define INLINE __inline__
#elif defined(_MSC_VER)
#  define INLINE __inline
#else
#  define INLINE
#endif
/* Backing storage of a hash set: the slot array plus an occupancy bitmap. */
struct _hash_set_data
{
/* Slot array; alloc_data() allocates it with "capacity" zero-initialized elements. */
uint64_t *values;
/* Occupancy bitmap, one bit per slot: bit (index % 8) of byte (index / 8) is set when the slot holds a value. */
uint8_t *used;
/* Number of slots in "values"; reset to zero by free_data(). */
size_t capacity;
};
struct _hash_set
{
double load_factor;
2022-11-19 23:21:14 +01:00
uint16_t options;
2022-11-19 17:34:56 +01:00
size_t size, limit;
2022-11-19 18:42:09 +01:00
struct _hash_set_data data;
2022-11-19 17:34:56 +01:00
};
/* Clamp VAL to the range [MIN, MAX]; the lower bound is checked first. */
/* CAUTION: this is a macro, so each argument may be evaluated more than once! */
#define BOUND(MIN,VAL,MAX) \
    (((MIN) > (VAL)) ? (MIN) : \
    (((MAX) < (VAL)) ? (MAX) : (VAL)))
/* ========================================================================= */
/* PRIVATE FUNCTIONS */
/* ========================================================================= */
2022-11-19 20:28:13 +01:00
/* Compute the home slot of "value" in a table that has "capacity" slots. */
/* The value is added to a 64-bit offset and multiplied by a 64-bit prime */
/* (unsigned arithmetic, wrapping modulo 2^64); the product is then reduced */
/* modulo "capacity". The two constants equal the 64-bit FNV offset basis and */
/* FNV prime. The caller must pass a non-zero capacity. */
static INLINE size_t hash(const uint64_t value, const size_t capacity)
{
    const uint64_t offset = UINT64_C(14695981039346656037);
    const uint64_t prime = UINT64_C(1099511628211);
    const uint64_t mixed = (offset + value) * prime;

    return (size_t) (mixed % capacity);
}
2022-11-19 22:34:33 +01:00
/* Double "value" without wrapping around. */
/* Returns value * 2 while value is below SIZE_MAX / 2; otherwise saturates at SIZE_MAX. */
static INLINE size_t safe_mult2(const size_t value)
{
    if (value < (SIZE_MAX >> 1))
    {
        return value << 1;
    }

    return SIZE_MAX;
}
/* Round "d" to the nearest size_t (halves are rounded up), clamped to [0, SIZE_MAX]. */
/* Converting a double that is outside the range of size_t is undefined behavior, */
/* so negative values and NaN yield 0 and values at or above SIZE_MAX yield SIZE_MAX. */
/* For every non-negative in-range input the result is (size_t)(d + 0.5). */
static INLINE size_t round(double d)
{
    /* The negated comparison also catches NaN, for which every comparison is false */
    if (!(d > 0.0))
    {
        return 0U;
    }

    /* (double) SIZE_MAX may round up to 2^N, which no longer fits into size_t */
    if (d >= (double) SIZE_MAX)
    {
        return SIZE_MAX;
    }

    return (size_t) (d + 0.5);
}
/* Starting from 2, keep doubling until the result is not below "minimum". */
/* The smallest possible result is 2. Because the doubling saturates, the */
/* result is SIZE_MAX when no sufficiently large power of two fits into size_t. */
static INLINE size_t next_pow2(const size_t minimum)
{
    size_t candidate;

    for (candidate = 2U; candidate < minimum; candidate = safe_mult2(candidate))
    {
        /* nothing to do, the loop header does all the work */
    }

    return candidate;
}
2022-11-19 20:28:13 +01:00
static INLINE bool alloc_data(struct _hash_set_data *const data, const size_t capacity)
2022-11-19 17:34:56 +01:00
{
memset(data, 0, sizeof(struct _hash_set_data));
data->values = (uint64_t*) calloc(capacity, sizeof(uint64_t));
if (!data->values)
{
2022-11-19 20:28:13 +01:00
return false;
2022-11-19 17:34:56 +01:00
}
2022-11-19 22:34:33 +01:00
data->used = (uint8_t*) calloc((capacity / 8U) + ((capacity % 8U != 0U) ? 1U : 0U), sizeof(uint8_t));
2022-11-19 17:34:56 +01:00
if (!data->used)
{
free(data->values);
data->values = NULL;
2022-11-19 20:28:13 +01:00
return false;
2022-11-19 17:34:56 +01:00
}
data->capacity = capacity;
2022-11-19 20:28:13 +01:00
return true;
2022-11-19 17:34:56 +01:00
}
static INLINE void free_data(struct _hash_set_data *const data)
{
if (data)
{
2022-11-19 18:42:09 +01:00
data->capacity = 0U;
2022-11-19 17:34:56 +01:00
if (data->values)
{
free(data->values);
data->values = NULL;
}
if (data->used)
{
free(data->used);
data->used = NULL;
}
}
}
2022-11-19 20:28:13 +01:00
static INLINE bool is_used(struct _hash_set_data *const data, const size_t index)
2022-11-19 17:34:56 +01:00
{
return (data->used[index / 8U] >> (index % 8U)) & 1U;
}
2022-11-19 22:34:33 +01:00
static INLINE bool find_slot(struct _hash_set_data* const data, const uint64_t value, size_t *const index_out)
2022-11-19 17:34:56 +01:00
{
2022-11-19 22:34:33 +01:00
size_t index = hash(value, data->capacity);
2022-11-19 17:34:56 +01:00
2022-11-19 22:34:33 +01:00
while (is_used(data, index))
2022-11-19 17:34:56 +01:00
{
2022-11-19 22:34:33 +01:00
if (data->values[index] == value)
2022-11-19 17:34:56 +01:00
{
2022-11-19 22:34:33 +01:00
if (index_out)
{
*index_out = index;
}
2022-11-19 20:28:13 +01:00
return true;
2022-11-19 17:34:56 +01:00
}
2022-11-19 22:34:33 +01:00
if (++index >= data->capacity)
2022-11-19 17:34:56 +01:00
{
2022-11-19 22:34:33 +01:00
index = 0U;
2022-11-19 17:34:56 +01:00
}
}
2022-11-19 22:34:33 +01:00
if (index_out)
{
*index_out = index;
}
2022-11-19 20:28:13 +01:00
return false;
2022-11-19 17:34:56 +01:00
}
2022-11-19 20:28:13 +01:00
static INLINE bool insert_value(struct _hash_set_data *const data, const size_t index, const uint64_t value)
2022-11-19 17:34:56 +01:00
{
2022-11-19 18:42:09 +01:00
if (is_used(data, index))
{
2022-11-19 20:28:13 +01:00
return false;
2022-11-19 18:42:09 +01:00
}
2022-11-19 17:34:56 +01:00
2022-11-19 18:42:09 +01:00
data->values[index] = value;
data->used[index / 8U] |= UINT8_C(1) << (index % 8U);
2022-11-19 20:28:13 +01:00
return true;
2022-11-19 17:34:56 +01:00
}
2022-11-19 20:28:13 +01:00
/* Re-hash all stored values into a freshly allocated table of "new_capacity" slots. */
/* */
/* Returns 0 on success; the old storage is released, the new table is */
/* installed and the growth limit is recomputed from the load factor. */
/* Returns ENOMEM if the new table cannot be allocated. */
/* Returns EFAULT if a value turns out to be a duplicate or cannot be placed. */
/* On any failure the set is left unchanged. */
static INLINE errno_t grow_set(hash_set_t *const instance, const size_t new_capacity)
{
    struct _hash_set_data *const current = &instance->data;
    struct _hash_set_data grown;
    size_t pos;

    if (!alloc_data(&grown, new_capacity))
    {
        return ENOMEM;
    }

    for (pos = 0U; pos < current->capacity; ++pos)
    {
        size_t slot;

        if (!is_used(current, pos))
        {
            continue; /* nothing stored in this slot */
        }

        /* The value must not exist in the new table yet, and its slot must be free */
        if (find_slot(&grown, current->values[pos], &slot) || (!insert_value(&grown, slot, current->values[pos])))
        {
            free_data(&grown);
            return EFAULT;
        }
    }

    free_data(current);
    *current = grown;
    instance->limit = round(current->capacity * instance->load_factor);

    return 0;
}
/* ========================================================================= */
/* PUBLIC FUNCTIONS */
/* ========================================================================= */
2022-11-19 23:21:14 +01:00
hash_set_t *hash_set_create(const size_t initial_capacity, const double load_factor, const uint16_t options)
2022-11-19 17:34:56 +01:00
{
hash_set_t *const instance = (hash_set_t*) calloc(1U, sizeof(hash_set_t));
if (!instance)
{
return NULL;
}
if (!alloc_data(&instance->data, (initial_capacity > 0U) ? next_pow2(initial_capacity) : 1024U))
{
free(instance);
return NULL;
}
2022-11-19 22:34:33 +01:00
instance->load_factor = (load_factor > 0.0) ? BOUND(0.001953125, load_factor, 1.0) : 0.666;
2022-11-19 23:21:14 +01:00
instance->options = options;
2022-11-19 17:34:56 +01:00
instance->limit = round(instance->data.capacity * instance->load_factor);
2022-11-19 20:28:13 +01:00
2022-11-19 17:34:56 +01:00
return instance;
}
/* Destroy a hash set and release all memory that it owns. */
/* The instance is zeroed before it is freed. A NULL pointer is ignored. */
void hash_set_destroy(hash_set_t *const instance)
{
    if (!instance)
    {
        return;
    }

    free_data(&instance->data);
    memset(instance, 0, sizeof(hash_set_t));
    free(instance);
}
2022-11-19 20:28:13 +01:00
/* Insert "value" into the set. */
/* */
/* Returns 0 if the value has been added. */
/* Returns EINVAL if "instance" is NULL or has no storage. */
/* Returns EEXIST if the value is already present. */
/* Returns ENOMEM if the table has to grow but cannot. With the option */
/* HASHSET_OPT_FAILFAST this is reported right away; otherwise the */
/* set keeps accepting values until every slot is occupied. */
/* Returns EFAULT (or the error reported by grow_set()) on internal inconsistency. */
errno_t hash_set_insert(hash_set_t *const instance, const uint64_t value)
{
    size_t slot;

    if ((instance == NULL) || (instance->data.values == NULL))
    {
        return EINVAL;
    }

    if (find_slot(&instance->data, value, &slot))
    {
        return EEXIST;
    }

    if ((instance->size >= instance->limit) || (instance->size >= instance->data.capacity))
    {
        bool cannot_grow = false;

        if (instance->data.capacity == SIZE_MAX)
        {
            cannot_grow = true; /* the capacity is already at its maximum */
        }
        else
        {
            const errno_t error = grow_set(instance, safe_mult2(instance->data.capacity));
            if (error == 0)
            {
                /* The table has been replaced, so the slot must be located again */
                if (find_slot(&instance->data, value, &slot))
                {
                    return EFAULT;
                }
            }
            else
            {
                /* Do not attempt to grow again before the table is completely filled */
                instance->limit = instance->data.capacity;
                if (error != ENOMEM)
                {
                    return error;
                }
                cannot_grow = true; /* the allocation has failed */
            }
        }

        /* Without fail-fast, carry on for as long as there still is a free slot */
        if (cannot_grow && ((instance->options & HASHSET_OPT_FAILFAST) || (instance->size >= instance->data.capacity)))
        {
            return ENOMEM;
        }
    }

    if (!insert_value(&instance->data, slot, value))
    {
        return EFAULT;
    }

    ++instance->size;
    return 0;
}
2022-11-19 20:28:13 +01:00
/* Test whether the set contains "value". */
/* Returns 0 if the value is present and ENOENT if it is not. */
/* Returns EINVAL if "instance" is NULL or has no storage. */
errno_t hash_set_contains(hash_set_t *const instance, const uint64_t value)
{
    if (instance && instance->data.values)
    {
        if (find_slot(&instance->data, value, NULL))
        {
            return 0;
        }
        return ENOENT;
    }

    return EINVAL;
}
2022-11-19 20:28:13 +01:00
/* Return the number of values stored in the set; a NULL instance yields 0. */
size_t hash_set_size(hash_set_t *const instance)
{
    if (!instance)
    {
        return 0U;
    }

    return instance->size;
}