Do not use the _umul128() intrinsic with MSVC, as it is not faster at all; also some code clean-up.
This commit is contained in:
parent
3cdfc9d9ab
commit
d76d0f6e4b
@ -12,12 +12,6 @@
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* Intrinsic */
|
||||
#if defined(_MSC_VER) && defined(_M_X64)
|
||||
# include <intrin.h>
|
||||
# pragma intrinsic(_umul128)
|
||||
#endif
|
||||
|
||||
/* Compiler compatibility */
|
||||
#if defined(_MSC_VER)
|
||||
# define FORCE_INLINE __forceinline
|
||||
@ -103,17 +97,14 @@ static FORCE_INLINE uint8_t byte_u64(const uint64_t value, const size_t off)
|
||||
// 128-Bit math support
|
||||
// ==========================================================================
|
||||
|
||||
static FORCE_INLINE void mult_u128(uint128_t *const out, const uint128_t lhs, const uint128_t rhs)
|
||||
#define READ_U128(X) ((((__uint128_t)(X).hi) << 64U) | ((__uint128_t)(X).lo))
|
||||
|
||||
static FORCE_INLINE void multiply_u128(uint128_t *const out, const uint128_t lhs, const uint128_t rhs)
|
||||
{
|
||||
#if defined(__GNUC__) && defined(__SIZEOF_INT128__)
|
||||
const __uint128_t lhs_128 = ((__uint128_t)lhs.hi << 64U) | lhs.lo;
|
||||
const __uint128_t rhs_128 = ((__uint128_t)rhs.hi << 64U) | rhs.lo;
|
||||
const __uint128_t out_128 = lhs_128 * rhs_128;
|
||||
out->hi = (uint64_t)(out_128 >> 64U);
|
||||
out->lo = (uint64_t)(out_128 & 0xFFFFFFFFFFFFFFFF);
|
||||
#else
|
||||
#if defined(_MSC_VER) && defined(_M_X64)
|
||||
out->lo = _umul128(lhs.lo, rhs.lo, &out->hi);
|
||||
const __uint128_t tmp = READ_U128(lhs) * READ_U128(rhs);
|
||||
out->hi = (uint64_t)(tmp >> 64U);
|
||||
out->lo = (uint64_t)(tmp & 0xFFFFFFFFFFFFFFFF);
|
||||
#else
|
||||
const uint64_t lolo = (lhs.lo & 0xFFFFFFFF) * (rhs.lo & 0xFFFFFFFF);
|
||||
const uint64_t hilo = (lhs.lo >> 32U) * (rhs.lo & 0xFFFFFFFF);
|
||||
@ -122,7 +113,6 @@ static FORCE_INLINE void mult_u128(uint128_t *const out, const uint128_t lhs, co
|
||||
const uint64_t crss = (lolo >> 32U) + (hilo & 0xFFFFFFFF) + lohi;
|
||||
out->hi = (hilo >> 32U) + (crss >> 32) + hihi;
|
||||
out->lo = (crss << 32U) | (lolo & 0xFFFFFFFF);
|
||||
#endif
|
||||
out->hi += (lhs.hi * rhs.lo) + (lhs.lo * rhs.hi); /* 128x128=128 */
|
||||
#endif
|
||||
}
|
||||
@ -140,7 +130,7 @@ static FORCE_INLINE void hash_update_str(uint128_t *const hash, const uint8_t *c
|
||||
for (i = 0U; i < data_len; ++i)
|
||||
{
|
||||
hash->lo ^= data[i];
|
||||
mult_u128(hash, *hash, HASH_MAGIC_PRIME);
|
||||
multiply_u128(hash, *hash, HASH_MAGIC_PRIME);
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,7 +140,7 @@ static FORCE_INLINE void hash_update_u64(uint128_t *const hash, const uint64_t v
|
||||
for (i = 0U; i < sizeof(uint64_t); ++i)
|
||||
{
|
||||
hash->lo ^= byte_u64(value, i);
|
||||
mult_u128(hash, *hash, HASH_MAGIC_PRIME);
|
||||
multiply_u128(hash, *hash, HASH_MAGIC_PRIME);
|
||||
}
|
||||
}
|
||||
|
||||
@ -179,9 +169,9 @@ static FORCE_INLINE uint64_t keygen_loop(uint64_t salt, const uint8_t *const pas
|
||||
|
||||
static void generate_key(uint64_t *const key, const uint64_t salt, const uint16_t pepper, const uint8_t *const passwd, const size_t passwd_len)
|
||||
{
|
||||
key[0U] = keygen_loop(0x243F6A8885A308D3 + salt + pepper, passwd, passwd_len);
|
||||
key[1U] = keygen_loop(0x13198A2E03707344 + salt + pepper, passwd, passwd_len);
|
||||
key[2U] = keygen_loop(0xA4093822299F31D0 + salt + pepper, passwd, passwd_len);
|
||||
key[0U] = keygen_loop(0x162603FA1CDA99D3 + salt + pepper, passwd, passwd_len);
|
||||
key[1U] = keygen_loop(0xBFDEC4A6C1A46E09 + salt + pepper, passwd, passwd_len);
|
||||
key[2U] = keygen_loop(0x6BA17D11624973EE + salt + pepper, passwd, passwd_len);
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
|
Loading…
Reference in New Issue
Block a user