From d76d0f6e4bc2dc034df979a54f3d794e6174dee1 Mon Sep 17 00:00:00 2001 From: LoRd_MuldeR Date: Fri, 9 Apr 2021 01:00:29 +0200 Subject: [PATCH] Do not use _umul128() intrinsic with MSVC, as it is not faster at all + some code clean-up. --- libslunkcrypt/src/slunkcrypt.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/libslunkcrypt/src/slunkcrypt.c b/libslunkcrypt/src/slunkcrypt.c index 50b0910..1e7bbc3 100644 --- a/libslunkcrypt/src/slunkcrypt.c +++ b/libslunkcrypt/src/slunkcrypt.c @@ -12,12 +12,6 @@ #include #include -/* Intrinsic */ -#if defined(_MSC_VER) && defined(_M_X64) -# include -# pragma intrinsic(_umul128) -#endif - /* Compiler compatibility */ #if defined(_MSC_VER) # define FORCE_INLINE __forceinline @@ -103,17 +97,14 @@ static FORCE_INLINE uint8_t byte_u64(const uint64_t value, const size_t off) // 128-Bit math support // ========================================================================== -static FORCE_INLINE void mult_u128(uint128_t *const out, const uint128_t lhs, const uint128_t rhs) +#define READ_U128(X) ((((__uint128_t)(X).hi) << 64U) | ((__uint128_t)(X).lo)) + +static FORCE_INLINE void multiply_u128(uint128_t *const out, const uint128_t lhs, const uint128_t rhs) { #if defined(__GNUC__) && defined(__SIZEOF_INT128__) - const __uint128_t lhs_128 = ((__uint128_t)lhs.hi << 64U) | lhs.lo; - const __uint128_t rhs_128 = ((__uint128_t)rhs.hi << 64U) | rhs.lo; - const __uint128_t out_128 = lhs_128 * rhs_128; - out->hi = (uint64_t)(out_128 >> 64U); - out->lo = (uint64_t)(out_128 & 0xFFFFFFFFFFFFFFFF); -#else -#if defined(_MSC_VER) && defined(_M_X64) - out->lo = _umul128(lhs.lo, rhs.lo, &out->hi); + const __uint128_t tmp = READ_U128(lhs) * READ_U128(rhs); + out->hi = (uint64_t)(tmp >> 64U); + out->lo = (uint64_t)(tmp & 0xFFFFFFFFFFFFFFFF); #else const uint64_t lolo = (lhs.lo & 0xFFFFFFFF) * (rhs.lo & 0xFFFFFFFF); const uint64_t hilo = (lhs.lo >> 32U) * (rhs.lo & 0xFFFFFFFF); @@ -122,7 +113,6 @@ 
static FORCE_INLINE void mult_u128(uint128_t *const out, const uint128_t lhs, co const uint64_t crss = (lolo >> 32U) + (hilo & 0xFFFFFFFF) + lohi; out->hi = (hilo >> 32U) + (crss >> 32) + hihi; out->lo = (crss << 32U) | (lolo & 0xFFFFFFFF); -#endif out->hi += (lhs.hi * rhs.lo) + (lhs.lo * rhs.hi); /* 128x128=128 */ #endif } @@ -140,7 +130,7 @@ static FORCE_INLINE void hash_update_str(uint128_t *const hash, const uint8_t *c for (i = 0U; i < data_len; ++i) { hash->lo ^= data[i]; - mult_u128(hash, *hash, HASH_MAGIC_PRIME); + multiply_u128(hash, *hash, HASH_MAGIC_PRIME); } } @@ -150,7 +140,7 @@ static FORCE_INLINE void hash_update_u64(uint128_t *const hash, const uint64_t v for (i = 0U; i < sizeof(uint64_t); ++i) { hash->lo ^= byte_u64(value, i); - mult_u128(hash, *hash, HASH_MAGIC_PRIME); + multiply_u128(hash, *hash, HASH_MAGIC_PRIME); } } @@ -179,9 +169,9 @@ static FORCE_INLINE uint64_t keygen_loop(uint64_t salt, const uint8_t *const pas static void generate_key(uint64_t *const key, const uint64_t salt, const uint16_t pepper, const uint8_t *const passwd, const size_t passwd_len) { - key[0U] = keygen_loop(0x243F6A8885A308D3 + salt + pepper, passwd, passwd_len); - key[1U] = keygen_loop(0x13198A2E03707344 + salt + pepper, passwd, passwd_len); - key[2U] = keygen_loop(0xA4093822299F31D0 + salt + pepper, passwd, passwd_len); + key[0U] = keygen_loop(0x162603FA1CDA99D3 + salt + pepper, passwd, passwd_len); + key[1U] = keygen_loop(0xBFDEC4A6C1A46E09 + salt + pepper, passwd, passwd_len); + key[2U] = keygen_loop(0x6BA17D11624973EE + salt + pepper, passwd, passwd_len); } // ==========================================================================