This commit is contained in:
Lipeng Zhu 2024-04-18 03:04:22 +00:00 committed by GitHub
commit ad1f73d5f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 101 additions and 0 deletions

View File

@ -280,6 +280,10 @@ ifeq ($(MALLOC),jemalloc)
FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS)
endif
# Optional hash selection for HyperLogLog: when HLL_HASH is set to "wyhash",
# USE_WYHASH is defined for the C sources, which makes the HyperLogLog code
# compile its wyhash implementation instead of MurmurHash64A.
ifeq ($(HLL_HASH),wyhash)
FINAL_CFLAGS+= -DUSE_WYHASH
endif
# LIBSSL & LIBCRYPTO
LIBSSL_LIBS=
LIBSSL_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libssl && echo $$?)

View File

@ -369,6 +369,7 @@ static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
/* ========================= HyperLogLog algorithm ========================= */
#ifndef USE_WYHASH
/* Our hash function is MurmurHash2, 64 bit version.
* It was modified for Redis in order to provide the same result in
* big and little endian archs (endian neutral). */
@ -424,7 +425,99 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
h ^= h >> r;
return h;
}
#else
/* Full 64x64 -> 128 bit unsigned multiplication of *A by *B.
 * On return *A holds the low 64 bits of the product and *B the high 64 bits. */
static inline void _wymum(uint64_t *A, uint64_t *B) {
#if defined(__SIZEOF_INT128__)
    /* A native 128 bit integer type is available: multiply directly. */
    const __uint128_t product = (__uint128_t)*A * *B;
    *B = (uint64_t)(product >> 64);
    *A = (uint64_t)product;
#else
    /* Portable path: split both operands into 32 bit halves, compute the
     * four partial products and recombine them while tracking carries. */
    const uint64_t a_lo = (uint32_t)*A, a_hi = *A >> 32;
    const uint64_t b_lo = (uint32_t)*B, b_hi = *B >> 32;

    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t cross_a = a_hi * b_lo;
    const uint64_t cross_b = b_hi * a_lo;
    const uint64_t hi_hi = a_hi * b_hi;

    /* Add the low halves of the cross products into the low word; every
     * wrap-around contributes one carry to the high word. */
    const uint64_t partial = lo_lo + (cross_a << 32);
    uint64_t carry = partial < lo_lo;
    const uint64_t low = partial + (cross_b << 32);
    carry += low < partial;

    *A = low;
    *B = hi_hi + (cross_a >> 32) + (cross_b >> 32) + carry;
#endif
}
/* Multiplies A by B through _wymum() and folds the two resulting 64 bit
 * halves into a single 64 bit value by XOR-ing them together. */
static inline uint64_t _wymix(uint64_t A, uint64_t B) {
    uint64_t lo = A, hi = B;
    _wymum(&lo, &hi);
    return lo ^ hi;
}
/* _wyr8() / _wyr4(): load 8 or 4 bytes starting at 'p' and return them as an
 * integer. The bytes are copied with memcpy(); when the host is not little
 * endian the loaded value is byte-swapped, so every variant yields the same
 * number for the same input bytes. */
#if (BYTE_ORDER == LITTLE_ENDIAN)
/* Little endian host: the in-memory representation is used as is. */
static inline uint64_t _wyr8(const uint8_t *p) {
    uint64_t word;
    memcpy(&word, p, sizeof(word));
    return word;
}
static inline uint64_t _wyr4(const uint8_t *p) {
    uint32_t word;
    memcpy(&word, p, sizeof(word));
    return word;
}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
/* Non little endian host with compiler byte-swap builtins available. */
static inline uint64_t _wyr8(const uint8_t *p) {
    uint64_t word;
    memcpy(&word, p, sizeof(word));
    return __builtin_bswap64(word);
}
static inline uint64_t _wyr4(const uint8_t *p) {
    uint32_t word;
    memcpy(&word, p, sizeof(word));
    return __builtin_bswap32(word);
}
#else
/* Non little endian host without builtins: reverse the byte order by hand,
 * moving the lowest byte of 'word' into 'swapped' on every iteration. */
static inline uint64_t _wyr8(const uint8_t *p) {
    uint64_t word, swapped = 0;
    memcpy(&word, p, sizeof(word));
    for (size_t i = 0; i < sizeof(word); i++) {
        swapped = (swapped << 8) | (word & 0xff);
        word >>= 8;
    }
    return swapped;
}
static inline uint64_t _wyr4(const uint8_t *p) {
    uint32_t word, swapped = 0;
    memcpy(&word, p, sizeof(word));
    for (size_t i = 0; i < sizeof(word); i++) {
        swapped = (swapped << 8) | (word & 0xff);
        word >>= 8;
    }
    return swapped;
}
#endif
/* Packs three bytes picked from the 'k' bytes at 'p' into one value:
 * p[0] goes to bits 16-23, p[k/2] to bits 8-15 and p[k-1] to bits 0-7.
 * 'k' must be at least 1 since p[k-1] is read. */
static inline uint64_t _wyr3(const uint8_t *p, size_t k) {
    const uint64_t first = p[0];
    const uint64_t middle = p[k >> 1];
    const uint64_t last = p[k - 1];
    return (first << 16) | (middle << 8) | last;
}
/* Table of four 64 bit constants meant to be passed as the 'secret'
 * argument of wyhash(). */
static const uint64_t _wyp[4] = {
    UINT64_C(0x2d358dccaa6c78a5),
    UINT64_C(0x8bb84b93962eacc9),
    UINT64_C(0x4b33a62ed433d4a3),
    UINT64_C(0x4d5a2da51de1aa47),
};
/* Hashing algorithm inspired by
 * wyhash: https://github.com/wangyi-fudan/wyhash
 *
 * Computes a 64 bit hash of the 'len' bytes starting at 'key'.
 *
 * key    - pointer to the bytes to hash.
 * len    - number of bytes to hash (may be 0).
 * seed   - 64 bit seed mixed into the result.
 * secret - array of at least four 64 bit constants (secret[0..3] are read).
 *
 * Inputs longer than 16 bytes are consumed in 48 byte rounds (three
 * independent accumulators), then in 16 byte rounds; the last 16 bytes of the
 * input always provide the final two words. Shorter inputs are loaded
 * directly into the two final words. */
uint64_t wyhash (const void *key, size_t len, uint64_t seed, const uint64_t *secret) {
    const uint8_t *cur = (const uint8_t *)key;
    uint64_t a = 0, b = 0; /* Both stay 0 for an empty input. */

    seed ^= _wymix(seed ^ secret[0], secret[1]);

    if (len > 16) {
        size_t left = len;

        if (left >= 48) {
            /* Three accumulators advance in parallel over 48 byte rounds
             * and are merged back into 'seed' afterwards. */
            uint64_t lane1 = seed, lane2 = seed;
            do {
                seed = _wymix(_wyr8(cur) ^ secret[1], _wyr8(cur + 8) ^ seed);
                lane1 = _wymix(_wyr8(cur + 16) ^ secret[2], _wyr8(cur + 24) ^ lane1);
                lane2 = _wymix(_wyr8(cur + 32) ^ secret[3], _wyr8(cur + 40) ^ lane2);
                cur += 48;
                left -= 48;
            } while (left >= 48);
            seed ^= lane1 ^ lane2;
        }

        /* Remaining full 16 byte rounds; 1..16 bytes are left afterwards,
         * or 0 when 'len' was an exact multiple of 48. */
        for (; left > 16; left -= 16, cur += 16) {
            seed = _wymix(_wyr8(cur) ^ secret[1], _wyr8(cur + 8) ^ seed);
        }

        /* cur + left is the end of the input: take its last 16 bytes, which
         * may overlap bytes already consumed above. */
        a = _wyr8(cur + left - 16);
        b = _wyr8(cur + left - 8);
    } else if (len >= 4) {
        /* 4..16 bytes: four (possibly overlapping) 32 bit loads. 'off' is 0
         * for len < 8, 4 for len 8..15 and 8 for len 16. */
        const size_t off = (len >> 3) << 2;
        a = (_wyr4(cur) << 32) | _wyr4(cur + off);
        b = (_wyr4(cur + len - 4) << 32) | _wyr4(cur + len - 4 - off);
    } else if (len > 0) {
        /* 1..3 bytes: only 'a' carries input, 'b' remains 0. */
        a = _wyr3(cur, len);
    }

    /* Final mixing of the two words with the secret, the seed and the length. */
    a ^= secret[1];
    b ^= seed;
    _wymum(&a, &b);
    return _wymix(a ^ secret[0] ^ len, b ^ secret[1]);
}
#endif
/* Given a string element to add to the HyperLogLog, returns the length
* of the pattern 000..1 of the element hash. As a side effect 'regp' is
* set to the register index this element hashes to. */
@ -443,7 +536,11 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
*
* This may sound inefficient, but actually in the average case
* there is a high probability of finding a 1 after a few iterations. */
#ifndef USE_WYHASH
hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
#else
hash = wyhash(ele, elesize, 0xadc83b19ULL, _wyp);
#endif
index = hash & HLL_P_MASK; /* Register index. */
hash >>= HLL_P; /* Remove bits used to address the register. */
hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates