Murmur哈希是一种非加密型散列函数,适用于一般的基于散列的查找。它由Austin Appleby于2008年创建,其源码与名为“SMHasher”的测试套件一同托管在GitHub上。
与其它流行的哈希函数相比,对于规律性较强的key,MurmurHash的随机分布特征表现更良好。
Redis在实现字典时用到了两种不同的哈希算法,MurmurHash便是其中一种(另一种是djb),在Redis中应用十分广泛,包括数据库、集群、哈希键、阻塞操作等功能都用到了这个算法。
/*
 * MurmurHash2 (32-bit): non-cryptographic hash of the `len` bytes at `key`.
 *
 * key  - pointer to the input bytes; no alignment is required.
 * len  - number of bytes to hash.
 * seed - initial value mixed into the hash.
 *
 * Returns the 32-bit hash. Input words are assembled byte by byte in
 * little-endian order, so the result is the same on little- and big-endian
 * hosts and no misaligned or type-punned load is performed.
 */
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    uint32_t h = seed ^ (uint32_t)len;

    // Mix 4 bytes at a time into the hash
    const unsigned char * data = (const unsigned char *)key;

    while(len >= 4)
    {
        // Build the word from individual bytes instead of dereferencing a
        // uint32_t pointer: that cast dropped const, could fault on
        // strict-alignment targets and made the hash endian-dependent.
        uint32_t k = (uint32_t)data[0]
                   | ((uint32_t)data[1] << 8)
                   | ((uint32_t)data[2] << 16)
                   | ((uint32_t)data[3] << 24);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array.
    // The cases deliberately fall through so every remaining byte is mixed.
    switch(len)
    {
    case 3: h ^= (uint32_t)data[2] << 16; /* fall through */
    case 2: h ^= (uint32_t)data[1] << 8;  /* fall through */
    case 1: h ^= (uint32_t)data[0];
            h *= m;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
import ctypes
def int_overflow(val):
    """Wrap *val* into the signed 32-bit range, emulating integer overflow.

    Values already inside [-2147483648, 2147483647] are returned untouched;
    anything outside is reduced modulo 2**32 back into that range.
    """
    int32_max = 2147483647
    int32_min = -int32_max - 1
    if int32_min <= val <= int32_max:
        return val
    # Shift into [0, 2**32), take the modulus, then shift back down.
    half_span = int32_max + 1
    full_span = 2 * half_span
    return (val + half_span) % full_span - int32_max - 1
# Logical right shift; r defaults to 24 (the original note says this helper is not used here).
def unsigned_right_shitf(n, r=24):
    """Shift *n* right by *r* bits after treating it as an unsigned 32-bit value.

    A negative *n* is first reinterpreted as its unsigned 32-bit bit pattern.
    The shifted value is passed through ``int_overflow`` before being
    returned. For a negative *r* the value is shifted left by ``-r`` bits
    instead and the wrapped result is negated.
    """
    # Negative numbers become their uint32 equivalent.
    bits = n & 0xFFFFFFFF if n < 0 else n
    # Shift counts are normally positive; for compatibility with JS-like
    # callers a negative count is turned into a left shift.
    if r < 0:
        shifted_left = bits << -r
        return -int_overflow(shifted_left)
    shifted_right = bits >> r
    return int_overflow(shifted_right)
# Overflowing multiplication; the default m = 0x5bd1e995 = 1540483477.
def int_overflow_multiplication(a, m=1540483477):
    """Return ``a * m`` wrapped by ``int_overflow``."""
    return int_overflow(a * m)
# The seed is arbitrary; change it to whatever value you need.
def murmurhash(origin_string, seed=111111):
    """Return the 32-bit MurmurHash2 of *origin_string*.

    Args:
        origin_string: Text to hash (encoded with ``str.encode()``'s default
            encoding), or a ``bytes``/``bytearray`` object hashed as-is.
        seed: Integer seed; only its low 32 bits take part in the hash.

    Returns:
        The hash as an unsigned integer in ``[0, 2**32)``.

    All arithmetic is kept in unsigned 32-bit form. Python's ``>>`` on a
    negative int is an arithmetic shift, so wrapping intermediates to signed
    values (as this function previously did) smeared sign bits into the
    ``h ^= h >> n`` steps and produced wrong hashes whenever bit 31 was set.
    """
    mask = 0xFFFFFFFF
    m = 0x5BD1E995
    r = 24

    if isinstance(origin_string, (bytes, bytearray)):
        data = bytes(origin_string)
    else:
        data = origin_string.encode()
    length = len(data)

    h = (seed ^ length) & mask

    # Mix the input four bytes at a time, each word read as little-endian.
    body_end = length - length % 4
    for offset in range(0, body_end, 4):
        k = int.from_bytes(data[offset:offset + 4], "little")
        k = (k * m) & mask
        k ^= k >> r
        k = (k * m) & mask
        h = ((h * m) & mask) ^ k

    # Fold in the 1-3 trailing bytes: byte 0 lowest, then << 8, then << 16.
    tail = data[body_end:]
    if tail:
        h ^= int.from_bytes(tail, "little")
        h = (h * m) & mask

    # Final avalanche so the last bytes are well incorporated.
    h ^= h >> 13
    h = (h * m) & mask
    h ^= h >> 15
    return h
if __name__ == '__main__':
    # Demo run: hash a sample string and show the result instead of
    # discarding it (a stray markdown fence after the call was removed).
    print(murmurhash('abc'))