384 lines
9.0 KiB
Go
384 lines
9.0 KiB
Go
/*
 * Go implementation of Google city hash (MIT license)
 * https://code.google.com/p/cityhash/
 *
 * MIT License http://www.opensource.org/licenses/mit-license.php
 *
 * I don't even want to pretend to understand the details of city hash.
 * I am only reproducing the logic in Go as faithfully as I can.
 *
 */
|
|
|
|
package cityhash102
|
|
|
|
import (
|
|
"encoding/binary"
|
|
)
|
|
|
|
// Multiplicative / xor constants shared by the hash routines in this file.
const (
	k0 = uint64(0xc3a5c85c97cb3127)
	k1 = uint64(0xb492b66fbe98f273)
	k2 = uint64(0x9ae16a3b2f90404f)
	k3 = uint64(0xc949d7c7509e6557)

	// kMul is the multiplier used by hash128to64.
	kMul = uint64(0x9ddfea08eb382d69)
)
|
|
|
|
// fetch64 decodes the first eight bytes of p as a little-endian uint64.
// p must hold at least eight bytes.
func fetch64(p []byte) uint64 {
	le := binary.LittleEndian
	return le.Uint64(p)
}
|
|
|
|
// fetch32 decodes the first four bytes of p as a little-endian uint32.
// p must hold at least four bytes.
func fetch32(p []byte) uint32 {
	le := binary.LittleEndian
	return le.Uint32(p)
}
|
|
|
|
// rotate64 rotates val right by shift bits. A shift of zero returns val
// unchanged.
func rotate64(val uint64, shift uint32) uint64 {
	if shift == 0 {
		return val
	}

	low := val >> shift
	high := val << (64 - shift)
	return low | high
}
|
|
|
|
// rotate32 rotates val right by shift bits. A shift of zero returns val
// unchanged.
func rotate32(val uint32, shift uint32) uint32 {
	if shift == 0 {
		return val
	}

	low := val >> shift
	high := val << (32 - shift)
	return low | high
}
|
|
|
|
// swap64 exchanges the two uint64 values that a and b point to.
func swap64(a, b *uint64) {
	tmp := *a
	*a = *b
	*b = tmp
}
|
|
|
|
// swap32 exchanges the two uint32 values that a and b point to.
func swap32(a, b *uint32) {
	tmp := *a
	*a = *b
	*b = tmp
}
|
|
|
|
// permute3 exchanges *a with *b and then *a with *c. For three distinct
// pointers this leaves *a holding the old *c, *b the old *a and *c the old *b.
func permute3(a, b, c *uint32) {
	*a, *b = *b, *a
	*a, *c = *c, *a
}
|
|
|
|
// rotate64ByAtLeast1 rotates val right by shift bits. Unlike rotate64 it has
// no explicit branch for a zero shift.
func rotate64ByAtLeast1(val uint64, shift uint32) uint64 {
	low := val >> shift
	high := val << (64 - shift)
	return low | high
}
|
|
|
|
// shiftMix xors val with itself shifted right by 47 bits.
func shiftMix(val uint64) uint64 {
	folded := val >> 47
	return val ^ folded
}
|
|
|
|
// Uint128 is a 128-bit value stored as two 64-bit words: element 0 holds the
// lower 64 bits and element 1 holds the higher 64 bits.
type Uint128 [2]uint64

// setLower64 overwrites the lower 64 bits with l.
func (u *Uint128) setLower64(l uint64) {
	u[0] = l
}

// setHigher64 overwrites the higher 64 bits with h.
func (u *Uint128) setHigher64(h uint64) {
	u[1] = h
}

// Lower64 returns the lower 64 bits.
func (u Uint128) Lower64() uint64 {
	return u[0]
}

// Higher64 returns the higher 64 bits.
func (u Uint128) Higher64() uint64 {
	return u[1]
}

// Bytes returns the value as a newly allocated 16-byte slice: the lower word
// followed by the higher word, each encoded little-endian.
func (u Uint128) Bytes() []byte {
	b := make([]byte, 16)
	binary.LittleEndian.PutUint64(b, u[0])
	binary.LittleEndian.PutUint64(b[8:], u[1])
	return b
}
|
|
|
|
func hash128to64(x Uint128) uint64 {
|
|
// Murmur-inspired hashing.
|
|
var a = (x.Lower64() ^ x.Higher64()) * kMul
|
|
a ^= (a >> 47)
|
|
var b = (x.Higher64() ^ a) * kMul
|
|
b ^= (b >> 47)
|
|
b *= kMul
|
|
return b
|
|
}
|
|
|
|
func hashLen16(u, v uint64) uint64 {
|
|
return hash128to64(Uint128{u, v})
|
|
}
|
|
|
|
// hashLen16_3 is the Murmur-inspired 128-to-64-bit mix of (u, v) with a
// caller-supplied multiplier mul.
func hashLen16_3(u, v, mul uint64) uint64 {
	a := (u ^ v) * mul
	a ^= a >> 47

	b := (v ^ a) * mul
	b ^= b >> 47

	return b * mul
}
|
|
|
|
func hashLen0to16(s []byte, length uint32) uint64 {
|
|
if length > 8 {
|
|
var a = fetch64(s)
|
|
var b = fetch64(s[length-8:])
|
|
|
|
return hashLen16(a, rotate64ByAtLeast1(b+uint64(length), length)) ^ b
|
|
}
|
|
|
|
if length >= 4 {
|
|
var a = fetch32(s)
|
|
return hashLen16(uint64(length)+(uint64(a)<<3), uint64(fetch32(s[length-4:])))
|
|
}
|
|
|
|
if length > 0 {
|
|
var a uint8 = uint8(s[0])
|
|
var b uint8 = uint8(s[length>>1])
|
|
var c uint8 = uint8(s[length-1])
|
|
|
|
var y uint32 = uint32(a) + (uint32(b) << 8)
|
|
var z uint32 = length + (uint32(c) << 2)
|
|
|
|
return shiftMix(uint64(y)*k2^uint64(z)*k3) * k2
|
|
}
|
|
|
|
return k2
|
|
}
|
|
|
|
// This probably works well for 16-byte strings as well, but it may be overkill
|
|
func hashLen17to32(s []byte, length uint32) uint64 {
|
|
var a = fetch64(s) * k1
|
|
var b = fetch64(s[8:])
|
|
var c = fetch64(s[length-8:]) * k2
|
|
var d = fetch64(s[length-16:]) * k0
|
|
|
|
return hashLen16(rotate64(a-b, 43)+rotate64(c, 30)+d,
|
|
a+rotate64(b^k3, 20)-c+uint64(length))
|
|
}
|
|
|
|
func weakHashLen32WithSeeds(w, x, y, z, a, b uint64) Uint128 {
|
|
a += w
|
|
b = rotate64(b+a+z, 21)
|
|
var c uint64 = a
|
|
a += x
|
|
a += y
|
|
b += rotate64(a, 44)
|
|
return Uint128{a + z, b + c}
|
|
}
|
|
|
|
func weakHashLen32WithSeeds_3(s []byte, a, b uint64) Uint128 {
|
|
return weakHashLen32WithSeeds(fetch64(s), fetch64(s[8:]), fetch64(s[16:]), fetch64(s[24:]), a, b)
|
|
}
|
|
|
|
func hashLen33to64(s []byte, length uint32) uint64 {
|
|
var z uint64 = fetch64(s[24:])
|
|
var a uint64 = fetch64(s) + (uint64(length)+fetch64(s[length-16:]))*k0
|
|
var b uint64 = rotate64(a+z, 52)
|
|
var c uint64 = rotate64(a, 37)
|
|
|
|
a += fetch64(s[8:])
|
|
c += rotate64(a, 7)
|
|
a += fetch64(s[16:])
|
|
|
|
var vf uint64 = a + z
|
|
var vs = b + rotate64(a, 31) + c
|
|
|
|
a = fetch64(s[16:]) + fetch64(s[length-32:])
|
|
z = fetch64(s[length-8:])
|
|
b = rotate64(a+z, 52)
|
|
c = rotate64(a, 37)
|
|
a += fetch64(s[length-24:])
|
|
c += rotate64(a, 7)
|
|
a += fetch64(s[length-16:])
|
|
|
|
wf := a + z
|
|
ws := b + rotate64(a, 31) + c
|
|
r := shiftMix((vf+ws)*k2 + (wf+vs)*k0)
|
|
return shiftMix(r*k0+vs) * k2
|
|
}
|
|
|
|
func CityHash64(s []byte, length uint32) uint64 {
|
|
if length <= 32 {
|
|
if length <= 16 {
|
|
return hashLen0to16(s, length)
|
|
} else {
|
|
return hashLen17to32(s, length)
|
|
}
|
|
} else if length <= 64 {
|
|
return hashLen33to64(s, length)
|
|
}
|
|
|
|
var x uint64 = fetch64(s)
|
|
var y uint64 = fetch64(s[length-16:]) ^ k1
|
|
var z uint64 = fetch64(s[length-56:]) ^ k0
|
|
|
|
var v Uint128 = weakHashLen32WithSeeds_3(s[length-64:], uint64(length), y)
|
|
var w Uint128 = weakHashLen32WithSeeds_3(s[length-32:], uint64(length)*k1, k0)
|
|
|
|
z += shiftMix(v.Higher64()) * k1
|
|
x = rotate64(z+x, 39) * k1
|
|
y = rotate64(y, 33) * k1
|
|
|
|
length = (length - 1) & ^uint32(63)
|
|
for {
|
|
x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
|
|
y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
|
|
|
|
x ^= w.Higher64()
|
|
y ^= v.Lower64()
|
|
|
|
z = rotate64(z^w.Lower64(), 33)
|
|
v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
|
|
w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
|
|
|
|
swap64(&z, &x)
|
|
s = s[64:]
|
|
length -= 64
|
|
|
|
if length == 0 {
|
|
break
|
|
}
|
|
}
|
|
|
|
return hashLen16(hashLen16(v.Lower64(), w.Lower64())+shiftMix(y)*k1+z, hashLen16(v.Higher64(), w.Higher64())+x)
|
|
}
|
|
|
|
func CityHash64WithSeed(s []byte, length uint32, seed uint64) uint64 {
|
|
return CityHash64WithSeeds(s, length, k2, seed)
|
|
}
|
|
|
|
func CityHash64WithSeeds(s []byte, length uint32, seed0, seed1 uint64) uint64 {
|
|
return hashLen16(CityHash64(s, length)-seed0, seed1)
|
|
}
|
|
|
|
func cityMurmur(s []byte, length uint32, seed Uint128) Uint128 {
|
|
var a uint64 = seed.Lower64()
|
|
var b uint64 = seed.Higher64()
|
|
var c uint64 = 0
|
|
var d uint64 = 0
|
|
var l int32 = int32(length) - 16
|
|
|
|
if l <= 0 { // len <= 16
|
|
a = shiftMix(a*k1) * k1
|
|
c = b*k1 + hashLen0to16(s, length)
|
|
|
|
if length >= 8 {
|
|
d = shiftMix(a + fetch64(s))
|
|
} else {
|
|
d = shiftMix(a + c)
|
|
}
|
|
|
|
} else { // len > 16
|
|
c = hashLen16(fetch64(s[length-8:])+k1, a)
|
|
d = hashLen16(b+uint64(length), c+fetch64(s[length-16:]))
|
|
a += d
|
|
|
|
for {
|
|
a ^= shiftMix(fetch64(s)*k1) * k1
|
|
a *= k1
|
|
b ^= a
|
|
c ^= shiftMix(fetch64(s[8:])*k1) * k1
|
|
c *= k1
|
|
d ^= c
|
|
s = s[16:]
|
|
l -= 16
|
|
|
|
if l <= 0 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
a = hashLen16(a, c)
|
|
b = hashLen16(d, b)
|
|
return Uint128{a ^ b, hashLen16(b, a)}
|
|
}
|
|
|
|
func CityHash128WithSeed(s []byte, length uint32, seed Uint128) Uint128 {
|
|
if length < 128 {
|
|
return cityMurmur(s, length, seed)
|
|
}
|
|
|
|
// We expect length >= 128 to be the common case. Keep 56 bytes of state:
|
|
// v, w, x, y, and z.
|
|
var v, w Uint128
|
|
var x uint64 = seed.Lower64()
|
|
var y uint64 = seed.Higher64()
|
|
var z uint64 = uint64(length) * k1
|
|
|
|
var pos uint32
|
|
var t = s
|
|
|
|
v.setLower64(rotate64(y^k1, 49)*k1 + fetch64(s))
|
|
v.setHigher64(rotate64(v.Lower64(), 42)*k1 + fetch64(s[8:]))
|
|
w.setLower64(rotate64(y+z, 35)*k1 + x)
|
|
w.setHigher64(rotate64(x+fetch64(s[88:]), 53) * k1)
|
|
|
|
// This is the same inner loop as CityHash64(), manually unrolled.
|
|
for {
|
|
x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
|
|
y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
|
|
|
|
x ^= w.Higher64()
|
|
y ^= v.Lower64()
|
|
z = rotate64(z^w.Lower64(), 33)
|
|
v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
|
|
w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
|
|
swap64(&z, &x)
|
|
s = s[64:]
|
|
pos += 64
|
|
|
|
x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
|
|
y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
|
|
x ^= w.Higher64()
|
|
y ^= v.Lower64()
|
|
z = rotate64(z^w.Lower64(), 33)
|
|
v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
|
|
w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
|
|
swap64(&z, &x)
|
|
s = s[64:]
|
|
pos += 64
|
|
length -= 128
|
|
|
|
if length < 128 {
|
|
break
|
|
}
|
|
}
|
|
|
|
y += rotate64(w.Lower64(), 37)*k0 + z
|
|
x += rotate64(v.Lower64()+z, 49) * k0
|
|
|
|
// If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
|
|
var tailDone uint32
|
|
for tailDone = 0; tailDone < length; {
|
|
tailDone += 32
|
|
y = rotate64(y-x, 42)*k0 + v.Higher64()
|
|
|
|
//TODO why not use origin_len ?
|
|
w.setLower64(w.Lower64() + fetch64(t[pos+length-tailDone+16:]))
|
|
x = rotate64(x, 49)*k0 + w.Lower64()
|
|
w.setLower64(w.Lower64() + v.Lower64())
|
|
v = weakHashLen32WithSeeds_3(t[pos+length-tailDone:], v.Lower64(), v.Higher64())
|
|
}
|
|
// At this point our 48 bytes of state should contain more than
|
|
// enough information for a strong 128-bit hash. We use two
|
|
// different 48-byte-to-8-byte hashes to get a 16-byte final result.
|
|
x = hashLen16(x, v.Lower64())
|
|
y = hashLen16(y, w.Lower64())
|
|
|
|
return Uint128{hashLen16(x+v.Higher64(), w.Higher64()) + y,
|
|
hashLen16(x+w.Higher64(), y+v.Higher64())}
|
|
}
|
|
|
|
func CityHash128(s []byte, length uint32) (result Uint128) {
|
|
if length >= 16 {
|
|
result = CityHash128WithSeed(s[16:length], length-16, Uint128{fetch64(s) ^ k3, fetch64(s[8:])})
|
|
} else if length >= 8 {
|
|
result = CityHash128WithSeed(nil, 0, Uint128{fetch64(s) ^ (uint64(length) * k0), fetch64(s[length-8:]) ^ k1})
|
|
} else {
|
|
result = CityHash128WithSeed(s, length, Uint128{k0, k1})
|
|
}
|
|
return
|
|
}
|