cityhash.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /*
  2. * Go implementation of Google city hash (MIT license)
  3. * https://code.google.com/p/cityhash/
  4. *
  5. * MIT License http://www.opensource.org/licenses/mit-license.php
  6. *
  7. * I don't even want to pretend to understand the details of city hash.
  8. * I am only reproducing the logic in Go as faithfully as I can.
  9. *
  10. */
  11. package cityhash102
  12. import (
  13. "encoding/binary"
  14. )
  15. const (
  16. k0 uint64 = 0xc3a5c85c97cb3127
  17. k1 uint64 = 0xb492b66fbe98f273
  18. k2 uint64 = 0x9ae16a3b2f90404f
  19. k3 uint64 = 0xc949d7c7509e6557
  20. kMul uint64 = 0x9ddfea08eb382d69
  21. )
  22. func fetch64(p []byte) uint64 {
  23. return binary.LittleEndian.Uint64(p)
  24. //return uint64InExpectedOrder(unalignedLoad64(p))
  25. }
  26. func fetch32(p []byte) uint32 {
  27. return binary.LittleEndian.Uint32(p)
  28. //return uint32InExpectedOrder(unalignedLoad32(p))
  29. }
  30. func rotate64(val uint64, shift uint32) uint64 {
  31. if shift != 0 {
  32. return ((val >> shift) | (val << (64 - shift)))
  33. }
  34. return val
  35. }
  36. func rotate32(val uint32, shift uint32) uint32 {
  37. if shift != 0 {
  38. return ((val >> shift) | (val << (32 - shift)))
  39. }
  40. return val
  41. }
  42. func swap64(a, b *uint64) {
  43. *a, *b = *b, *a
  44. }
  45. func swap32(a, b *uint32) {
  46. *a, *b = *b, *a
  47. }
  48. func permute3(a, b, c *uint32) {
  49. swap32(a, b)
  50. swap32(a, c)
  51. }
  52. func rotate64ByAtLeast1(val uint64, shift uint32) uint64 {
  53. return (val >> shift) | (val << (64 - shift))
  54. }
  55. func shiftMix(val uint64) uint64 {
  56. return val ^ (val >> 47)
  57. }
  58. type Uint128 [2]uint64
  59. func (this *Uint128) setLower64(l uint64) {
  60. this[0] = l
  61. }
  62. func (this *Uint128) setHigher64(h uint64) {
  63. this[1] = h
  64. }
  65. func (this Uint128) Lower64() uint64 {
  66. return this[0]
  67. }
  68. func (this Uint128) Higher64() uint64 {
  69. return this[1]
  70. }
  71. func (this Uint128) Bytes() []byte {
  72. b := make([]byte, 16)
  73. binary.LittleEndian.PutUint64(b, this[0])
  74. binary.LittleEndian.PutUint64(b[8:], this[1])
  75. return b
  76. }
  77. func hash128to64(x Uint128) uint64 {
  78. // Murmur-inspired hashing.
  79. var a = (x.Lower64() ^ x.Higher64()) * kMul
  80. a ^= (a >> 47)
  81. var b = (x.Higher64() ^ a) * kMul
  82. b ^= (b >> 47)
  83. b *= kMul
  84. return b
  85. }
  86. func hashLen16(u, v uint64) uint64 {
  87. return hash128to64(Uint128{u, v})
  88. }
  89. func hashLen16_3(u, v, mul uint64) uint64 {
  90. // Murmur-inspired hashing.
  91. var a = (u ^ v) * mul
  92. a ^= (a >> 47)
  93. var b = (v ^ a) * mul
  94. b ^= (b >> 47)
  95. b *= mul
  96. return b
  97. }
  98. func hashLen0to16(s []byte, length uint32) uint64 {
  99. if length > 8 {
  100. var a = fetch64(s)
  101. var b = fetch64(s[length-8:])
  102. return hashLen16(a, rotate64ByAtLeast1(b+uint64(length), length)) ^ b
  103. }
  104. if length >= 4 {
  105. var a = fetch32(s)
  106. return hashLen16(uint64(length)+(uint64(a)<<3), uint64(fetch32(s[length-4:])))
  107. }
  108. if length > 0 {
  109. var a uint8 = uint8(s[0])
  110. var b uint8 = uint8(s[length>>1])
  111. var c uint8 = uint8(s[length-1])
  112. var y uint32 = uint32(a) + (uint32(b) << 8)
  113. var z uint32 = length + (uint32(c) << 2)
  114. return shiftMix(uint64(y)*k2^uint64(z)*k3) * k2
  115. }
  116. return k2
  117. }
  118. // This probably works well for 16-byte strings as well, but it may be overkill
  119. func hashLen17to32(s []byte, length uint32) uint64 {
  120. var a = fetch64(s) * k1
  121. var b = fetch64(s[8:])
  122. var c = fetch64(s[length-8:]) * k2
  123. var d = fetch64(s[length-16:]) * k0
  124. return hashLen16(rotate64(a-b, 43)+rotate64(c, 30)+d,
  125. a+rotate64(b^k3, 20)-c+uint64(length))
  126. }
  127. func weakHashLen32WithSeeds(w, x, y, z, a, b uint64) Uint128 {
  128. a += w
  129. b = rotate64(b+a+z, 21)
  130. var c uint64 = a
  131. a += x
  132. a += y
  133. b += rotate64(a, 44)
  134. return Uint128{a + z, b + c}
  135. }
  136. func weakHashLen32WithSeeds_3(s []byte, a, b uint64) Uint128 {
  137. return weakHashLen32WithSeeds(fetch64(s), fetch64(s[8:]), fetch64(s[16:]), fetch64(s[24:]), a, b)
  138. }
  139. func hashLen33to64(s []byte, length uint32) uint64 {
  140. var z uint64 = fetch64(s[24:])
  141. var a uint64 = fetch64(s) + (uint64(length)+fetch64(s[length-16:]))*k0
  142. var b uint64 = rotate64(a+z, 52)
  143. var c uint64 = rotate64(a, 37)
  144. a += fetch64(s[8:])
  145. c += rotate64(a, 7)
  146. a += fetch64(s[16:])
  147. var vf uint64 = a + z
  148. var vs = b + rotate64(a, 31) + c
  149. a = fetch64(s[16:]) + fetch64(s[length-32:])
  150. z = fetch64(s[length-8:])
  151. b = rotate64(a+z, 52)
  152. c = rotate64(a, 37)
  153. a += fetch64(s[length-24:])
  154. c += rotate64(a, 7)
  155. a += fetch64(s[length-16:])
  156. wf := a + z
  157. ws := b + rotate64(a, 31) + c
  158. r := shiftMix((vf+ws)*k2 + (wf+vs)*k0)
  159. return shiftMix(r*k0+vs) * k2
  160. }
  161. func CityHash64(s []byte, length uint32) uint64 {
  162. if length <= 32 {
  163. if length <= 16 {
  164. return hashLen0to16(s, length)
  165. } else {
  166. return hashLen17to32(s, length)
  167. }
  168. } else if length <= 64 {
  169. return hashLen33to64(s, length)
  170. }
  171. var x uint64 = fetch64(s)
  172. var y uint64 = fetch64(s[length-16:]) ^ k1
  173. var z uint64 = fetch64(s[length-56:]) ^ k0
  174. var v Uint128 = weakHashLen32WithSeeds_3(s[length-64:], uint64(length), y)
  175. var w Uint128 = weakHashLen32WithSeeds_3(s[length-32:], uint64(length)*k1, k0)
  176. z += shiftMix(v.Higher64()) * k1
  177. x = rotate64(z+x, 39) * k1
  178. y = rotate64(y, 33) * k1
  179. length = (length - 1) & ^uint32(63)
  180. for {
  181. x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
  182. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  183. x ^= w.Higher64()
  184. y ^= v.Lower64()
  185. z = rotate64(z^w.Lower64(), 33)
  186. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  187. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
  188. swap64(&z, &x)
  189. s = s[64:]
  190. length -= 64
  191. if length == 0 {
  192. break
  193. }
  194. }
  195. return hashLen16(hashLen16(v.Lower64(), w.Lower64())+shiftMix(y)*k1+z, hashLen16(v.Higher64(), w.Higher64())+x)
  196. }
  197. func CityHash64WithSeed(s []byte, length uint32, seed uint64) uint64 {
  198. return CityHash64WithSeeds(s, length, k2, seed)
  199. }
  200. func CityHash64WithSeeds(s []byte, length uint32, seed0, seed1 uint64) uint64 {
  201. return hashLen16(CityHash64(s, length)-seed0, seed1)
  202. }
  203. func cityMurmur(s []byte, length uint32, seed Uint128) Uint128 {
  204. var a uint64 = seed.Lower64()
  205. var b uint64 = seed.Higher64()
  206. var c uint64 = 0
  207. var d uint64 = 0
  208. var l int32 = int32(length) - 16
  209. if l <= 0 { // len <= 16
  210. a = shiftMix(a*k1) * k1
  211. c = b*k1 + hashLen0to16(s, length)
  212. if length >= 8 {
  213. d = shiftMix(a + fetch64(s))
  214. } else {
  215. d = shiftMix(a + c)
  216. }
  217. } else { // len > 16
  218. c = hashLen16(fetch64(s[length-8:])+k1, a)
  219. d = hashLen16(b+uint64(length), c+fetch64(s[length-16:]))
  220. a += d
  221. for {
  222. a ^= shiftMix(fetch64(s)*k1) * k1
  223. a *= k1
  224. b ^= a
  225. c ^= shiftMix(fetch64(s[8:])*k1) * k1
  226. c *= k1
  227. d ^= c
  228. s = s[16:]
  229. l -= 16
  230. if l <= 0 {
  231. break
  232. }
  233. }
  234. }
  235. a = hashLen16(a, c)
  236. b = hashLen16(d, b)
  237. return Uint128{a ^ b, hashLen16(b, a)}
  238. }
  239. func CityHash128WithSeed(s []byte, length uint32, seed Uint128) Uint128 {
  240. if length < 128 {
  241. return cityMurmur(s, length, seed)
  242. }
  243. // We expect length >= 128 to be the common case. Keep 56 bytes of state:
  244. // v, w, x, y, and z.
  245. var v, w Uint128
  246. var x uint64 = seed.Lower64()
  247. var y uint64 = seed.Higher64()
  248. var z uint64 = uint64(length) * k1
  249. var pos uint32
  250. var t = s
  251. v.setLower64(rotate64(y^k1, 49)*k1 + fetch64(s))
  252. v.setHigher64(rotate64(v.Lower64(), 42)*k1 + fetch64(s[8:]))
  253. w.setLower64(rotate64(y+z, 35)*k1 + x)
  254. w.setHigher64(rotate64(x+fetch64(s[88:]), 53) * k1)
  255. // This is the same inner loop as CityHash64(), manually unrolled.
  256. for {
  257. x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
  258. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  259. x ^= w.Higher64()
  260. y ^= v.Lower64()
  261. z = rotate64(z^w.Lower64(), 33)
  262. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  263. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
  264. swap64(&z, &x)
  265. s = s[64:]
  266. pos += 64
  267. x = rotate64(x+y+v.Lower64()+fetch64(s[16:]), 37) * k1
  268. y = rotate64(y+v.Higher64()+fetch64(s[48:]), 42) * k1
  269. x ^= w.Higher64()
  270. y ^= v.Lower64()
  271. z = rotate64(z^w.Lower64(), 33)
  272. v = weakHashLen32WithSeeds_3(s, v.Higher64()*k1, x+w.Lower64())
  273. w = weakHashLen32WithSeeds_3(s[32:], z+w.Higher64(), y)
  274. swap64(&z, &x)
  275. s = s[64:]
  276. pos += 64
  277. length -= 128
  278. if length < 128 {
  279. break
  280. }
  281. }
  282. y += rotate64(w.Lower64(), 37)*k0 + z
  283. x += rotate64(v.Lower64()+z, 49) * k0
  284. // If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  285. var tailDone uint32
  286. for tailDone = 0; tailDone < length; {
  287. tailDone += 32
  288. y = rotate64(y-x, 42)*k0 + v.Higher64()
  289. //TODO why not use origin_len ?
  290. w.setLower64(w.Lower64() + fetch64(t[pos+length-tailDone+16:]))
  291. x = rotate64(x, 49)*k0 + w.Lower64()
  292. w.setLower64(w.Lower64() + v.Lower64())
  293. v = weakHashLen32WithSeeds_3(t[pos+length-tailDone:], v.Lower64(), v.Higher64())
  294. }
  295. // At this point our 48 bytes of state should contain more than
  296. // enough information for a strong 128-bit hash. We use two
  297. // different 48-byte-to-8-byte hashes to get a 16-byte final result.
  298. x = hashLen16(x, v.Lower64())
  299. y = hashLen16(y, w.Lower64())
  300. return Uint128{hashLen16(x+v.Higher64(), w.Higher64()) + y,
  301. hashLen16(x+w.Higher64(), y+v.Higher64())}
  302. }
  303. func CityHash128(s []byte, length uint32) (result Uint128) {
  304. if length >= 16 {
  305. result = CityHash128WithSeed(s[16:length], length-16, Uint128{fetch64(s) ^ k3, fetch64(s[8:])})
  306. } else if length >= 8 {
  307. result = CityHash128WithSeed(nil, 0, Uint128{fetch64(s) ^ (uint64(length) * k0), fetch64(s[length-8:]) ^ k1})
  308. } else {
  309. result = CityHash128WithSeed(s, length, Uint128{k0, k1})
  310. }
  311. return
  312. }