hash128.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. package xxh3
  2. import (
  3. "math/bits"
  4. )
  5. // Hash128 returns the 128-bit hash of the byte slice.
  6. func Hash128(b []byte) Uint128 {
  7. return hashAny128(*(*str)(ptr(&b)))
  8. }
  9. // HashString128 returns the 128-bit hash of the string slice.
  10. func HashString128(s string) Uint128 {
  11. return hashAny128(*(*str)(ptr(&s)))
  12. }
  13. func hashAny128(s str) (acc u128) {
  14. p, l := s.p, s.l
  15. switch {
  16. case l <= 16:
  17. switch {
  18. case l > 8: // 9-16
  19. const bitflipl = key64_032 ^ key64_040
  20. const bitfliph = key64_048 ^ key64_056
  21. input_lo := readU64(p, 0)
  22. input_hi := readU64(p, ui(l)-8)
  23. m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1)
  24. m128_l += uint64(l-1) << 54
  25. input_hi ^= bitfliph
  26. m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1)
  27. m128_l ^= bits.ReverseBytes64(m128_h)
  28. acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2)
  29. acc.Hi += m128_h * prime64_2
  30. acc.Lo = xxh3Avalanche(acc.Lo)
  31. acc.Hi = xxh3Avalanche(acc.Hi)
  32. return acc
  33. case l > 3: // 4-8
  34. const bitflip = key64_016 ^ key64_024
  35. input_lo := readU32(p, 0)
  36. input_hi := readU32(p, ui(l)-4)
  37. input_64 := u64(input_lo) + u64(input_hi)<<32
  38. keyed := input_64 ^ bitflip
  39. acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2))
  40. acc.Hi += acc.Lo << 1
  41. acc.Lo ^= acc.Hi >> 3
  42. acc.Lo ^= acc.Lo >> 35
  43. acc.Lo *= 0x9fb21c651e98df25
  44. acc.Lo ^= acc.Lo >> 28
  45. acc.Hi = xxh3Avalanche(acc.Hi)
  46. return acc
  47. case l == 3: // 3
  48. c12 := u64(readU16(p, 0))
  49. c3 := u64(readU8(p, 2))
  50. acc.Lo = c12<<16 + c3 + 3<<8
  51. case l > 1: // 2
  52. c12 := u64(readU16(p, 0))
  53. acc.Lo = c12*(1<<24+1)>>8 + 2<<8
  54. case l == 1: // 1
  55. c1 := u64(readU8(p, 0))
  56. acc.Lo = c1*(1<<24+1<<16+1) + 1<<8
  57. default: // 0
  58. return u128{0x99aa06d3014798d8, 0x6001c324468d497f}
  59. }
  60. acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13))
  61. acc.Lo ^= uint64(key32_000 ^ key32_004)
  62. acc.Hi ^= uint64(key32_008 ^ key32_012)
  63. acc.Lo = xxh64AvalancheSmall(acc.Lo)
  64. acc.Hi = xxh64AvalancheSmall(acc.Hi)
  65. return acc
  66. case l <= 128:
  67. acc.Lo = u64(l) * prime64_1
  68. if l > 32 {
  69. if l > 64 {
  70. if l > 96 {
  71. in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8)
  72. i6, i7 := readU64(p, 6*8), readU64(p, 7*8)
  73. acc.Hi += mulFold64(in8^key64_112, in7^key64_120)
  74. acc.Hi ^= i6 + i7
  75. acc.Lo += mulFold64(i6^key64_096, i7^key64_104)
  76. acc.Lo ^= in8 + in7
  77. } // 96
  78. in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8)
  79. i4, i5 := readU64(p, 4*8), readU64(p, 5*8)
  80. acc.Hi += mulFold64(in6^key64_080, in5^key64_088)
  81. acc.Hi ^= i4 + i5
  82. acc.Lo += mulFold64(i4^key64_064, i5^key64_072)
  83. acc.Lo ^= in6 + in5
  84. } // 64
  85. in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8)
  86. i2, i3 := readU64(p, 2*8), readU64(p, 3*8)
  87. acc.Hi += mulFold64(in4^key64_048, in3^key64_056)
  88. acc.Hi ^= i2 + i3
  89. acc.Lo += mulFold64(i2^key64_032, i3^key64_040)
  90. acc.Lo ^= in4 + in3
  91. } // 32
  92. in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8)
  93. i0, i1 := readU64(p, 0*8), readU64(p, 1*8)
  94. acc.Hi += mulFold64(in2^key64_016, in1^key64_024)
  95. acc.Hi ^= i0 + i1
  96. acc.Lo += mulFold64(i0^key64_000, i1^key64_008)
  97. acc.Lo ^= in2 + in1
  98. acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo
  99. acc.Hi = -xxh3Avalanche(acc.Hi)
  100. acc.Lo = xxh3Avalanche(acc.Lo)
  101. return acc
  102. case l <= 240:
  103. acc.Lo = u64(l) * prime64_1
  104. {
  105. i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8)
  106. acc.Hi += mulFold64(i2^key64_016, i3^key64_024)
  107. acc.Hi ^= i0 + i1
  108. acc.Lo += mulFold64(i0^key64_000, i1^key64_008)
  109. acc.Lo ^= i2 + i3
  110. }
  111. {
  112. i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8)
  113. acc.Hi += mulFold64(i2^key64_048, i3^key64_056)
  114. acc.Hi ^= i0 + i1
  115. acc.Lo += mulFold64(i0^key64_032, i1^key64_040)
  116. acc.Lo ^= i2 + i3
  117. }
  118. {
  119. i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8)
  120. acc.Hi += mulFold64(i2^key64_080, i3^key64_088)
  121. acc.Hi ^= i0 + i1
  122. acc.Lo += mulFold64(i0^key64_064, i1^key64_072)
  123. acc.Lo ^= i2 + i3
  124. }
  125. {
  126. i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8)
  127. acc.Hi += mulFold64(i2^key64_112, i3^key64_120)
  128. acc.Hi ^= i0 + i1
  129. acc.Lo += mulFold64(i0^key64_096, i1^key64_104)
  130. acc.Lo ^= i2 + i3
  131. }
  132. // avalanche
  133. acc.Hi = xxh3Avalanche(acc.Hi)
  134. acc.Lo = xxh3Avalanche(acc.Lo)
  135. // trailing groups after 128
  136. top := ui(l) &^ 31
  137. for i := ui(4 * 32); i < top; i += 32 {
  138. i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24)
  139. k0, k1, k2, k3 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101)
  140. acc.Hi += mulFold64(i2^k2, i3^k3)
  141. acc.Hi ^= i0 + i1
  142. acc.Lo += mulFold64(i0^k0, i1^k1)
  143. acc.Lo ^= i2 + i3
  144. }
  145. // last 32 bytes
  146. {
  147. i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8)
  148. acc.Hi += mulFold64(i0^key64_119, i1^key64_127)
  149. acc.Hi ^= i2 + i3
  150. acc.Lo += mulFold64(i2^key64_103, i3^key64_111)
  151. acc.Lo ^= i0 + i1
  152. }
  153. acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo
  154. acc.Hi = -xxh3Avalanche(acc.Hi)
  155. acc.Lo = xxh3Avalanche(acc.Lo)
  156. return acc
  157. default:
  158. acc.Lo = u64(l) * prime64_1
  159. acc.Hi = ^(u64(l) * prime64_2)
  160. accs := [8]u64{
  161. prime32_3, prime64_1, prime64_2, prime64_3,
  162. prime64_4, prime32_2, prime64_5, prime32_1,
  163. }
  164. if hasAVX512 && l >= avx512Switch {
  165. accumAVX512(&accs, p, key, u64(l))
  166. } else if hasAVX2 {
  167. accumAVX2(&accs, p, key, u64(l))
  168. } else if hasSSE2 {
  169. accumSSE(&accs, p, key, u64(l))
  170. } else {
  171. accumScalar(&accs, p, key, u64(l))
  172. }
  173. // merge accs
  174. acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019)
  175. acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125)
  176. acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035)
  177. acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141)
  178. acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051)
  179. acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157)
  180. acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067)
  181. acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173)
  182. acc.Lo = xxh3Avalanche(acc.Lo)
  183. acc.Hi = xxh3Avalanche(acc.Hi)
  184. return acc
  185. }
  186. }