hash128_seed.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. package xxh3
  2. import (
  3. "math/bits"
  4. )
  5. // Hash128Seed returns the 128-bit hash of the byte slice.
  6. func Hash128Seed(b []byte, seed uint64) Uint128 {
  7. return hashAny128Seed(*(*str)(ptr(&b)), seed)
  8. }
  9. // HashString128Seed returns the 128-bit hash of the string slice.
  10. func HashString128Seed(s string, seed uint64) Uint128 {
  11. return hashAny128Seed(*(*str)(ptr(&s)), seed)
  12. }
  13. func hashAny128Seed(s str, seed uint64) (acc u128) {
  14. p, l := s.p, s.l
  15. switch {
  16. case l <= 16:
  17. switch {
  18. case l > 8: // 9-16
  19. bitflipl := (key64_032 ^ key64_040) - seed
  20. bitfliph := (key64_048 ^ key64_056) + seed
  21. input_lo := readU64(p, 0)
  22. input_hi := readU64(p, ui(l)-8)
  23. m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1)
  24. m128_l += uint64(l-1) << 54
  25. input_hi ^= bitfliph
  26. m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1)
  27. m128_l ^= bits.ReverseBytes64(m128_h)
  28. acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2)
  29. acc.Hi += m128_h * prime64_2
  30. acc.Lo = xxh3Avalanche(acc.Lo)
  31. acc.Hi = xxh3Avalanche(acc.Hi)
  32. return acc
  33. case l > 3: // 4-8
  34. seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32
  35. bitflip := (key64_016 ^ key64_024) + seed
  36. input_lo := readU32(p, 0)
  37. input_hi := readU32(p, ui(l)-4)
  38. input_64 := u64(input_lo) + u64(input_hi)<<32
  39. keyed := input_64 ^ bitflip
  40. acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2))
  41. acc.Hi += acc.Lo << 1
  42. acc.Lo ^= acc.Hi >> 3
  43. acc.Lo ^= acc.Lo >> 35
  44. acc.Lo *= 0x9fb21c651e98df25
  45. acc.Lo ^= acc.Lo >> 28
  46. acc.Hi = xxh3Avalanche(acc.Hi)
  47. return acc
  48. case l == 3: // 3
  49. c12 := u64(readU16(p, 0))
  50. c3 := u64(readU8(p, 2))
  51. acc.Lo = c12<<16 + c3 + 3<<8
  52. case l > 1: // 2
  53. c12 := u64(readU16(p, 0))
  54. acc.Lo = c12*(1<<24+1)>>8 + 2<<8
  55. case l == 1: // 1
  56. c1 := u64(readU8(p, 0))
  57. acc.Lo = c1*(1<<24+1<<16+1) + 1<<8
  58. default: // 0
  59. bitflipl := key64_064 ^ key64_072 ^ seed
  60. bitfliph := key64_080 ^ key64_088 ^ seed
  61. return u128{Lo: xxh64AvalancheFull(bitflipl), Hi: xxh64AvalancheFull(bitfliph)}
  62. }
  63. acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13))
  64. acc.Lo ^= uint64(key32_000^key32_004) + seed
  65. acc.Hi ^= uint64(key32_008^key32_012) - seed
  66. acc.Lo = xxh64AvalancheFull(acc.Lo)
  67. acc.Hi = xxh64AvalancheFull(acc.Hi)
  68. return acc
  69. case l <= 128:
  70. acc.Lo = u64(l) * prime64_1
  71. if l > 32 {
  72. if l > 64 {
  73. if l > 96 {
  74. in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8)
  75. i6, i7 := readU64(p, 6*8), readU64(p, 7*8)
  76. acc.Hi += mulFold64(in8^(key64_112+seed), in7^(key64_120-seed))
  77. acc.Hi ^= i6 + i7
  78. acc.Lo += mulFold64(i6^(key64_096+seed), i7^(key64_104-seed))
  79. acc.Lo ^= in8 + in7
  80. } // 96
  81. in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8)
  82. i4, i5 := readU64(p, 4*8), readU64(p, 5*8)
  83. acc.Hi += mulFold64(in6^(key64_080+seed), in5^(key64_088-seed))
  84. acc.Hi ^= i4 + i5
  85. acc.Lo += mulFold64(i4^(key64_064+seed), i5^(key64_072-seed))
  86. acc.Lo ^= in6 + in5
  87. } // 64
  88. in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8)
  89. i2, i3 := readU64(p, 2*8), readU64(p, 3*8)
  90. acc.Hi += mulFold64(in4^(key64_048+seed), in3^(key64_056-seed))
  91. acc.Hi ^= i2 + i3
  92. acc.Lo += mulFold64(i2^(key64_032+seed), i3^(key64_040-seed))
  93. acc.Lo ^= in4 + in3
  94. } // 32
  95. in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8)
  96. i0, i1 := readU64(p, 0*8), readU64(p, 1*8)
  97. acc.Hi += mulFold64(in2^(key64_016+seed), in1^(key64_024-seed))
  98. acc.Hi ^= i0 + i1
  99. acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed))
  100. acc.Lo ^= in2 + in1
  101. acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo
  102. acc.Hi = -xxh3Avalanche(acc.Hi)
  103. acc.Lo = xxh3Avalanche(acc.Lo)
  104. return acc
  105. case l <= 240:
  106. acc.Lo = u64(l) * prime64_1
  107. {
  108. i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8)
  109. acc.Hi += mulFold64(i2^(key64_016+seed), i3^(key64_024-seed))
  110. acc.Hi ^= i0 + i1
  111. acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed))
  112. acc.Lo ^= i2 + i3
  113. }
  114. {
  115. i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8)
  116. acc.Hi += mulFold64(i2^(key64_048+seed), i3^(key64_056-seed))
  117. acc.Hi ^= i0 + i1
  118. acc.Lo += mulFold64(i0^(key64_032+seed), i1^(key64_040-seed))
  119. acc.Lo ^= i2 + i3
  120. }
  121. {
  122. i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8)
  123. acc.Hi += mulFold64(i2^(key64_080+seed), i3^(key64_088-seed))
  124. acc.Hi ^= i0 + i1
  125. acc.Lo += mulFold64(i0^(key64_064+seed), i1^(key64_072-seed))
  126. acc.Lo ^= i2 + i3
  127. }
  128. {
  129. i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8)
  130. acc.Hi += mulFold64(i2^(key64_112+seed), i3^(key64_120-seed))
  131. acc.Hi ^= i0 + i1
  132. acc.Lo += mulFold64(i0^(key64_096+seed), i1^(key64_104-seed))
  133. acc.Lo ^= i2 + i3
  134. }
  135. // avalanche
  136. acc.Hi = xxh3Avalanche(acc.Hi)
  137. acc.Lo = xxh3Avalanche(acc.Lo)
  138. // trailing groups after 128
  139. top := ui(l) &^ 31
  140. for i := ui(4 * 32); i < top; i += 32 {
  141. i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24)
  142. k0, k1, k2, k3 := readU64(key, i-125)+seed, readU64(key, i-117)-seed, readU64(key, i-109)+seed, readU64(key, i-101)-seed
  143. acc.Hi += mulFold64(i2^k2, i3^k3)
  144. acc.Hi ^= i0 + i1
  145. acc.Lo += mulFold64(i0^k0, i1^k1)
  146. acc.Lo ^= i2 + i3
  147. }
  148. // last 32 bytes
  149. {
  150. i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8)
  151. seed := 0 - seed
  152. acc.Hi += mulFold64(i0^(key64_119+seed), i1^(key64_127-seed))
  153. acc.Hi ^= i2 + i3
  154. acc.Lo += mulFold64(i2^(key64_103+seed), i3^(key64_111-seed))
  155. acc.Lo ^= i0 + i1
  156. }
  157. acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo
  158. acc.Hi = -xxh3Avalanche(acc.Hi)
  159. acc.Lo = xxh3Avalanche(acc.Lo)
  160. return acc
  161. default:
  162. acc.Lo = u64(l) * prime64_1
  163. acc.Hi = ^(u64(l) * prime64_2)
  164. secret := key
  165. if seed != 0 {
  166. secret = ptr(&[secretSize]byte{})
  167. initSecret(secret, seed)
  168. }
  169. accs := [8]u64{
  170. prime32_3, prime64_1, prime64_2, prime64_3,
  171. prime64_4, prime32_2, prime64_5, prime32_1,
  172. }
  173. if hasAVX512 && l >= avx512Switch {
  174. accumAVX512(&accs, p, secret, u64(l))
  175. } else if hasAVX2 {
  176. accumAVX2(&accs, p, secret, u64(l))
  177. } else if hasSSE2 {
  178. accumSSE(&accs, p, secret, u64(l))
  179. } else {
  180. accumScalar(&accs, p, secret, u64(l))
  181. }
  182. // merge accs
  183. const hi_off = 117 - 11
  184. acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19))
  185. acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off))
  186. acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35))
  187. acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off))
  188. acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51))
  189. acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off))
  190. acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67))
  191. acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off))
  192. acc.Lo = xxh3Avalanche(acc.Lo)
  193. acc.Hi = xxh3Avalanche(acc.Hi)
  194. return acc
  195. }
  196. }