compress_generic.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. package blake3
  2. import (
  3. "bytes"
  4. "math/bits"
  5. )
  6. func compressNodeGeneric(out *[16]uint32, n node) {
  7. g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) {
  8. a += b + mx
  9. d = bits.RotateLeft32(d^a, -16)
  10. c += d
  11. b = bits.RotateLeft32(b^c, -12)
  12. a += b + my
  13. d = bits.RotateLeft32(d^a, -8)
  14. c += d
  15. b = bits.RotateLeft32(b^c, -7)
  16. return a, b, c, d
  17. }
  18. // NOTE: we unroll all of the rounds, as well as the permutations that occur
  19. // between rounds.
  20. // round 1 (also initializes state)
  21. // columns
  22. s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1])
  23. s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3])
  24. s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5])
  25. s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7])
  26. // diagonals
  27. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9])
  28. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11])
  29. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13])
  30. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15])
  31. // round 2
  32. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6])
  33. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10])
  34. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0])
  35. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13])
  36. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11])
  37. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5])
  38. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14])
  39. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8])
  40. // round 3
  41. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4])
  42. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12])
  43. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2])
  44. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14])
  45. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5])
  46. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0])
  47. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15])
  48. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1])
  49. // round 4
  50. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7])
  51. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9])
  52. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3])
  53. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15])
  54. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0])
  55. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2])
  56. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8])
  57. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6])
  58. // round 5
  59. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13])
  60. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11])
  61. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10])
  62. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8])
  63. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2])
  64. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3])
  65. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1])
  66. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4])
  67. // round 6
  68. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14])
  69. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5])
  70. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12])
  71. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1])
  72. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3])
  73. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10])
  74. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6])
  75. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7])
  76. // round 7
  77. s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15])
  78. s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0])
  79. s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9])
  80. s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6])
  81. s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10])
  82. s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12])
  83. s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4])
  84. s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13])
  85. // finalization
  86. *out = [16]uint32{
  87. s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
  88. s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
  89. s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3],
  90. s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7],
  91. }
  92. }
  93. func chainingValue(n node) (cv [8]uint32) {
  94. full := compressNode(n)
  95. copy(cv[:], full[:])
  96. return
  97. }
  98. func compressBufferGeneric(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n node) {
  99. if buflen <= chunkSize {
  100. return compressChunk(buf[:buflen], key, counter, flags)
  101. }
  102. var cvs [maxSIMD][8]uint32
  103. var numCVs uint64
  104. for bb := bytes.NewBuffer(buf[:buflen]); bb.Len() > 0; numCVs++ {
  105. cvs[numCVs] = chainingValue(compressChunk(bb.Next(chunkSize), key, counter+numCVs, flags))
  106. }
  107. return mergeSubtrees(&cvs, numCVs, key, flags)
  108. }
  109. func compressBlocksGeneric(outs *[maxSIMD][64]byte, n node) {
  110. for i := range outs {
  111. wordsToBytes(compressNode(n), &outs[i])
  112. n.counter++
  113. }
  114. }
  115. func mergeSubtreesGeneric(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
  116. for numCVs > 2 {
  117. rem := numCVs / 2
  118. for i := range cvs[:rem] {
  119. cvs[i] = chainingValue(parentNode(cvs[i*2], cvs[i*2+1], *key, flags))
  120. }
  121. if numCVs%2 != 0 {
  122. cvs[rem] = cvs[rem*2]
  123. rem++
  124. }
  125. numCVs = rem
  126. }
  127. return parentNode(cvs[0], cvs[1], *key, flags)
  128. }