| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- package blake3
- import (
- "bytes"
- "math/bits"
- )
// compressNodeGeneric runs the package's seven-round compression function
// over n and stores the resulting 16 words in *out. It only reads n; its
// sole write is to *out.
//
// The 16-word working state s0..s15 is seeded as follows:
//
//	s0..s7   = n.cv[0..7]
//	s8..s11  = iv[0..3]
//	s12, s13 = low and high 32 bits of n.counter
//	s14      = n.blockLen
//	s15      = n.flags
func compressNodeGeneric(out *[16]uint32, n node) {
	// g mixes four state words (a, b, c, d) with two message words (mx, my)
	// using wrapping additions, XORs and rotations. bits.RotateLeft32 with a
	// negative count rotates right, so the rotation amounts are right
	// rotations by 16, 12, 8 and 7 bits.
	g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) {
		a += b + mx
		d = bits.RotateLeft32(d^a, -16)
		c += d
		b = bits.RotateLeft32(b^c, -12)
		a += b + my
		d = bits.RotateLeft32(d^a, -8)
		c += d
		b = bits.RotateLeft32(b^c, -7)
		return a, b, c, d
	}

	// NOTE: we unroll all of the rounds, as well as the permutations that occur
	// between rounds.
	//
	// Because the permutations are unrolled, the message words are never
	// shuffled at run time: each round simply indexes n.block in an
	// already-permuted order. Every round applies g to the same four column
	// groups and then the same four diagonal groups as round 1; only the
	// n.block indices differ from round to round.

	// round 1 (also initializes state)
	// columns
	s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1])
	s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3])
	s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5])
	s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7])
	// diagonals
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15])

	// round 2
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8])

	// round 3
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1])

	// round 4
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6])

	// round 5
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4])

	// round 6
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7])

	// round 7
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13])

	// finalization
	// The first eight output words XOR the lower half of the state with the
	// upper half; the last eight XOR the upper half with the input n.cv.
	*out = [16]uint32{
		s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
		s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
		s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3],
		s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7],
	}
}
- func chainingValue(n node) (cv [8]uint32) {
- full := compressNode(n)
- copy(cv[:], full[:])
- return
- }
- func compressBufferGeneric(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n node) {
- if buflen <= chunkSize {
- return compressChunk(buf[:buflen], key, counter, flags)
- }
- var cvs [maxSIMD][8]uint32
- var numCVs uint64
- for bb := bytes.NewBuffer(buf[:buflen]); bb.Len() > 0; numCVs++ {
- cvs[numCVs] = chainingValue(compressChunk(bb.Next(chunkSize), key, counter+numCVs, flags))
- }
- return mergeSubtrees(&cvs, numCVs, key, flags)
- }
- func compressBlocksGeneric(outs *[maxSIMD][64]byte, n node) {
- for i := range outs {
- wordsToBytes(compressNode(n), &outs[i])
- n.counter++
- }
- }
- func mergeSubtreesGeneric(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
- for numCVs > 2 {
- rem := numCVs / 2
- for i := range cvs[:rem] {
- cvs[i] = chainingValue(parentNode(cvs[i*2], cvs[i*2+1], *key, flags))
- }
- if numCVs%2 != 0 {
- cvs[rem] = cvs[rem*2]
- rem++
- }
- numCVs = rem
- }
- return parentNode(cvs[0], cvs[1], *key, flags)
- }
|