modnscalar.go 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. // Copyright (c) 2020 The Decred developers
  2. // Use of this source code is governed by an ISC
  3. // license that can be found in the LICENSE file.
  4. package secp256k1
  5. import (
  6. "encoding/hex"
  7. "math/big"
  8. )
  9. // References:
  10. // [SECG]: Recommended Elliptic Curve Domain Parameters
  11. // https://www.secg.org/sec2-v2.pdf
  12. //
  13. // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
  14. // http://cacr.uwaterloo.ca/hac/
  15. // Many elliptic curve operations require working with scalars in a finite field
  16. // characterized by the order of the group underlying the secp256k1 curve.
  17. // Given this precision is larger than the biggest available native type,
  18. // obviously some form of bignum math is needed. This code implements
  19. // specialized fixed-precision field arithmetic rather than relying on an
  20. // arbitrary-precision arithmetic package such as math/big for dealing with the
  21. // math modulo the group order since the size is known. As a result, rather
  22. // large performance gains are achieved by taking advantage of many
  23. // optimizations not available to arbitrary-precision arithmetic and generic
  24. // modular arithmetic algorithms.
  25. //
  26. // There are various ways to internally represent each element. For example,
  27. // the most obvious representation would be to use an array of 4 uint64s (64
  28. // bits * 4 = 256 bits). However, that representation suffers from the fact
  29. // that there is no native Go type large enough to handle the intermediate
  30. // results while adding or multiplying two 64-bit numbers.
  31. //
  32. // Given the above, this implementation represents the field elements as 8
  33. // uint32s with each word (array entry) treated as base 2^32. This was chosen
  34. // because most systems at the current time are 64-bit (or at least have 64-bit
  35. // registers available for specialized purposes such as MMX) so the intermediate
  36. // results can typically be done using a native register (and using uint64s to
  37. // avoid the need for additional half-word arithmetic)
const (
	// These constants provide convenient access to each of the words of the
	// secp256k1 curve group order N to improve code readability.
	//
	// The group order of the curve per [SECG] is:
	// 0xffffffff ffffffff ffffffff fffffffe baaedce6 af48a03b bfd25e8c d0364141
	//
	// orderWordZero is the least significant 32-bit word.
	orderWordZero  uint32 = 0xd0364141
	orderWordOne   uint32 = 0xbfd25e8c
	orderWordTwo   uint32 = 0xaf48a03b
	orderWordThree uint32 = 0xbaaedce6
	orderWordFour  uint32 = 0xfffffffe
	orderWordFive  uint32 = 0xffffffff
	orderWordSix   uint32 = 0xffffffff
	orderWordSeven uint32 = 0xffffffff

	// These constants provide convenient access to each of the words of the
	// two's complement of the secp256k1 curve group order N to improve code
	// readability.
	//
	// The two's complement of the group order is:
	// 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da173 2fc9bebf
	//
	// Note the word zero needs the +1 of two's complement (invert then add
	// one); the remaining words only need the inversion because the +1 cannot
	// carry past word zero (the low word of N is nonzero).
	orderComplementWordZero  uint32 = (^orderWordZero) + 1
	orderComplementWordOne   uint32 = ^orderWordOne
	orderComplementWordTwo   uint32 = ^orderWordTwo
	orderComplementWordThree uint32 = ^orderWordThree
	//orderComplementWordFour uint32 = ^orderWordFour // unused
	//orderComplementWordFive uint32 = ^orderWordFive // unused
	//orderComplementWordSix uint32 = ^orderWordSix // unused
	//orderComplementWordSeven uint32 = ^orderWordSeven // unused

	// These constants provide convenient access to each of the words of the
	// secp256k1 curve group order N / 2 to improve code readability and avoid
	// the need to recalculate them.
	//
	// The half order of the secp256k1 curve group is:
	// 0x7fffffff ffffffff ffffffff ffffffff 5d576e73 57a4501d dfe92f46 681b20a0
	halfOrderWordZero  uint32 = 0x681b20a0
	halfOrderWordOne   uint32 = 0xdfe92f46
	halfOrderWordTwo   uint32 = 0x57a4501d
	halfOrderWordThree uint32 = 0x5d576e73
	halfOrderWordFour  uint32 = 0xffffffff
	halfOrderWordFive  uint32 = 0xffffffff
	halfOrderWordSix   uint32 = 0xffffffff
	halfOrderWordSeven uint32 = 0x7fffffff

	// uint32Mask is simply a mask with all bits set for a uint32 and is used
	// to improve the readability of the code.
	uint32Mask = 0xffffffff
)
var (
	// zero32 is an array of 32 bytes used for the purposes of zeroing and is
	// defined here to avoid extra allocations. It is only ever read (as the
	// source of a copy), never written.
	zero32 = [32]byte{}
)
// ModNScalar implements optimized 256-bit constant-time fixed-precision
// arithmetic over the secp256k1 group order. This means all arithmetic is
// performed modulo:
//
//	0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141
//
// It only implements the arithmetic needed for elliptic curve operations,
// however, the operations that are not implemented can typically be worked
// around if absolutely needed. For example, subtraction can be performed by
// adding the negation.
//
// Should it be absolutely necessary, conversion to the standard library
// math/big.Int can be accomplished by using the Bytes method, slicing the
// resulting fixed-size array, and feeding it to big.Int.SetBytes. However,
// that should typically be avoided when possible as conversion to big.Ints
// requires allocations, is not constant time, and is slower when working
// modulo the group order.
type ModNScalar struct {
	// n is the scalar represented as 8 32-bit integers in base 2^32, with
	// n[0] the least significant word.
	//
	// The following depicts the internal representation:
	// ---------------------------------------------------------
	// | n[7]           | n[6]           | ... | n[0]           |
	// | 32 bits        | 32 bits        | ... | 32 bits        |
	// | Mult: 2^(32*7) | Mult: 2^(32*6) | ... | Mult: 2^(32*0) |
	// ---------------------------------------------------------
	//
	// For example, consider the number 2^87 + 2^42 + 1. It would be
	// represented as:
	//	n[0] = 1
	//	n[1] = 2^10
	//	n[2] = 2^23
	//	n[3..7] = 0
	//
	// The full 256-bit value is then calculated by looping i from 7..0 and
	// doing sum(n[i] * 2^(32i)) like so:
	//	n[7] * 2^(32*7) = 0    * 2^224 = 0
	//	n[6] * 2^(32*6) = 0    * 2^192 = 0
	//	...
	//	n[2] * 2^(32*2) = 2^23 * 2^64  = 2^87
	//	n[1] * 2^(32*1) = 2^10 * 2^32  = 2^42
	//	n[0] * 2^(32*0) = 1    * 2^0   = 1
	//	Sum: 0 + 0 + ... + 2^87 + 2^42 + 1 = 2^87 + 2^42 + 1
	n [8]uint32
}
  134. // String returns the scalar as a human-readable hex string.
  135. //
  136. // This is NOT constant time.
  137. func (s ModNScalar) String() string {
  138. b := s.Bytes()
  139. return hex.EncodeToString(b[:])
  140. }
  141. // Set sets the scalar equal to a copy of the passed one in constant time.
  142. //
  143. // The scalar is returned to support chaining. This enables syntax like:
  144. // s := new(ModNScalar).Set(s2).Add(1) so that s = s2 + 1 where s2 is not
  145. // modified.
  146. func (s *ModNScalar) Set(val *ModNScalar) *ModNScalar {
  147. *s = *val
  148. return s
  149. }
  150. // Zero sets the scalar to zero in constant time. A newly created scalar is
  151. // already set to zero. This function can be useful to clear an existing scalar
  152. // for reuse.
  153. func (s *ModNScalar) Zero() {
  154. s.n[0] = 0
  155. s.n[1] = 0
  156. s.n[2] = 0
  157. s.n[3] = 0
  158. s.n[4] = 0
  159. s.n[5] = 0
  160. s.n[6] = 0
  161. s.n[7] = 0
  162. }
  163. // IsZero returns whether or not the scalar is equal to zero in constant time.
  164. func (s *ModNScalar) IsZero() bool {
  165. // The scalar can only be zero if no bits are set in any of the words.
  166. bits := s.n[0] | s.n[1] | s.n[2] | s.n[3] | s.n[4] | s.n[5] | s.n[6] | s.n[7]
  167. return bits == 0
  168. }
  169. // SetInt sets the scalar to the passed integer in constant time. This is a
  170. // convenience function since it is fairly common to perform some arithmetic
  171. // with small native integers.
  172. //
  173. // The scalar is returned to support chaining. This enables syntax like:
  174. // s := new(ModNScalar).SetInt(2).Mul(s2) so that s = 2 * s2.
  175. func (s *ModNScalar) SetInt(ui uint32) *ModNScalar {
  176. s.Zero()
  177. s.n[0] = ui
  178. return s
  179. }
  180. // constantTimeEq returns 1 if a == b or 0 otherwise in constant time.
  181. func constantTimeEq(a, b uint32) uint32 {
  182. return uint32((uint64(a^b) - 1) >> 63)
  183. }
  184. // constantTimeNotEq returns 1 if a != b or 0 otherwise in constant time.
  185. func constantTimeNotEq(a, b uint32) uint32 {
  186. return ^uint32((uint64(a^b)-1)>>63) & 1
  187. }
  188. // constantTimeLess returns 1 if a < b or 0 otherwise in constant time.
  189. func constantTimeLess(a, b uint32) uint32 {
  190. return uint32((uint64(a) - uint64(b)) >> 63)
  191. }
  192. // constantTimeLessOrEq returns 1 if a <= b or 0 otherwise in constant time.
  193. func constantTimeLessOrEq(a, b uint32) uint32 {
  194. return uint32((uint64(a) - uint64(b) - 1) >> 63)
  195. }
  196. // constantTimeGreater returns 1 if a > b or 0 otherwise in constant time.
  197. func constantTimeGreater(a, b uint32) uint32 {
  198. return constantTimeLess(b, a)
  199. }
  200. // constantTimeGreaterOrEq returns 1 if a >= b or 0 otherwise in constant time.
  201. func constantTimeGreaterOrEq(a, b uint32) uint32 {
  202. return constantTimeLessOrEq(b, a)
  203. }
  204. // constantTimeMin returns min(a,b) in constant time.
  205. func constantTimeMin(a, b uint32) uint32 {
  206. return b ^ ((a ^ b) & -constantTimeLess(a, b))
  207. }
  208. // overflows determines if the current scalar is greater than or equal to the
  209. // group order in constant time and returns 1 if it is or 0 otherwise.
  210. func (s *ModNScalar) overflows() uint32 {
  211. // The intuition here is that the scalar is greater than the group order if
  212. // one of the higher individual words is greater than corresponding word of
  213. // the group order and all higher words in the scalar are equal to their
  214. // corresponding word of the group order. Since this type is modulo the
  215. // group order, being equal is also an overflow back to 0.
  216. //
  217. // Note that the words 5, 6, and 7 are all the max uint32 value, so there is
  218. // no need to test if those individual words of the scalar exceeds them,
  219. // hence, only equality is checked for them.
  220. highWordsEqual := constantTimeEq(s.n[7], orderWordSeven)
  221. highWordsEqual &= constantTimeEq(s.n[6], orderWordSix)
  222. highWordsEqual &= constantTimeEq(s.n[5], orderWordFive)
  223. overflow := highWordsEqual & constantTimeGreater(s.n[4], orderWordFour)
  224. highWordsEqual &= constantTimeEq(s.n[4], orderWordFour)
  225. overflow |= highWordsEqual & constantTimeGreater(s.n[3], orderWordThree)
  226. highWordsEqual &= constantTimeEq(s.n[3], orderWordThree)
  227. overflow |= highWordsEqual & constantTimeGreater(s.n[2], orderWordTwo)
  228. highWordsEqual &= constantTimeEq(s.n[2], orderWordTwo)
  229. overflow |= highWordsEqual & constantTimeGreater(s.n[1], orderWordOne)
  230. highWordsEqual &= constantTimeEq(s.n[1], orderWordOne)
  231. overflow |= highWordsEqual & constantTimeGreaterOrEq(s.n[0], orderWordZero)
  232. return overflow
  233. }
  234. // reduce256 reduces the current scalar modulo the group order in accordance
  235. // with the overflows parameter in constant time. The overflows parameter
  236. // specifies whether or not the scalar is known to be greater than the group
  237. // order and MUST either be 1 in the case it is or 0 in the case it is not for a
  238. // correct result.
  239. func (s *ModNScalar) reduce256(overflows uint32) {
  240. // Notice that since s < 2^256 < 2N (where N is the group order), the max
  241. // possible number of reductions required is one. Therefore, in the case a
  242. // reduction is needed, it can be performed with a single subtraction of N.
  243. // Also, recall that subtraction is equivalent to addition by the two's
  244. // complement while ignoring the carry.
  245. //
  246. // When s >= N, the overflows parameter will be 1. Conversely, it will be 0
  247. // when s < N. Thus multiplying by the overflows parameter will either
  248. // result in 0 or the multiplicand itself.
  249. //
  250. // Combining the above along with the fact that s + 0 = s, the following is
  251. // a constant time implementation that works by either adding 0 or the two's
  252. // complement of N as needed.
  253. //
  254. // The final result will be in the range 0 <= s < N as expected.
  255. overflows64 := uint64(overflows)
  256. c := uint64(s.n[0]) + overflows64*uint64(orderComplementWordZero)
  257. s.n[0] = uint32(c & uint32Mask)
  258. c = (c >> 32) + uint64(s.n[1]) + overflows64*uint64(orderComplementWordOne)
  259. s.n[1] = uint32(c & uint32Mask)
  260. c = (c >> 32) + uint64(s.n[2]) + overflows64*uint64(orderComplementWordTwo)
  261. s.n[2] = uint32(c & uint32Mask)
  262. c = (c >> 32) + uint64(s.n[3]) + overflows64*uint64(orderComplementWordThree)
  263. s.n[3] = uint32(c & uint32Mask)
  264. c = (c >> 32) + uint64(s.n[4]) + overflows64 // * 1
  265. s.n[4] = uint32(c & uint32Mask)
  266. c = (c >> 32) + uint64(s.n[5]) // + overflows64 * 0
  267. s.n[5] = uint32(c & uint32Mask)
  268. c = (c >> 32) + uint64(s.n[6]) // + overflows64 * 0
  269. s.n[6] = uint32(c & uint32Mask)
  270. c = (c >> 32) + uint64(s.n[7]) // + overflows64 * 0
  271. s.n[7] = uint32(c & uint32Mask)
  272. }
  273. // SetBytes interprets the provided array as a 256-bit big-endian unsigned
  274. // integer, reduces it modulo the group order, sets the scalar to the result,
  275. // and returns either 1 if it was reduced (aka it overflowed) or 0 otherwise in
  276. // constant time.
  277. //
  278. // Note that a bool is not used here because it is not possible in Go to convert
  279. // from a bool to numeric value in constant time and many constant-time
  280. // operations require a numeric value.
  281. func (s *ModNScalar) SetBytes(b *[32]byte) uint32 {
  282. // Pack the 256 total bits across the 8 uint32 words. This could be done
  283. // with a for loop, but benchmarks show this unrolled version is about 2
  284. // times faster than the variant that uses a loop.
  285. s.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 | uint32(b[28])<<24
  286. s.n[1] = uint32(b[27]) | uint32(b[26])<<8 | uint32(b[25])<<16 | uint32(b[24])<<24
  287. s.n[2] = uint32(b[23]) | uint32(b[22])<<8 | uint32(b[21])<<16 | uint32(b[20])<<24
  288. s.n[3] = uint32(b[19]) | uint32(b[18])<<8 | uint32(b[17])<<16 | uint32(b[16])<<24
  289. s.n[4] = uint32(b[15]) | uint32(b[14])<<8 | uint32(b[13])<<16 | uint32(b[12])<<24
  290. s.n[5] = uint32(b[11]) | uint32(b[10])<<8 | uint32(b[9])<<16 | uint32(b[8])<<24
  291. s.n[6] = uint32(b[7]) | uint32(b[6])<<8 | uint32(b[5])<<16 | uint32(b[4])<<24
  292. s.n[7] = uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
  293. // The value might be >= N, so reduce it as required and return whether or
  294. // not it was reduced.
  295. needsReduce := s.overflows()
  296. s.reduce256(needsReduce)
  297. return needsReduce
  298. }
  299. // zeroArray32 zeroes the provided 32-byte buffer.
  300. func zeroArray32(b *[32]byte) {
  301. copy(b[:], zero32[:])
  302. }
  303. // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
  304. // integer (meaning it is truncated to the first 32 bytes), reduces it modulo
  305. // the group order, sets the scalar to the result, and returns whether or not
  306. // the resulting truncated 256-bit integer overflowed in constant time.
  307. //
  308. // Note that since passing a slice with more than 32 bytes is truncated, it is
  309. // possible that the truncated value is less than the order of the curve and
  310. // hence it will not be reported as having overflowed in that case. It is up to
  311. // the caller to decide whether it needs to provide numbers of the appropriate
  312. // size or it is acceptable to use this function with the described truncation
  313. // and overflow behavior.
  314. func (s *ModNScalar) SetByteSlice(b []byte) bool {
  315. var b32 [32]byte
  316. b = b[:constantTimeMin(uint32(len(b)), 32)]
  317. copy(b32[:], b32[:32-len(b)])
  318. copy(b32[32-len(b):], b)
  319. result := s.SetBytes(&b32)
  320. zeroArray32(&b32)
  321. return result != 0
  322. }
  323. // PutBytesUnchecked unpacks the scalar to a 32-byte big-endian value directly
  324. // into the passed byte slice in constant time. The target slice must must have
  325. // at least 32 bytes available or it will panic.
  326. //
  327. // There is a similar function, PutBytes, which unpacks the scalar into a
  328. // 32-byte array directly. This version is provided since it can be useful to
  329. // write directly into part of a larger buffer without needing a separate
  330. // allocation.
  331. //
  332. // Preconditions:
  333. // - The target slice MUST have at least 32 bytes available
  334. func (s *ModNScalar) PutBytesUnchecked(b []byte) {
  335. // Unpack the 256 total bits from the 8 uint32 words. This could be done
  336. // with a for loop, but benchmarks show this unrolled version is about 2
  337. // times faster than the variant which uses a loop.
  338. b[31] = byte(s.n[0])
  339. b[30] = byte(s.n[0] >> 8)
  340. b[29] = byte(s.n[0] >> 16)
  341. b[28] = byte(s.n[0] >> 24)
  342. b[27] = byte(s.n[1])
  343. b[26] = byte(s.n[1] >> 8)
  344. b[25] = byte(s.n[1] >> 16)
  345. b[24] = byte(s.n[1] >> 24)
  346. b[23] = byte(s.n[2])
  347. b[22] = byte(s.n[2] >> 8)
  348. b[21] = byte(s.n[2] >> 16)
  349. b[20] = byte(s.n[2] >> 24)
  350. b[19] = byte(s.n[3])
  351. b[18] = byte(s.n[3] >> 8)
  352. b[17] = byte(s.n[3] >> 16)
  353. b[16] = byte(s.n[3] >> 24)
  354. b[15] = byte(s.n[4])
  355. b[14] = byte(s.n[4] >> 8)
  356. b[13] = byte(s.n[4] >> 16)
  357. b[12] = byte(s.n[4] >> 24)
  358. b[11] = byte(s.n[5])
  359. b[10] = byte(s.n[5] >> 8)
  360. b[9] = byte(s.n[5] >> 16)
  361. b[8] = byte(s.n[5] >> 24)
  362. b[7] = byte(s.n[6])
  363. b[6] = byte(s.n[6] >> 8)
  364. b[5] = byte(s.n[6] >> 16)
  365. b[4] = byte(s.n[6] >> 24)
  366. b[3] = byte(s.n[7])
  367. b[2] = byte(s.n[7] >> 8)
  368. b[1] = byte(s.n[7] >> 16)
  369. b[0] = byte(s.n[7] >> 24)
  370. }
// PutBytes unpacks the scalar to a 32-byte big-endian value using the passed
// byte array in constant time.
//
// There is a similar function, PutBytesUnchecked, which unpacks the scalar
// into a slice that must have at least 32 bytes available. This version is
// provided since it can be useful to write directly into an array that is
// type checked.
//
// Alternatively, there is also Bytes, which unpacks the scalar into a new
// array and returns that which can sometimes be more ergonomic in
// applications that aren't concerned about an additional copy.
func (s *ModNScalar) PutBytes(b *[32]byte) {
	// Delegate to the slice-based variant; the array form exists purely for
	// the compile-time length guarantee.
	s.PutBytesUnchecked(b[:])
}
  384. // Bytes unpacks the scalar to a 32-byte big-endian value in constant time.
  385. //
  386. // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
  387. // to be passed which can be useful to cut down on the number of allocations
  388. // by allowing the caller to reuse a buffer or write directly into part of a
  389. // larger buffer.
  390. func (s *ModNScalar) Bytes() [32]byte {
  391. var b [32]byte
  392. s.PutBytesUnchecked(b[:])
  393. return b
  394. }
  395. // IsOdd returns whether or not the scalar is an odd number in constant time.
  396. func (s *ModNScalar) IsOdd() bool {
  397. // Only odd numbers have the bottom bit set.
  398. return s.n[0]&1 == 1
  399. }
  400. // Equals returns whether or not the two scalars are the same in constant time.
  401. func (s *ModNScalar) Equals(val *ModNScalar) bool {
  402. // Xor only sets bits when they are different, so the two scalars can only
  403. // be the same if no bits are set after xoring each word.
  404. bits := (s.n[0] ^ val.n[0]) | (s.n[1] ^ val.n[1]) | (s.n[2] ^ val.n[2]) |
  405. (s.n[3] ^ val.n[3]) | (s.n[4] ^ val.n[4]) | (s.n[5] ^ val.n[5]) |
  406. (s.n[6] ^ val.n[6]) | (s.n[7] ^ val.n[7])
  407. return bits == 0
  408. }
  409. // Add2 adds the passed two scalars together modulo the group order in constant
  410. // time and stores the result in s.
  411. //
  412. // The scalar is returned to support chaining. This enables syntax like:
  413. // s3.Add2(s, s2).AddInt(1) so that s3 = s + s2 + 1.
  414. func (s *ModNScalar) Add2(val1, val2 *ModNScalar) *ModNScalar {
  415. c := uint64(val1.n[0]) + uint64(val2.n[0])
  416. s.n[0] = uint32(c & uint32Mask)
  417. c = (c >> 32) + uint64(val1.n[1]) + uint64(val2.n[1])
  418. s.n[1] = uint32(c & uint32Mask)
  419. c = (c >> 32) + uint64(val1.n[2]) + uint64(val2.n[2])
  420. s.n[2] = uint32(c & uint32Mask)
  421. c = (c >> 32) + uint64(val1.n[3]) + uint64(val2.n[3])
  422. s.n[3] = uint32(c & uint32Mask)
  423. c = (c >> 32) + uint64(val1.n[4]) + uint64(val2.n[4])
  424. s.n[4] = uint32(c & uint32Mask)
  425. c = (c >> 32) + uint64(val1.n[5]) + uint64(val2.n[5])
  426. s.n[5] = uint32(c & uint32Mask)
  427. c = (c >> 32) + uint64(val1.n[6]) + uint64(val2.n[6])
  428. s.n[6] = uint32(c & uint32Mask)
  429. c = (c >> 32) + uint64(val1.n[7]) + uint64(val2.n[7])
  430. s.n[7] = uint32(c & uint32Mask)
  431. // The result is now 256 bits, but it might still be >= N, so use the
  432. // existing normal reduce method for 256-bit values.
  433. s.reduce256(uint32(c>>32) + s.overflows())
  434. return s
  435. }
// Add adds the passed scalar to the existing one modulo the group order in
// constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Add(s2).AddInt(1) so that s = s + s2 + 1.
func (s *ModNScalar) Add(val *ModNScalar) *ModNScalar {
	// Adding in place is safe because Add2 only reads each input word before
	// writing the corresponding result word.
	return s.Add2(s, val)
}
  444. // accumulator96 provides a 96-bit accumulator for use in the intermediate
  445. // calculations requiring more than 64-bits.
  446. type accumulator96 struct {
  447. n [3]uint32
  448. }
  449. // Add adds the passed unsigned 64-bit value to the accumulator.
  450. func (a *accumulator96) Add(v uint64) {
  451. low := uint32(v & uint32Mask)
  452. hi := uint32(v >> 32)
  453. a.n[0] += low
  454. a.n[1] += constantTimeLess(a.n[0], low) // Carry if overflow in n[0].
  455. a.n[1] += hi
  456. a.n[2] += constantTimeLess(a.n[1], hi) // Carry if overflow in n[1].
  457. }
  458. // Rsh32 right shifts the accumulator by 32 bits.
  459. func (a *accumulator96) Rsh32() {
  460. a.n[0] = a.n[1]
  461. a.n[1] = a.n[2]
  462. a.n[2] = 0
  463. }
// reduce385 reduces the 385-bit intermediate result in the passed terms
// modulo the group order in constant time and stores the result in s.
//
// The terms t0 through t12 are the 13 32-bit coefficients of the intermediate
// result in base 2^32 (passed as uint64s), with t0 being the least
// significant term.
func (s *ModNScalar) reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12 uint64) {
	// At this point, the intermediate result in the passed terms has been
	// reduced to fit within 385 bits, so reduce it again using the same
	// method described in reduce512. As before, the intermediate result will
	// end up being reduced by another 127 bits to 258 bits, thus 9 32-bit
	// terms are needed for this iteration. The reduced terms are assigned
	// back to t0 through t8.
	//
	// Note that several of the intermediate calculations require adding
	// 64-bit products together which would overflow a uint64, so a 96-bit
	// accumulator is used instead until the value is reduced enough to use
	// native uint64s.

	// Terms for 2^(32*0).
	var acc accumulator96
	acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
	acc.Add(t8 * uint64(orderComplementWordZero))
	t0 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*1).
	acc.Add(t1)
	acc.Add(t8 * uint64(orderComplementWordOne))
	acc.Add(t9 * uint64(orderComplementWordZero))
	t1 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*2).
	acc.Add(t2)
	acc.Add(t8 * uint64(orderComplementWordTwo))
	acc.Add(t9 * uint64(orderComplementWordOne))
	acc.Add(t10 * uint64(orderComplementWordZero))
	t2 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*3).
	acc.Add(t3)
	acc.Add(t8 * uint64(orderComplementWordThree))
	acc.Add(t9 * uint64(orderComplementWordTwo))
	acc.Add(t10 * uint64(orderComplementWordOne))
	acc.Add(t11 * uint64(orderComplementWordZero))
	t3 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*4). Note that the fourth word of the two's complement
	// of N is 1, so multiplications by it are elided.
	acc.Add(t4)
	acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t9 * uint64(orderComplementWordThree))
	acc.Add(t10 * uint64(orderComplementWordTwo))
	acc.Add(t11 * uint64(orderComplementWordOne))
	acc.Add(t12 * uint64(orderComplementWordZero))
	t4 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*5). Words five through seven of the two's complement of
	// N are 0, so those products are skipped entirely.
	acc.Add(t5)
	// acc.Add(t8 * uint64(orderComplementWordFive)) // 0
	acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t10 * uint64(orderComplementWordThree))
	acc.Add(t11 * uint64(orderComplementWordTwo))
	acc.Add(t12 * uint64(orderComplementWordOne))
	t5 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*6).
	acc.Add(t6)
	// acc.Add(t8 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t9 * uint64(orderComplementWordFive)) // 0
	acc.Add(t10) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t11 * uint64(orderComplementWordThree))
	acc.Add(t12 * uint64(orderComplementWordTwo))
	t6 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*7).
	acc.Add(t7)
	// acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t9 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t10 * uint64(orderComplementWordFive)) // 0
	acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t12 * uint64(orderComplementWordThree))
	t7 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*8).
	// acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t10 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t11 * uint64(orderComplementWordFive)) // 0
	acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
	t8 = uint64(acc.n[0])
	// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.

	// NOTE: All of the remaining multiplications for this iteration result in
	// 0 as they all involve multiplying by combinations of the fifth, sixth,
	// and seventh words of the two's complement of N, which are 0, so skip
	// them.

	// At this point, the result is reduced to fit within 258 bits, so reduce
	// it again using a slightly modified version of the same method. The
	// maximum value in t8 is 2 at this point and therefore multiplying it by
	// each word of the two's complement of N and adding it to a 32-bit term
	// will result in a maximum requirement of 33 bits, so it is safe to use
	// native uint64s here for the intermediate term carry propagation.
	//
	// Also, since the maximum value in t8 is 2, this ends up reducing by
	// another 2 bits to 256 bits.
	c := t0 + t8*uint64(orderComplementWordZero)
	s.n[0] = uint32(c & uint32Mask)
	c = (c >> 32) + t1 + t8*uint64(orderComplementWordOne)
	s.n[1] = uint32(c & uint32Mask)
	c = (c >> 32) + t2 + t8*uint64(orderComplementWordTwo)
	s.n[2] = uint32(c & uint32Mask)
	c = (c >> 32) + t3 + t8*uint64(orderComplementWordThree)
	s.n[3] = uint32(c & uint32Mask)
	c = (c >> 32) + t4 + t8 // * uint64(orderComplementWordFour) == * 1
	s.n[4] = uint32(c & uint32Mask)
	c = (c >> 32) + t5 // + t8*uint64(orderComplementWordFive) == 0
	s.n[5] = uint32(c & uint32Mask)
	c = (c >> 32) + t6 // + t8*uint64(orderComplementWordSix) == 0
	s.n[6] = uint32(c & uint32Mask)
	c = (c >> 32) + t7 // + t8*uint64(orderComplementWordSeven) == 0
	s.n[7] = uint32(c & uint32Mask)

	// The result is now 256 bits, but it might still be >= N, so use the
	// existing normal reduce method for 256-bit values.
	s.reduce256(uint32(c>>32) + s.overflows())
}
// reduce512 reduces the 512-bit intermediate result in the passed terms modulo
// the group order down to 385 bits in constant time and stores the result in s.
//
// The terms t0 through t15 are the 32-bit limbs of the 512-bit value grouped
// by base 2^(32*i) with i being the term number.  On return the reduced value
// is handed off to reduce385 via the reassigned terms t0 through t12.
func (s *ModNScalar) reduce512(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 uint64) {
	// At this point, the intermediate result in the passed terms is grouped
	// into the respective bases.
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, where log_2(c) < t,
	// highly efficient reduction can be achieved per the provided algorithm.
	//
	// The secp256k1 group order fits this criteria since it is:
	// 2^256 - 432420386565659656852420866394968145599
	//
	// Technically the max possible value here is (N-1)^2 since the two scalars
	// being multiplied are always mod N. Nevertheless, it is safer to consider
	// it to be (2^256-1)^2 = 2^512 - 2^256 + 1 since it is the product of two
	// 256-bit values.
	//
	// The algorithm is to reduce the result modulo the prime by subtracting
	// multiples of the group order N. However, in order to simplify carry
	// propagation, this adds with the two's complement of N to achieve the same
	// result.
	//
	// Since the two's complement of N has 127 leading zero bits, this will end
	// up reducing the intermediate result from 512 bits to 385 bits, resulting
	// in 13 32-bit terms. The reduced terms are assigned back to t0 through
	// t12.
	//
	// Note that several of the intermediate calculations require adding 64-bit
	// products together which would overflow a uint64, so a 96-bit accumulator
	// is used instead.
	//
	// Also note, per the inline comments below, that the fourth word of the
	// two's complement of N is 1 (so those products are plain additions) and
	// the fifth through seventh words are 0 (so those products are skipped
	// entirely).

	// Terms for 2^(32*0).
	var acc accumulator96
	acc.n[0] = uint32(t0) // == acc.Add(t0) because acc is guaranteed to be 0.
	acc.Add(t8 * uint64(orderComplementWordZero))
	t0 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*1).
	acc.Add(t1)
	acc.Add(t8 * uint64(orderComplementWordOne))
	acc.Add(t9 * uint64(orderComplementWordZero))
	t1 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*2).
	acc.Add(t2)
	acc.Add(t8 * uint64(orderComplementWordTwo))
	acc.Add(t9 * uint64(orderComplementWordOne))
	acc.Add(t10 * uint64(orderComplementWordZero))
	t2 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*3).
	acc.Add(t3)
	acc.Add(t8 * uint64(orderComplementWordThree))
	acc.Add(t9 * uint64(orderComplementWordTwo))
	acc.Add(t10 * uint64(orderComplementWordOne))
	acc.Add(t11 * uint64(orderComplementWordZero))
	t3 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*4).
	acc.Add(t4)
	acc.Add(t8) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t9 * uint64(orderComplementWordThree))
	acc.Add(t10 * uint64(orderComplementWordTwo))
	acc.Add(t11 * uint64(orderComplementWordOne))
	acc.Add(t12 * uint64(orderComplementWordZero))
	t4 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*5).
	acc.Add(t5)
	// acc.Add(t8 * uint64(orderComplementWordFive)) // 0
	acc.Add(t9) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t10 * uint64(orderComplementWordThree))
	acc.Add(t11 * uint64(orderComplementWordTwo))
	acc.Add(t12 * uint64(orderComplementWordOne))
	acc.Add(t13 * uint64(orderComplementWordZero))
	t5 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*6).
	acc.Add(t6)
	// acc.Add(t8 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t9 * uint64(orderComplementWordFive)) // 0
	acc.Add(t10) // * uint64(orderComplementWordFour)) // * 1
	acc.Add(t11 * uint64(orderComplementWordThree))
	acc.Add(t12 * uint64(orderComplementWordTwo))
	acc.Add(t13 * uint64(orderComplementWordOne))
	acc.Add(t14 * uint64(orderComplementWordZero))
	t6 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*7).
	acc.Add(t7)
	// acc.Add(t8 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t9 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t10 * uint64(orderComplementWordFive)) // 0
	acc.Add(t11) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t12 * uint64(orderComplementWordThree))
	acc.Add(t13 * uint64(orderComplementWordTwo))
	acc.Add(t14 * uint64(orderComplementWordOne))
	acc.Add(t15 * uint64(orderComplementWordZero))
	t7 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*8).
	// acc.Add(t9 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t10 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t11 * uint64(orderComplementWordFive)) // 0
	acc.Add(t12) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t13 * uint64(orderComplementWordThree))
	acc.Add(t14 * uint64(orderComplementWordTwo))
	acc.Add(t15 * uint64(orderComplementWordOne))
	t8 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*9).
	// acc.Add(t10 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t11 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t12 * uint64(orderComplementWordFive)) // 0
	acc.Add(t13) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t14 * uint64(orderComplementWordThree))
	acc.Add(t15 * uint64(orderComplementWordTwo))
	t9 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*10).
	// acc.Add(t11 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t12 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t13 * uint64(orderComplementWordFive)) // 0
	acc.Add(t14) // * uint64(orderComplementWordFour) // * 1
	acc.Add(t15 * uint64(orderComplementWordThree))
	t10 = uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*11).
	// acc.Add(t12 * uint64(orderComplementWordSeven)) // 0
	// acc.Add(t13 * uint64(orderComplementWordSix)) // 0
	// acc.Add(t14 * uint64(orderComplementWordFive)) // 0
	acc.Add(t15) // * uint64(orderComplementWordFour) // * 1
	t11 = uint64(acc.n[0])
	acc.Rsh32()

	// NOTE: All of the remaining multiplications for this iteration result in 0
	// as they all involve multiplying by combinations of the fifth, sixth, and
	// seventh words of the two's complement of N, which are 0, so skip them.

	// Terms for 2^(32*12).
	t12 = uint64(acc.n[0])
	// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.

	// At this point, the result is reduced to fit within 385 bits, so reduce it
	// again using the same method accordingly.
	s.reduce385(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12)
}
// Mul2 multiplies the passed two scalars together modulo the group order in
// constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s3.Mul2(s, s2).AddInt(1) so that s3 = (s * s2) + 1.
func (s *ModNScalar) Mul2(val, val2 *ModNScalar) *ModNScalar {
	// This could be done with for loops and an array to store the intermediate
	// terms, but this unrolled version is significantly faster.

	// The overall strategy employed here is:
	// 1) Calculate the 512-bit product of the two scalars using the standard
	//    pencil-and-paper method.
	// 2) Reduce the result modulo the prime by effectively subtracting
	//    multiples of the group order N (actually performed by adding multiples
	//    of the two's complement of N to avoid implementing subtraction).
	// 3) Repeat step 2 noting that each iteration reduces the required number
	//    of bits by 127 because the two's complement of N has 127 leading zero
	//    bits.
	// 4) Once reduced to 256 bits, call the existing reduce method to perform
	//    a final reduction as needed.
	//
	// Note that several of the intermediate calculations require adding 64-bit
	// products together which would overflow a uint64, so a 96-bit accumulator
	// is used instead.
	//
	// Each group below accumulates every cross product val.n[i]*val2.n[j] with
	// i+j equal to the term number, i.e. the standard schoolbook convolution
	// of the two 8-word operands into 16 intermediate terms t0 through t15.

	// Terms for 2^(32*0).
	var acc accumulator96
	acc.Add(uint64(val.n[0]) * uint64(val2.n[0]))
	t0 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*1).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[0]))
	t1 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*2).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[0]))
	t2 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*3).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[0]))
	t3 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*4).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[0]))
	t4 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*5).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[0]))
	t5 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*6).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[0]))
	t6 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*7).
	acc.Add(uint64(val.n[0]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[1]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[1]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[0]))
	t7 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*8).
	acc.Add(uint64(val.n[1]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[2]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[2]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[1]))
	t8 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*9).
	acc.Add(uint64(val.n[2]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[3]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[3]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[2]))
	t9 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*10).
	acc.Add(uint64(val.n[3]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[4]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[4]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[3]))
	t10 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*11).
	acc.Add(uint64(val.n[4]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[5]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[5]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[4]))
	t11 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*12).
	acc.Add(uint64(val.n[5]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[6]) * uint64(val2.n[6]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[5]))
	t12 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*13).
	acc.Add(uint64(val.n[6]) * uint64(val2.n[7]))
	acc.Add(uint64(val.n[7]) * uint64(val2.n[6]))
	t13 := uint64(acc.n[0])
	acc.Rsh32()

	// Terms for 2^(32*14).
	acc.Add(uint64(val.n[7]) * uint64(val2.n[7]))
	t14 := uint64(acc.n[0])
	acc.Rsh32()

	// What's left is for 2^(32*15).
	t15 := uint64(acc.n[0])
	// acc.Rsh32() // No need since not used after this. Guaranteed to be 0.

	// At this point, all of the terms are grouped into their respective base
	// and occupy up to 512 bits. Reduce the result accordingly.
	s.reduce512(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14,
		t15)
	return s
}
// Mul multiplies the passed scalar with the existing one modulo the group order
// in constant time and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Mul(s2).AddInt(1) so that s = (s * s2) + 1.
func (s *ModNScalar) Mul(val *ModNScalar) *ModNScalar {
	// Delegate to the general two-operand multiply with s itself as the first
	// operand.
	return s.Mul2(s, val)
}
// SquareVal squares the passed scalar modulo the group order in constant time
// and stores the result in s.
//
// The scalar is returned to support chaining. This enables syntax like:
// s3.SquareVal(s).Mul(s) so that s3 = s^2 * s = s^3.
func (s *ModNScalar) SquareVal(val *ModNScalar) *ModNScalar {
	// This could technically be optimized slightly to take advantage of the
	// fact that many of the intermediate calculations in squaring are just
	// doubling, however, benchmarking has shown that due to the need to use a
	// 96-bit accumulator, any savings are essentially offset by that and
	// consequently there is no real difference in performance over just
	// multiplying the value by itself to justify the extra code for now. This
	// can be revisited in the future if it becomes a bottleneck in practice.
	return s.Mul2(val, val)
}
// Square squares the scalar modulo the group order in constant time. The
// existing scalar is modified.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Square().Mul(s2) so that s = s^2 * s2.
func (s *ModNScalar) Square() *ModNScalar {
	// Squaring in place is just the two-operand variant with s as both source
	// and destination.
	return s.SquareVal(s)
}
// NegateVal negates the passed scalar modulo the group order and stores the
// result in s in constant time.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.NegateVal(s2).AddInt(1) so that s = -s2 + 1.
func (s *ModNScalar) NegateVal(val *ModNScalar) *ModNScalar {
	// Since the scalar is already in the range 0 <= val < N, where N is the
	// group order, negation modulo the group order is just the group order
	// minus the value. This implies that the result will always be in the
	// desired range with the sole exception of 0 because N - 0 = N itself.
	//
	// Therefore, in order to avoid the need to reduce the result for every
	// other case in order to achieve constant time, this creates a mask that is
	// all 0s in the case of the scalar being negated is 0 and all 1s otherwise
	// and bitwise ands that mask with each word.
	//
	// Finally, to simplify the carry propagation, this adds the two's
	// complement of the scalar to N in order to achieve the same result.

	// OR all words together so bits is zero if and only if the scalar is zero.
	bits := val.n[0] | val.n[1] | val.n[2] | val.n[3] | val.n[4] | val.n[5] |
		val.n[6] | val.n[7]
	mask := uint64(uint32Mask * constantTimeNotEq(bits, 0))
	// The two's complement is ^val + 1, so the +1 is folded into the first
	// word only; subsequent words just add the complemented word plus carry.
	c := uint64(orderWordZero) + (uint64(^val.n[0]) + 1)
	s.n[0] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordOne) + uint64(^val.n[1])
	s.n[1] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordTwo) + uint64(^val.n[2])
	s.n[2] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordThree) + uint64(^val.n[3])
	s.n[3] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordFour) + uint64(^val.n[4])
	s.n[4] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordFive) + uint64(^val.n[5])
	s.n[5] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordSix) + uint64(^val.n[6])
	s.n[6] = uint32(c & mask)
	c = (c >> 32) + uint64(orderWordSeven) + uint64(^val.n[7])
	s.n[7] = uint32(c & mask)
	return s
}
// Negate negates the scalar modulo the group order in constant time. The
// existing scalar is modified.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Negate().AddInt(1) so that s = -s + 1.
func (s *ModNScalar) Negate() *ModNScalar {
	// Negating in place is just the general variant with s as both source and
	// destination.
	return s.NegateVal(s)
}
  943. // InverseValNonConst finds the modular multiplicative inverse of the passed
  944. // scalar and stores result in s in *non-constant* time.
  945. //
  946. // The scalar is returned to support chaining. This enables syntax like:
  947. // s3.InverseVal(s1).Mul(s2) so that s3 = s1^-1 * s2.
  948. func (s *ModNScalar) InverseValNonConst(val *ModNScalar) *ModNScalar {
  949. // This is making use of big integers for now. Ideally it will be replaced
  950. // with an implementation that does not depend on big integers.
  951. valBytes := val.Bytes()
  952. bigVal := new(big.Int).SetBytes(valBytes[:])
  953. bigVal.ModInverse(bigVal, curveParams.N)
  954. s.SetByteSlice(bigVal.Bytes())
  955. return s
  956. }
// InverseNonConst finds the modular multiplicative inverse of the scalar in
// *non-constant* time. The existing scalar is modified.
//
// The scalar is returned to support chaining. This enables syntax like:
// s.Inverse().Mul(s2) so that s = s^-1 * s2.
func (s *ModNScalar) InverseNonConst() *ModNScalar {
	// Inverting in place is just the general variant with s as both source and
	// destination.
	return s.InverseValNonConst(s)
}
  965. // IsOverHalfOrder returns whether or not the scalar exceeds the group order
  966. // divided by 2 in constant time.
  967. func (s *ModNScalar) IsOverHalfOrder() bool {
  968. // The intuition here is that the scalar is greater than half of the group
  969. // order if one of the higher individual words is greater than the
  970. // corresponding word of the half group order and all higher words in the
  971. // scalar are equal to their corresponding word of the half group order.
  972. //
  973. // Note that the words 4, 5, and 6 are all the max uint32 value, so there is
  974. // no need to test if those individual words of the scalar exceeds them,
  975. // hence, only equality is checked for them.
  976. result := constantTimeGreater(s.n[7], halfOrderWordSeven)
  977. highWordsEqual := constantTimeEq(s.n[7], halfOrderWordSeven)
  978. highWordsEqual &= constantTimeEq(s.n[6], halfOrderWordSix)
  979. highWordsEqual &= constantTimeEq(s.n[5], halfOrderWordFive)
  980. highWordsEqual &= constantTimeEq(s.n[4], halfOrderWordFour)
  981. result |= highWordsEqual & constantTimeGreater(s.n[3], halfOrderWordThree)
  982. highWordsEqual &= constantTimeEq(s.n[3], halfOrderWordThree)
  983. result |= highWordsEqual & constantTimeGreater(s.n[2], halfOrderWordTwo)
  984. highWordsEqual &= constantTimeEq(s.n[2], halfOrderWordTwo)
  985. result |= highWordsEqual & constantTimeGreater(s.n[1], halfOrderWordOne)
  986. highWordsEqual &= constantTimeEq(s.n[1], halfOrderWordOne)
  987. result |= highWordsEqual & constantTimeGreater(s.n[0], halfOrderWordZero)
  988. return result != 0
  989. }