mont25519_amd64.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build amd64,!gccgo,!appengine
  5. package curve25519
// These functions are implemented in the .s files. The names of the functions
// in the rest of the file are also taken from the SUPERCOP sources to help
// people following along.

// cswap is called by mladder once per scalar bit, with v equal to 0 or 1.
// NOTE(review): presumably a conditional swap of ladder state that does
// nothing when v is 0. mladder passes a pointer into the middle of its
// [5][5]uint64 work array, so the assembly may touch more memory than the
// single [5]uint64 named in this signature — confirm against the .s file.
//
//go:noescape
func cswap(inout *[5]uint64, v uint64)

// ladderstep is called by mladder once per scalar bit on the whole work
// array. NOTE(review): presumably one combined double-and-add step of the
// Montgomery ladder, updating inout in place — confirm against the .s file.
//
//go:noescape
func ladderstep(inout *[5][5]uint64)

// freeze is applied by pack to a copy of the field element right before it is
// serialized. NOTE(review): presumably reduces inout in place to its unique
// representative modulo 2^255 - 19 — confirm against the .s file.
//
//go:noescape
func freeze(inout *[5]uint64)

// mul sets dest to the field product of a and b; invert relies on this when it
// tracks exponents. Callers in this file pass dest equal to a.
//
//go:noescape
func mul(dest, a, b *[5]uint64)

// square sets out to the field square of in; invert relies on this when it
// tracks exponents. Callers in this file pass out equal to in.
//
//go:noescape
func square(out, in *[5]uint64)
  19. // mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
  20. func mladder(xr, zr *[5]uint64, s *[32]byte) {
  21. var work [5][5]uint64
  22. work[0] = *xr
  23. setint(&work[1], 1)
  24. setint(&work[2], 0)
  25. work[3] = *xr
  26. setint(&work[4], 1)
  27. j := uint(6)
  28. var prevbit byte
  29. for i := 31; i >= 0; i-- {
  30. for j < 8 {
  31. bit := ((*s)[i] >> j) & 1
  32. swap := bit ^ prevbit
  33. prevbit = bit
  34. cswap(&work[1], uint64(swap))
  35. ladderstep(&work)
  36. j--
  37. }
  38. j = 7
  39. }
  40. *xr = work[1]
  41. *zr = work[2]
  42. }
  43. func scalarMult(out, in, base *[32]byte) {
  44. var e [32]byte
  45. copy(e[:], (*in)[:])
  46. e[0] &= 248
  47. e[31] &= 127
  48. e[31] |= 64
  49. var t, z [5]uint64
  50. unpack(&t, base)
  51. mladder(&t, &z, &e)
  52. invert(&z, &z)
  53. mul(&t, &t, &z)
  54. pack(out, &t)
  55. }
  56. func setint(r *[5]uint64, v uint64) {
  57. r[0] = v
  58. r[1] = 0
  59. r[2] = 0
  60. r[3] = 0
  61. r[4] = 0
  62. }
  63. // unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
  64. // order.
  65. func unpack(r *[5]uint64, x *[32]byte) {
  66. r[0] = uint64(x[0]) |
  67. uint64(x[1])<<8 |
  68. uint64(x[2])<<16 |
  69. uint64(x[3])<<24 |
  70. uint64(x[4])<<32 |
  71. uint64(x[5])<<40 |
  72. uint64(x[6]&7)<<48
  73. r[1] = uint64(x[6])>>3 |
  74. uint64(x[7])<<5 |
  75. uint64(x[8])<<13 |
  76. uint64(x[9])<<21 |
  77. uint64(x[10])<<29 |
  78. uint64(x[11])<<37 |
  79. uint64(x[12]&63)<<45
  80. r[2] = uint64(x[12])>>6 |
  81. uint64(x[13])<<2 |
  82. uint64(x[14])<<10 |
  83. uint64(x[15])<<18 |
  84. uint64(x[16])<<26 |
  85. uint64(x[17])<<34 |
  86. uint64(x[18])<<42 |
  87. uint64(x[19]&1)<<50
  88. r[3] = uint64(x[19])>>1 |
  89. uint64(x[20])<<7 |
  90. uint64(x[21])<<15 |
  91. uint64(x[22])<<23 |
  92. uint64(x[23])<<31 |
  93. uint64(x[24])<<39 |
  94. uint64(x[25]&15)<<47
  95. r[4] = uint64(x[25])>>4 |
  96. uint64(x[26])<<4 |
  97. uint64(x[27])<<12 |
  98. uint64(x[28])<<20 |
  99. uint64(x[29])<<28 |
  100. uint64(x[30])<<36 |
  101. uint64(x[31]&127)<<44
  102. }
  103. // pack sets out = x where out is the usual, little-endian form of the 5,
  104. // 51-bit limbs in x.
  105. func pack(out *[32]byte, x *[5]uint64) {
  106. t := *x
  107. freeze(&t)
  108. out[0] = byte(t[0])
  109. out[1] = byte(t[0] >> 8)
  110. out[2] = byte(t[0] >> 16)
  111. out[3] = byte(t[0] >> 24)
  112. out[4] = byte(t[0] >> 32)
  113. out[5] = byte(t[0] >> 40)
  114. out[6] = byte(t[0] >> 48)
  115. out[6] ^= byte(t[1]<<3) & 0xf8
  116. out[7] = byte(t[1] >> 5)
  117. out[8] = byte(t[1] >> 13)
  118. out[9] = byte(t[1] >> 21)
  119. out[10] = byte(t[1] >> 29)
  120. out[11] = byte(t[1] >> 37)
  121. out[12] = byte(t[1] >> 45)
  122. out[12] ^= byte(t[2]<<6) & 0xc0
  123. out[13] = byte(t[2] >> 2)
  124. out[14] = byte(t[2] >> 10)
  125. out[15] = byte(t[2] >> 18)
  126. out[16] = byte(t[2] >> 26)
  127. out[17] = byte(t[2] >> 34)
  128. out[18] = byte(t[2] >> 42)
  129. out[19] = byte(t[2] >> 50)
  130. out[19] ^= byte(t[3]<<1) & 0xfe
  131. out[20] = byte(t[3] >> 7)
  132. out[21] = byte(t[3] >> 15)
  133. out[22] = byte(t[3] >> 23)
  134. out[23] = byte(t[3] >> 31)
  135. out[24] = byte(t[3] >> 39)
  136. out[25] = byte(t[3] >> 47)
  137. out[25] ^= byte(t[4]<<4) & 0xf0
  138. out[26] = byte(t[4] >> 4)
  139. out[27] = byte(t[4] >> 12)
  140. out[28] = byte(t[4] >> 20)
  141. out[29] = byte(t[4] >> 28)
  142. out[30] = byte(t[4] >> 36)
  143. out[31] = byte(t[4] >> 44)
  144. }
  145. // invert calculates r = x^-1 mod p using Fermat's little theorem.
  146. func invert(r *[5]uint64, x *[5]uint64) {
  147. var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
  148. square(&z2, x) /* 2 */
  149. square(&t, &z2) /* 4 */
  150. square(&t, &t) /* 8 */
  151. mul(&z9, &t, x) /* 9 */
  152. mul(&z11, &z9, &z2) /* 11 */
  153. square(&t, &z11) /* 22 */
  154. mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
  155. square(&t, &z2_5_0) /* 2^6 - 2^1 */
  156. for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
  157. square(&t, &t)
  158. }
  159. mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
  160. square(&t, &z2_10_0) /* 2^11 - 2^1 */
  161. for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
  162. square(&t, &t)
  163. }
  164. mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
  165. square(&t, &z2_20_0) /* 2^21 - 2^1 */
  166. for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
  167. square(&t, &t)
  168. }
  169. mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
  170. square(&t, &t) /* 2^41 - 2^1 */
  171. for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
  172. square(&t, &t)
  173. }
  174. mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
  175. square(&t, &z2_50_0) /* 2^51 - 2^1 */
  176. for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
  177. square(&t, &t)
  178. }
  179. mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
  180. square(&t, &z2_100_0) /* 2^101 - 2^1 */
  181. for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
  182. square(&t, &t)
  183. }
  184. mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
  185. square(&t, &t) /* 2^201 - 2^1 */
  186. for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
  187. square(&t, &t)
  188. }
  189. mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
  190. square(&t, &t) /* 2^251 - 2^1 */
  191. square(&t, &t) /* 2^252 - 2^2 */
  192. square(&t, &t) /* 2^253 - 2^3 */
  193. square(&t, &t) /* 2^254 - 2^4 */
  194. square(&t, &t) /* 2^255 - 2^5 */
  195. mul(r, &t, &z11) /* 2^255 - 21 */
  196. }