curve25519.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
// We have an implementation in amd64 assembly so this code is only run on
// non-amd64 platforms. The amd64 assembly does not support gccgo.
  6. // +build !amd64 gccgo appengine
  7. package curve25519
  8. // This code is a port of the public domain, "ref10" implementation of
  9. // curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
  10. // fieldElement represents an element of the field GF(2^255 - 19). An element
  11. // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
  12. // t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
  13. // context.
  14. type fieldElement [10]int32
  15. func feZero(fe *fieldElement) {
  16. for i := range fe {
  17. fe[i] = 0
  18. }
  19. }
  20. func feOne(fe *fieldElement) {
  21. feZero(fe)
  22. fe[0] = 1
  23. }
  24. func feAdd(dst, a, b *fieldElement) {
  25. for i := range dst {
  26. dst[i] = a[i] + b[i]
  27. }
  28. }
  29. func feSub(dst, a, b *fieldElement) {
  30. for i := range dst {
  31. dst[i] = a[i] - b[i]
  32. }
  33. }
  34. func feCopy(dst, src *fieldElement) {
  35. for i := range dst {
  36. dst[i] = src[i]
  37. }
  38. }
  39. // feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
  40. //
  41. // Preconditions: b in {0,1}.
  42. func feCSwap(f, g *fieldElement, b int32) {
  43. var x fieldElement
  44. b = -b
  45. for i := range x {
  46. x[i] = b & (f[i] ^ g[i])
  47. }
  48. for i := range f {
  49. f[i] ^= x[i]
  50. }
  51. for i := range g {
  52. g[i] ^= x[i]
  53. }
  54. }
  55. // load3 reads a 24-bit, little-endian value from in.
  56. func load3(in []byte) int64 {
  57. var r int64
  58. r = int64(in[0])
  59. r |= int64(in[1]) << 8
  60. r |= int64(in[2]) << 16
  61. return r
  62. }
  63. // load4 reads a 32-bit, little-endian value from in.
  64. func load4(in []byte) int64 {
  65. var r int64
  66. r = int64(in[0])
  67. r |= int64(in[1]) << 8
  68. r |= int64(in[2]) << 16
  69. r |= int64(in[3]) << 24
  70. return r
  71. }
  72. func feFromBytes(dst *fieldElement, src *[32]byte) {
  73. h0 := load4(src[:])
  74. h1 := load3(src[4:]) << 6
  75. h2 := load3(src[7:]) << 5
  76. h3 := load3(src[10:]) << 3
  77. h4 := load3(src[13:]) << 2
  78. h5 := load4(src[16:])
  79. h6 := load3(src[20:]) << 7
  80. h7 := load3(src[23:]) << 5
  81. h8 := load3(src[26:]) << 4
  82. h9 := load3(src[29:]) << 2
  83. var carry [10]int64
  84. carry[9] = (h9 + 1<<24) >> 25
  85. h0 += carry[9] * 19
  86. h9 -= carry[9] << 25
  87. carry[1] = (h1 + 1<<24) >> 25
  88. h2 += carry[1]
  89. h1 -= carry[1] << 25
  90. carry[3] = (h3 + 1<<24) >> 25
  91. h4 += carry[3]
  92. h3 -= carry[3] << 25
  93. carry[5] = (h5 + 1<<24) >> 25
  94. h6 += carry[5]
  95. h5 -= carry[5] << 25
  96. carry[7] = (h7 + 1<<24) >> 25
  97. h8 += carry[7]
  98. h7 -= carry[7] << 25
  99. carry[0] = (h0 + 1<<25) >> 26
  100. h1 += carry[0]
  101. h0 -= carry[0] << 26
  102. carry[2] = (h2 + 1<<25) >> 26
  103. h3 += carry[2]
  104. h2 -= carry[2] << 26
  105. carry[4] = (h4 + 1<<25) >> 26
  106. h5 += carry[4]
  107. h4 -= carry[4] << 26
  108. carry[6] = (h6 + 1<<25) >> 26
  109. h7 += carry[6]
  110. h6 -= carry[6] << 26
  111. carry[8] = (h8 + 1<<25) >> 26
  112. h9 += carry[8]
  113. h8 -= carry[8] << 26
  114. dst[0] = int32(h0)
  115. dst[1] = int32(h1)
  116. dst[2] = int32(h2)
  117. dst[3] = int32(h3)
  118. dst[4] = int32(h4)
  119. dst[5] = int32(h5)
  120. dst[6] = int32(h6)
  121. dst[7] = int32(h7)
  122. dst[8] = int32(h8)
  123. dst[9] = int32(h9)
  124. }
  125. // feToBytes marshals h to s.
  126. // Preconditions:
  127. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  128. //
  129. // Write p=2^255-19; q=floor(h/p).
  130. // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
  131. //
  132. // Proof:
  133. // Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
  134. // Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
  135. //
  136. // Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
  137. // Then 0<y<1.
  138. //
  139. // Write r=h-pq.
  140. // Have 0<=r<=p-1=2^255-20.
  141. // Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
  142. //
  143. // Write x=r+19(2^-255)r+y.
  144. // Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
  145. //
  146. // Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
  147. // so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
  148. func feToBytes(s *[32]byte, h *fieldElement) {
  149. var carry [10]int32
  150. q := (19*h[9] + (1 << 24)) >> 25
  151. q = (h[0] + q) >> 26
  152. q = (h[1] + q) >> 25
  153. q = (h[2] + q) >> 26
  154. q = (h[3] + q) >> 25
  155. q = (h[4] + q) >> 26
  156. q = (h[5] + q) >> 25
  157. q = (h[6] + q) >> 26
  158. q = (h[7] + q) >> 25
  159. q = (h[8] + q) >> 26
  160. q = (h[9] + q) >> 25
  161. // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
  162. h[0] += 19 * q
  163. // Goal: Output h-2^255 q, which is between 0 and 2^255-20.
  164. carry[0] = h[0] >> 26
  165. h[1] += carry[0]
  166. h[0] -= carry[0] << 26
  167. carry[1] = h[1] >> 25
  168. h[2] += carry[1]
  169. h[1] -= carry[1] << 25
  170. carry[2] = h[2] >> 26
  171. h[3] += carry[2]
  172. h[2] -= carry[2] << 26
  173. carry[3] = h[3] >> 25
  174. h[4] += carry[3]
  175. h[3] -= carry[3] << 25
  176. carry[4] = h[4] >> 26
  177. h[5] += carry[4]
  178. h[4] -= carry[4] << 26
  179. carry[5] = h[5] >> 25
  180. h[6] += carry[5]
  181. h[5] -= carry[5] << 25
  182. carry[6] = h[6] >> 26
  183. h[7] += carry[6]
  184. h[6] -= carry[6] << 26
  185. carry[7] = h[7] >> 25
  186. h[8] += carry[7]
  187. h[7] -= carry[7] << 25
  188. carry[8] = h[8] >> 26
  189. h[9] += carry[8]
  190. h[8] -= carry[8] << 26
  191. carry[9] = h[9] >> 25
  192. h[9] -= carry[9] << 25
  193. // h10 = carry9
  194. // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
  195. // Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
  196. // evidently 2^255 h10-2^255 q = 0.
  197. // Goal: Output h[0]+...+2^230 h[9].
  198. s[0] = byte(h[0] >> 0)
  199. s[1] = byte(h[0] >> 8)
  200. s[2] = byte(h[0] >> 16)
  201. s[3] = byte((h[0] >> 24) | (h[1] << 2))
  202. s[4] = byte(h[1] >> 6)
  203. s[5] = byte(h[1] >> 14)
  204. s[6] = byte((h[1] >> 22) | (h[2] << 3))
  205. s[7] = byte(h[2] >> 5)
  206. s[8] = byte(h[2] >> 13)
  207. s[9] = byte((h[2] >> 21) | (h[3] << 5))
  208. s[10] = byte(h[3] >> 3)
  209. s[11] = byte(h[3] >> 11)
  210. s[12] = byte((h[3] >> 19) | (h[4] << 6))
  211. s[13] = byte(h[4] >> 2)
  212. s[14] = byte(h[4] >> 10)
  213. s[15] = byte(h[4] >> 18)
  214. s[16] = byte(h[5] >> 0)
  215. s[17] = byte(h[5] >> 8)
  216. s[18] = byte(h[5] >> 16)
  217. s[19] = byte((h[5] >> 24) | (h[6] << 1))
  218. s[20] = byte(h[6] >> 7)
  219. s[21] = byte(h[6] >> 15)
  220. s[22] = byte((h[6] >> 23) | (h[7] << 3))
  221. s[23] = byte(h[7] >> 5)
  222. s[24] = byte(h[7] >> 13)
  223. s[25] = byte((h[7] >> 21) | (h[8] << 4))
  224. s[26] = byte(h[8] >> 4)
  225. s[27] = byte(h[8] >> 12)
  226. s[28] = byte((h[8] >> 20) | (h[9] << 6))
  227. s[29] = byte(h[9] >> 2)
  228. s[30] = byte(h[9] >> 10)
  229. s[31] = byte(h[9] >> 18)
  230. }
  231. // feMul calculates h = f * g
  232. // Can overlap h with f or g.
  233. //
  234. // Preconditions:
  235. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  236. // |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  237. //
  238. // Postconditions:
  239. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  240. //
  241. // Notes on implementation strategy:
  242. //
  243. // Using schoolbook multiplication.
  244. // Karatsuba would save a little in some cost models.
  245. //
  246. // Most multiplications by 2 and 19 are 32-bit precomputations;
  247. // cheaper than 64-bit postcomputations.
  248. //
  249. // There is one remaining multiplication by 19 in the carry chain;
  250. // one *19 precomputation can be merged into this,
  251. // but the resulting data flow is considerably less clean.
  252. //
  253. // There are 12 carries below.
  254. // 10 of them are 2-way parallelizable and vectorizable.
  255. // Can get away with 11 carries, but then data flow is much deeper.
  256. //
  257. // With tighter constraints on inputs can squeeze carries into int32.
  258. func feMul(h, f, g *fieldElement) {
  259. f0 := f[0]
  260. f1 := f[1]
  261. f2 := f[2]
  262. f3 := f[3]
  263. f4 := f[4]
  264. f5 := f[5]
  265. f6 := f[6]
  266. f7 := f[7]
  267. f8 := f[8]
  268. f9 := f[9]
  269. g0 := g[0]
  270. g1 := g[1]
  271. g2 := g[2]
  272. g3 := g[3]
  273. g4 := g[4]
  274. g5 := g[5]
  275. g6 := g[6]
  276. g7 := g[7]
  277. g8 := g[8]
  278. g9 := g[9]
  279. g1_19 := 19 * g1 // 1.4*2^29
  280. g2_19 := 19 * g2 // 1.4*2^30; still ok
  281. g3_19 := 19 * g3
  282. g4_19 := 19 * g4
  283. g5_19 := 19 * g5
  284. g6_19 := 19 * g6
  285. g7_19 := 19 * g7
  286. g8_19 := 19 * g8
  287. g9_19 := 19 * g9
  288. f1_2 := 2 * f1
  289. f3_2 := 2 * f3
  290. f5_2 := 2 * f5
  291. f7_2 := 2 * f7
  292. f9_2 := 2 * f9
  293. f0g0 := int64(f0) * int64(g0)
  294. f0g1 := int64(f0) * int64(g1)
  295. f0g2 := int64(f0) * int64(g2)
  296. f0g3 := int64(f0) * int64(g3)
  297. f0g4 := int64(f0) * int64(g4)
  298. f0g5 := int64(f0) * int64(g5)
  299. f0g6 := int64(f0) * int64(g6)
  300. f0g7 := int64(f0) * int64(g7)
  301. f0g8 := int64(f0) * int64(g8)
  302. f0g9 := int64(f0) * int64(g9)
  303. f1g0 := int64(f1) * int64(g0)
  304. f1g1_2 := int64(f1_2) * int64(g1)
  305. f1g2 := int64(f1) * int64(g2)
  306. f1g3_2 := int64(f1_2) * int64(g3)
  307. f1g4 := int64(f1) * int64(g4)
  308. f1g5_2 := int64(f1_2) * int64(g5)
  309. f1g6 := int64(f1) * int64(g6)
  310. f1g7_2 := int64(f1_2) * int64(g7)
  311. f1g8 := int64(f1) * int64(g8)
  312. f1g9_38 := int64(f1_2) * int64(g9_19)
  313. f2g0 := int64(f2) * int64(g0)
  314. f2g1 := int64(f2) * int64(g1)
  315. f2g2 := int64(f2) * int64(g2)
  316. f2g3 := int64(f2) * int64(g3)
  317. f2g4 := int64(f2) * int64(g4)
  318. f2g5 := int64(f2) * int64(g5)
  319. f2g6 := int64(f2) * int64(g6)
  320. f2g7 := int64(f2) * int64(g7)
  321. f2g8_19 := int64(f2) * int64(g8_19)
  322. f2g9_19 := int64(f2) * int64(g9_19)
  323. f3g0 := int64(f3) * int64(g0)
  324. f3g1_2 := int64(f3_2) * int64(g1)
  325. f3g2 := int64(f3) * int64(g2)
  326. f3g3_2 := int64(f3_2) * int64(g3)
  327. f3g4 := int64(f3) * int64(g4)
  328. f3g5_2 := int64(f3_2) * int64(g5)
  329. f3g6 := int64(f3) * int64(g6)
  330. f3g7_38 := int64(f3_2) * int64(g7_19)
  331. f3g8_19 := int64(f3) * int64(g8_19)
  332. f3g9_38 := int64(f3_2) * int64(g9_19)
  333. f4g0 := int64(f4) * int64(g0)
  334. f4g1 := int64(f4) * int64(g1)
  335. f4g2 := int64(f4) * int64(g2)
  336. f4g3 := int64(f4) * int64(g3)
  337. f4g4 := int64(f4) * int64(g4)
  338. f4g5 := int64(f4) * int64(g5)
  339. f4g6_19 := int64(f4) * int64(g6_19)
  340. f4g7_19 := int64(f4) * int64(g7_19)
  341. f4g8_19 := int64(f4) * int64(g8_19)
  342. f4g9_19 := int64(f4) * int64(g9_19)
  343. f5g0 := int64(f5) * int64(g0)
  344. f5g1_2 := int64(f5_2) * int64(g1)
  345. f5g2 := int64(f5) * int64(g2)
  346. f5g3_2 := int64(f5_2) * int64(g3)
  347. f5g4 := int64(f5) * int64(g4)
  348. f5g5_38 := int64(f5_2) * int64(g5_19)
  349. f5g6_19 := int64(f5) * int64(g6_19)
  350. f5g7_38 := int64(f5_2) * int64(g7_19)
  351. f5g8_19 := int64(f5) * int64(g8_19)
  352. f5g9_38 := int64(f5_2) * int64(g9_19)
  353. f6g0 := int64(f6) * int64(g0)
  354. f6g1 := int64(f6) * int64(g1)
  355. f6g2 := int64(f6) * int64(g2)
  356. f6g3 := int64(f6) * int64(g3)
  357. f6g4_19 := int64(f6) * int64(g4_19)
  358. f6g5_19 := int64(f6) * int64(g5_19)
  359. f6g6_19 := int64(f6) * int64(g6_19)
  360. f6g7_19 := int64(f6) * int64(g7_19)
  361. f6g8_19 := int64(f6) * int64(g8_19)
  362. f6g9_19 := int64(f6) * int64(g9_19)
  363. f7g0 := int64(f7) * int64(g0)
  364. f7g1_2 := int64(f7_2) * int64(g1)
  365. f7g2 := int64(f7) * int64(g2)
  366. f7g3_38 := int64(f7_2) * int64(g3_19)
  367. f7g4_19 := int64(f7) * int64(g4_19)
  368. f7g5_38 := int64(f7_2) * int64(g5_19)
  369. f7g6_19 := int64(f7) * int64(g6_19)
  370. f7g7_38 := int64(f7_2) * int64(g7_19)
  371. f7g8_19 := int64(f7) * int64(g8_19)
  372. f7g9_38 := int64(f7_2) * int64(g9_19)
  373. f8g0 := int64(f8) * int64(g0)
  374. f8g1 := int64(f8) * int64(g1)
  375. f8g2_19 := int64(f8) * int64(g2_19)
  376. f8g3_19 := int64(f8) * int64(g3_19)
  377. f8g4_19 := int64(f8) * int64(g4_19)
  378. f8g5_19 := int64(f8) * int64(g5_19)
  379. f8g6_19 := int64(f8) * int64(g6_19)
  380. f8g7_19 := int64(f8) * int64(g7_19)
  381. f8g8_19 := int64(f8) * int64(g8_19)
  382. f8g9_19 := int64(f8) * int64(g9_19)
  383. f9g0 := int64(f9) * int64(g0)
  384. f9g1_38 := int64(f9_2) * int64(g1_19)
  385. f9g2_19 := int64(f9) * int64(g2_19)
  386. f9g3_38 := int64(f9_2) * int64(g3_19)
  387. f9g4_19 := int64(f9) * int64(g4_19)
  388. f9g5_38 := int64(f9_2) * int64(g5_19)
  389. f9g6_19 := int64(f9) * int64(g6_19)
  390. f9g7_38 := int64(f9_2) * int64(g7_19)
  391. f9g8_19 := int64(f9) * int64(g8_19)
  392. f9g9_38 := int64(f9_2) * int64(g9_19)
  393. h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
  394. h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
  395. h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
  396. h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
  397. h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
  398. h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
  399. h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
  400. h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
  401. h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
  402. h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
  403. var carry [10]int64
  404. // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
  405. // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
  406. // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
  407. // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
  408. carry[0] = (h0 + (1 << 25)) >> 26
  409. h1 += carry[0]
  410. h0 -= carry[0] << 26
  411. carry[4] = (h4 + (1 << 25)) >> 26
  412. h5 += carry[4]
  413. h4 -= carry[4] << 26
  414. // |h0| <= 2^25
  415. // |h4| <= 2^25
  416. // |h1| <= 1.51*2^58
  417. // |h5| <= 1.51*2^58
  418. carry[1] = (h1 + (1 << 24)) >> 25
  419. h2 += carry[1]
  420. h1 -= carry[1] << 25
  421. carry[5] = (h5 + (1 << 24)) >> 25
  422. h6 += carry[5]
  423. h5 -= carry[5] << 25
  424. // |h1| <= 2^24; from now on fits into int32
  425. // |h5| <= 2^24; from now on fits into int32
  426. // |h2| <= 1.21*2^59
  427. // |h6| <= 1.21*2^59
  428. carry[2] = (h2 + (1 << 25)) >> 26
  429. h3 += carry[2]
  430. h2 -= carry[2] << 26
  431. carry[6] = (h6 + (1 << 25)) >> 26
  432. h7 += carry[6]
  433. h6 -= carry[6] << 26
  434. // |h2| <= 2^25; from now on fits into int32 unchanged
  435. // |h6| <= 2^25; from now on fits into int32 unchanged
  436. // |h3| <= 1.51*2^58
  437. // |h7| <= 1.51*2^58
  438. carry[3] = (h3 + (1 << 24)) >> 25
  439. h4 += carry[3]
  440. h3 -= carry[3] << 25
  441. carry[7] = (h7 + (1 << 24)) >> 25
  442. h8 += carry[7]
  443. h7 -= carry[7] << 25
  444. // |h3| <= 2^24; from now on fits into int32 unchanged
  445. // |h7| <= 2^24; from now on fits into int32 unchanged
  446. // |h4| <= 1.52*2^33
  447. // |h8| <= 1.52*2^33
  448. carry[4] = (h4 + (1 << 25)) >> 26
  449. h5 += carry[4]
  450. h4 -= carry[4] << 26
  451. carry[8] = (h8 + (1 << 25)) >> 26
  452. h9 += carry[8]
  453. h8 -= carry[8] << 26
  454. // |h4| <= 2^25; from now on fits into int32 unchanged
  455. // |h8| <= 2^25; from now on fits into int32 unchanged
  456. // |h5| <= 1.01*2^24
  457. // |h9| <= 1.51*2^58
  458. carry[9] = (h9 + (1 << 24)) >> 25
  459. h0 += carry[9] * 19
  460. h9 -= carry[9] << 25
  461. // |h9| <= 2^24; from now on fits into int32 unchanged
  462. // |h0| <= 1.8*2^37
  463. carry[0] = (h0 + (1 << 25)) >> 26
  464. h1 += carry[0]
  465. h0 -= carry[0] << 26
  466. // |h0| <= 2^25; from now on fits into int32 unchanged
  467. // |h1| <= 1.01*2^24
  468. h[0] = int32(h0)
  469. h[1] = int32(h1)
  470. h[2] = int32(h2)
  471. h[3] = int32(h3)
  472. h[4] = int32(h4)
  473. h[5] = int32(h5)
  474. h[6] = int32(h6)
  475. h[7] = int32(h7)
  476. h[8] = int32(h8)
  477. h[9] = int32(h9)
  478. }
  479. // feSquare calculates h = f*f. Can overlap h with f.
  480. //
  481. // Preconditions:
  482. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  483. //
  484. // Postconditions:
  485. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  486. func feSquare(h, f *fieldElement) {
  487. f0 := f[0]
  488. f1 := f[1]
  489. f2 := f[2]
  490. f3 := f[3]
  491. f4 := f[4]
  492. f5 := f[5]
  493. f6 := f[6]
  494. f7 := f[7]
  495. f8 := f[8]
  496. f9 := f[9]
  497. f0_2 := 2 * f0
  498. f1_2 := 2 * f1
  499. f2_2 := 2 * f2
  500. f3_2 := 2 * f3
  501. f4_2 := 2 * f4
  502. f5_2 := 2 * f5
  503. f6_2 := 2 * f6
  504. f7_2 := 2 * f7
  505. f5_38 := 38 * f5 // 1.31*2^30
  506. f6_19 := 19 * f6 // 1.31*2^30
  507. f7_38 := 38 * f7 // 1.31*2^30
  508. f8_19 := 19 * f8 // 1.31*2^30
  509. f9_38 := 38 * f9 // 1.31*2^30
  510. f0f0 := int64(f0) * int64(f0)
  511. f0f1_2 := int64(f0_2) * int64(f1)
  512. f0f2_2 := int64(f0_2) * int64(f2)
  513. f0f3_2 := int64(f0_2) * int64(f3)
  514. f0f4_2 := int64(f0_2) * int64(f4)
  515. f0f5_2 := int64(f0_2) * int64(f5)
  516. f0f6_2 := int64(f0_2) * int64(f6)
  517. f0f7_2 := int64(f0_2) * int64(f7)
  518. f0f8_2 := int64(f0_2) * int64(f8)
  519. f0f9_2 := int64(f0_2) * int64(f9)
  520. f1f1_2 := int64(f1_2) * int64(f1)
  521. f1f2_2 := int64(f1_2) * int64(f2)
  522. f1f3_4 := int64(f1_2) * int64(f3_2)
  523. f1f4_2 := int64(f1_2) * int64(f4)
  524. f1f5_4 := int64(f1_2) * int64(f5_2)
  525. f1f6_2 := int64(f1_2) * int64(f6)
  526. f1f7_4 := int64(f1_2) * int64(f7_2)
  527. f1f8_2 := int64(f1_2) * int64(f8)
  528. f1f9_76 := int64(f1_2) * int64(f9_38)
  529. f2f2 := int64(f2) * int64(f2)
  530. f2f3_2 := int64(f2_2) * int64(f3)
  531. f2f4_2 := int64(f2_2) * int64(f4)
  532. f2f5_2 := int64(f2_2) * int64(f5)
  533. f2f6_2 := int64(f2_2) * int64(f6)
  534. f2f7_2 := int64(f2_2) * int64(f7)
  535. f2f8_38 := int64(f2_2) * int64(f8_19)
  536. f2f9_38 := int64(f2) * int64(f9_38)
  537. f3f3_2 := int64(f3_2) * int64(f3)
  538. f3f4_2 := int64(f3_2) * int64(f4)
  539. f3f5_4 := int64(f3_2) * int64(f5_2)
  540. f3f6_2 := int64(f3_2) * int64(f6)
  541. f3f7_76 := int64(f3_2) * int64(f7_38)
  542. f3f8_38 := int64(f3_2) * int64(f8_19)
  543. f3f9_76 := int64(f3_2) * int64(f9_38)
  544. f4f4 := int64(f4) * int64(f4)
  545. f4f5_2 := int64(f4_2) * int64(f5)
  546. f4f6_38 := int64(f4_2) * int64(f6_19)
  547. f4f7_38 := int64(f4) * int64(f7_38)
  548. f4f8_38 := int64(f4_2) * int64(f8_19)
  549. f4f9_38 := int64(f4) * int64(f9_38)
  550. f5f5_38 := int64(f5) * int64(f5_38)
  551. f5f6_38 := int64(f5_2) * int64(f6_19)
  552. f5f7_76 := int64(f5_2) * int64(f7_38)
  553. f5f8_38 := int64(f5_2) * int64(f8_19)
  554. f5f9_76 := int64(f5_2) * int64(f9_38)
  555. f6f6_19 := int64(f6) * int64(f6_19)
  556. f6f7_38 := int64(f6) * int64(f7_38)
  557. f6f8_38 := int64(f6_2) * int64(f8_19)
  558. f6f9_38 := int64(f6) * int64(f9_38)
  559. f7f7_38 := int64(f7) * int64(f7_38)
  560. f7f8_38 := int64(f7_2) * int64(f8_19)
  561. f7f9_76 := int64(f7_2) * int64(f9_38)
  562. f8f8_19 := int64(f8) * int64(f8_19)
  563. f8f9_38 := int64(f8) * int64(f9_38)
  564. f9f9_38 := int64(f9) * int64(f9_38)
  565. h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
  566. h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
  567. h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
  568. h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
  569. h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
  570. h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
  571. h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
  572. h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
  573. h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
  574. h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
  575. var carry [10]int64
  576. carry[0] = (h0 + (1 << 25)) >> 26
  577. h1 += carry[0]
  578. h0 -= carry[0] << 26
  579. carry[4] = (h4 + (1 << 25)) >> 26
  580. h5 += carry[4]
  581. h4 -= carry[4] << 26
  582. carry[1] = (h1 + (1 << 24)) >> 25
  583. h2 += carry[1]
  584. h1 -= carry[1] << 25
  585. carry[5] = (h5 + (1 << 24)) >> 25
  586. h6 += carry[5]
  587. h5 -= carry[5] << 25
  588. carry[2] = (h2 + (1 << 25)) >> 26
  589. h3 += carry[2]
  590. h2 -= carry[2] << 26
  591. carry[6] = (h6 + (1 << 25)) >> 26
  592. h7 += carry[6]
  593. h6 -= carry[6] << 26
  594. carry[3] = (h3 + (1 << 24)) >> 25
  595. h4 += carry[3]
  596. h3 -= carry[3] << 25
  597. carry[7] = (h7 + (1 << 24)) >> 25
  598. h8 += carry[7]
  599. h7 -= carry[7] << 25
  600. carry[4] = (h4 + (1 << 25)) >> 26
  601. h5 += carry[4]
  602. h4 -= carry[4] << 26
  603. carry[8] = (h8 + (1 << 25)) >> 26
  604. h9 += carry[8]
  605. h8 -= carry[8] << 26
  606. carry[9] = (h9 + (1 << 24)) >> 25
  607. h0 += carry[9] * 19
  608. h9 -= carry[9] << 25
  609. carry[0] = (h0 + (1 << 25)) >> 26
  610. h1 += carry[0]
  611. h0 -= carry[0] << 26
  612. h[0] = int32(h0)
  613. h[1] = int32(h1)
  614. h[2] = int32(h2)
  615. h[3] = int32(h3)
  616. h[4] = int32(h4)
  617. h[5] = int32(h5)
  618. h[6] = int32(h6)
  619. h[7] = int32(h7)
  620. h[8] = int32(h8)
  621. h[9] = int32(h9)
  622. }
  623. // feMul121666 calculates h = f * 121666. Can overlap h with f.
  624. //
  625. // Preconditions:
  626. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  627. //
  628. // Postconditions:
  629. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  630. func feMul121666(h, f *fieldElement) {
  631. h0 := int64(f[0]) * 121666
  632. h1 := int64(f[1]) * 121666
  633. h2 := int64(f[2]) * 121666
  634. h3 := int64(f[3]) * 121666
  635. h4 := int64(f[4]) * 121666
  636. h5 := int64(f[5]) * 121666
  637. h6 := int64(f[6]) * 121666
  638. h7 := int64(f[7]) * 121666
  639. h8 := int64(f[8]) * 121666
  640. h9 := int64(f[9]) * 121666
  641. var carry [10]int64
  642. carry[9] = (h9 + (1 << 24)) >> 25
  643. h0 += carry[9] * 19
  644. h9 -= carry[9] << 25
  645. carry[1] = (h1 + (1 << 24)) >> 25
  646. h2 += carry[1]
  647. h1 -= carry[1] << 25
  648. carry[3] = (h3 + (1 << 24)) >> 25
  649. h4 += carry[3]
  650. h3 -= carry[3] << 25
  651. carry[5] = (h5 + (1 << 24)) >> 25
  652. h6 += carry[5]
  653. h5 -= carry[5] << 25
  654. carry[7] = (h7 + (1 << 24)) >> 25
  655. h8 += carry[7]
  656. h7 -= carry[7] << 25
  657. carry[0] = (h0 + (1 << 25)) >> 26
  658. h1 += carry[0]
  659. h0 -= carry[0] << 26
  660. carry[2] = (h2 + (1 << 25)) >> 26
  661. h3 += carry[2]
  662. h2 -= carry[2] << 26
  663. carry[4] = (h4 + (1 << 25)) >> 26
  664. h5 += carry[4]
  665. h4 -= carry[4] << 26
  666. carry[6] = (h6 + (1 << 25)) >> 26
  667. h7 += carry[6]
  668. h6 -= carry[6] << 26
  669. carry[8] = (h8 + (1 << 25)) >> 26
  670. h9 += carry[8]
  671. h8 -= carry[8] << 26
  672. h[0] = int32(h0)
  673. h[1] = int32(h1)
  674. h[2] = int32(h2)
  675. h[3] = int32(h3)
  676. h[4] = int32(h4)
  677. h[5] = int32(h5)
  678. h[6] = int32(h6)
  679. h[7] = int32(h7)
  680. h[8] = int32(h8)
  681. h[9] = int32(h9)
  682. }
  683. // feInvert sets out = z^-1.
  684. func feInvert(out, z *fieldElement) {
  685. var t0, t1, t2, t3 fieldElement
  686. var i int
  687. feSquare(&t0, z)
  688. for i = 1; i < 1; i++ {
  689. feSquare(&t0, &t0)
  690. }
  691. feSquare(&t1, &t0)
  692. for i = 1; i < 2; i++ {
  693. feSquare(&t1, &t1)
  694. }
  695. feMul(&t1, z, &t1)
  696. feMul(&t0, &t0, &t1)
  697. feSquare(&t2, &t0)
  698. for i = 1; i < 1; i++ {
  699. feSquare(&t2, &t2)
  700. }
  701. feMul(&t1, &t1, &t2)
  702. feSquare(&t2, &t1)
  703. for i = 1; i < 5; i++ {
  704. feSquare(&t2, &t2)
  705. }
  706. feMul(&t1, &t2, &t1)
  707. feSquare(&t2, &t1)
  708. for i = 1; i < 10; i++ {
  709. feSquare(&t2, &t2)
  710. }
  711. feMul(&t2, &t2, &t1)
  712. feSquare(&t3, &t2)
  713. for i = 1; i < 20; i++ {
  714. feSquare(&t3, &t3)
  715. }
  716. feMul(&t2, &t3, &t2)
  717. feSquare(&t2, &t2)
  718. for i = 1; i < 10; i++ {
  719. feSquare(&t2, &t2)
  720. }
  721. feMul(&t1, &t2, &t1)
  722. feSquare(&t2, &t1)
  723. for i = 1; i < 50; i++ {
  724. feSquare(&t2, &t2)
  725. }
  726. feMul(&t2, &t2, &t1)
  727. feSquare(&t3, &t2)
  728. for i = 1; i < 100; i++ {
  729. feSquare(&t3, &t3)
  730. }
  731. feMul(&t2, &t3, &t2)
  732. feSquare(&t2, &t2)
  733. for i = 1; i < 50; i++ {
  734. feSquare(&t2, &t2)
  735. }
  736. feMul(&t1, &t2, &t1)
  737. feSquare(&t1, &t1)
  738. for i = 1; i < 5; i++ {
  739. feSquare(&t1, &t1)
  740. }
  741. feMul(out, &t1, &t0)
  742. }
  743. func scalarMult(out, in, base *[32]byte) {
  744. var e [32]byte
  745. copy(e[:], in[:])
  746. e[0] &= 248
  747. e[31] &= 127
  748. e[31] |= 64
  749. var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
  750. feFromBytes(&x1, base)
  751. feOne(&x2)
  752. feCopy(&x3, &x1)
  753. feOne(&z3)
  754. swap := int32(0)
  755. for pos := 254; pos >= 0; pos-- {
  756. b := e[pos/8] >> uint(pos&7)
  757. b &= 1
  758. swap ^= int32(b)
  759. feCSwap(&x2, &x3, swap)
  760. feCSwap(&z2, &z3, swap)
  761. swap = int32(b)
  762. feSub(&tmp0, &x3, &z3)
  763. feSub(&tmp1, &x2, &z2)
  764. feAdd(&x2, &x2, &z2)
  765. feAdd(&z2, &x3, &z3)
  766. feMul(&z3, &tmp0, &x2)
  767. feMul(&z2, &z2, &tmp1)
  768. feSquare(&tmp0, &tmp1)
  769. feSquare(&tmp1, &x2)
  770. feAdd(&x3, &z3, &z2)
  771. feSub(&z2, &z3, &z2)
  772. feMul(&x2, &tmp1, &tmp0)
  773. feSub(&tmp1, &tmp1, &tmp0)
  774. feSquare(&z2, &z2)
  775. feMul121666(&z3, &tmp1)
  776. feSquare(&x3, &x3)
  777. feAdd(&tmp0, &tmp0, &z3)
  778. feMul(&z3, &x1, &z2)
  779. feMul(&z2, &tmp1, &tmp0)
  780. }
  781. feCSwap(&x2, &x3, swap)
  782. feCSwap(&z2, &z3, swap)
  783. feInvert(&z2, &z2)
  784. feMul(&x2, &x2, &z2)
  785. feToBytes(out, &x2)
  786. }