x25519.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. // Source: https://github.com/jedisct1/libsodium/tree/1.0.18
  2. // Source: https://github.com/openssl/openssl/blob/OpenSSL_1_1_1g/crypto/ec/asm/x25519-x86_64.pl
  3. #include "x25519.h"
  4. #include <string.h> // memcpy
  5. typedef unsigned __int128 uint128_t;
  6. base_powers g_base_powers;
  7. /*
  8. h = f + g
  9. Can overlap h with f or g.
  10. */
  11. static inline void
  12. fe25519_add(fe25519 h, const fe25519 f, const fe25519 g)
  13. {
  14. uint64_t h0 = f[0] + g[0];
  15. uint64_t h1 = f[1] + g[1];
  16. uint64_t h2 = f[2] + g[2];
  17. uint64_t h3 = f[3] + g[3];
  18. uint64_t h4 = f[4] + g[4];
  19. h[0] = h0;
  20. h[1] = h1;
  21. h[2] = h2;
  22. h[3] = h3;
  23. h[4] = h4;
  24. }
  25. /*
  26. h = f - g
  27. */
  28. static void
  29. fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g)
  30. {
  31. const uint64_t mask = 0x7ffffffffffffULL;
  32. uint64_t h0, h1, h2, h3, h4;
  33. h0 = g[0];
  34. h1 = g[1];
  35. h2 = g[2];
  36. h3 = g[3];
  37. h4 = g[4];
  38. h1 += h0 >> 51;
  39. h0 &= mask;
  40. h2 += h1 >> 51;
  41. h1 &= mask;
  42. h3 += h2 >> 51;
  43. h2 &= mask;
  44. h4 += h3 >> 51;
  45. h3 &= mask;
  46. h0 += 19ULL * (h4 >> 51);
  47. h4 &= mask;
  48. h0 = (f[0] + 0xfffffffffffdaULL) - h0;
  49. h1 = (f[1] + 0xffffffffffffeULL) - h1;
  50. h2 = (f[2] + 0xffffffffffffeULL) - h2;
  51. h3 = (f[3] + 0xffffffffffffeULL) - h3;
  52. h4 = (f[4] + 0xffffffffffffeULL) - h4;
  53. h[0] = h0;
  54. h[1] = h1;
  55. h[2] = h2;
  56. h[3] = h3;
  57. h[4] = h4;
  58. }
  59. #define REDUCE_ASM \
  60. "movq $0x7ffffffffffff,%%rbp \n" \
  61. \
  62. "movq %%r10,%%rdx \n" \
  63. "shrq $51,%%r10 \n" \
  64. "shlq $13,%%r11 \n" \
  65. "andq %%rbp,%%rdx \n" /* %%rdx = g2 = h2 & mask */ \
  66. "orq %%r10,%%r11 \n" /* h2>>51 */ \
  67. "addq %%r11,%%r12 \n" \
  68. "adcq $0,%%r13 \n" /* h3 += h2>>51 */ \
  69. \
  70. "movq %%rbx,%%rax \n" \
  71. "shrq $51,%%rbx \n" \
  72. "shlq $13,%[y] \n" \
  73. "andq %%rbp,%%rax \n" /* %%rax = g0 = h0 & mask */ \
  74. "orq %%rbx,%[y] \n" /* h0>>51 */ \
  75. "addq %[y],%%r8 \n" /* h1 += h0>>51 */ \
  76. "adcq $0,%%r9 \n" \
  77. \
  78. "movq %%r12,%%rbx \n" \
  79. "shrq $51,%%r12 \n" \
  80. "shlq $13,%%r13 \n" \
  81. "andq %%rbp,%%rbx \n" /* %%rbx = g3 = h3 & mask */ \
  82. "orq %%r12,%%r13 \n" /* h3>>51 */ \
  83. "addq %%r13,%%r14 \n" /* h4 += h3>>51 */ \
  84. "adcq $0,%%r15 \n" \
  85. \
  86. "movq %%r8,%[y] \n" \
  87. "shrq $51,%%r8 \n" \
  88. "shlq $13,%%r9 \n" \
  89. "andq %%rbp,%[y] \n" /* %[y] = g1 = h1 & mask */ \
  90. "orq %%r8,%%r9 \n" \
  91. "addq %%r9,%%rdx \n" /* g2 += h1>>51 */ \
  92. \
  93. "movq %%r14,%%r10 \n" \
  94. "shrq $51,%%r14 \n" \
  95. "shlq $13,%%r15 \n" \
  96. "andq %%rbp,%%r10 \n" /* %%r10 = g4 = h0 & mask */ \
  97. "orq %%r14,%%r15 \n" /* h0>>51 */ \
  98. \
  99. "leaq (%%r15,%%r15,8),%%r14 \n" \
  100. "leaq (%%r15,%%r14,2),%%r15 \n" \
  101. "addq %%r15,%%rax \n" /* g0 += (h0>>51)*19 */ \
  102. \
  103. "movq %%rdx,%%r8 \n" \
  104. "andq %%rbp,%%rdx \n" /* g2 &= mask */ \
  105. "shrq $51,%%r8 \n" \
  106. "addq %%r8,%%rbx \n" /* g3 += g2>>51 */ \
  107. \
  108. "movq %%rax,%%r9 \n" \
  109. "andq %%rbp,%%rax \n" /* g0 &= mask */ \
  110. "shrq $51,%%r9 \n" \
  111. "addq %%r9,%[y] \n" /* g1 += g0>>51 */ \
  112. \
  113. "movq %%rax,8*0(%[x]) \n" /* save the result */ \
  114. "movq %[y],8*1(%[x]) \n" \
  115. "movq %%rdx,8*2(%[x]) \n" \
  116. "movq %%rbx,8*3(%[x]) \n" \
  117. "movq %%r10,8*4(%[x]) \n"
  118. /*
  119. h = f * g
  120. Can overlap h with f or g.
  121. */
  122. static void
  123. fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g)
  124. {
  125. uint64_t g0, g1, g2, ht, yt;
  126. __asm__ (
  127. "movq 8*0(%[f]),%%rax \n" // f[0]
  128. "movq 8*0(%[y]),%%r11 \n" // load g[0-4]
  129. "movq 8*1(%[y]),%%r12 \n"
  130. "movq 8*2(%[y]),%%r13 \n"
  131. "movq 8*3(%[y]),%%rbp \n"
  132. "movq 8*4(%[y]),%%r14 \n"
  133. "movq %[x],%[ht] \n" // offload 1st argument
  134. "movq %%rax,%[x] \n"
  135. "mulq %%r11 \n" // f[0]*g[0]
  136. "movq %%r11,%[g0] \n" // offload g[0]
  137. "movq %%rax,%%rbx \n" // %%rbx:%[y] = h0
  138. "movq %[x],%%rax \n"
  139. "movq %[y],%[yt] \n"
  140. "movq %%rdx,%[y] \n"
  141. "mulq %%r12 \n" // f[0]*g[1]
  142. "movq %%r12,%[g1] \n" // offload g[1]
  143. "movq %%rax,%%r8 \n" // %%r8:%%r9 = h1
  144. "movq %[x],%%rax \n"
  145. "leaq (%%r14,%%r14,8),%%r15 \n"
  146. "movq %%rdx,%%r9 \n"
  147. "mulq %%r13 \n" // f[0]*g[2]
  148. "movq %%r13,%[g2] \n" // offload g[2]
  149. "movq %%rax,%%r10 \n" // %%r10:%%r11 = h2
  150. "movq %[x],%%rax \n"
  151. "leaq (%%r14,%%r15,2),%[x] \n" // g[4]*19
  152. "movq %%rdx,%%r11 \n"
  153. "mulq %%rbp \n" // f[0]*g[3]
  154. "movq %%rax,%%r12 \n" // %%r12:%%r13 = h3
  155. "movq 8*0(%[f]),%%rax \n" // f[0]
  156. "movq %%rdx,%%r13 \n"
  157. "mulq %%r14 \n" // f[0]*g[4]
  158. "movq %%rax,%%r14 \n" // %%r14:%%r15 = h4
  159. "movq 8*1(%[f]),%%rax \n" // f[1]
  160. "movq %%rdx,%%r15 \n"
  161. "mulq %[x] \n" // f[1]*g[4]*19
  162. "addq %%rax,%%rbx \n"
  163. "movq 8*2(%[f]),%%rax \n" // f[2]
  164. "adcq %%rdx,%[y] \n"
  165. "mulq %[x] \n" // f[2]*g[4]*19
  166. "addq %%rax,%%r8 \n"
  167. "movq 8*3(%[f]),%%rax \n" // f[3]
  168. "adcq %%rdx,%%r9 \n"
  169. "mulq %[x] \n" // f[3]*g[4]*19
  170. "addq %%rax,%%r10 \n"
  171. "movq 8*4(%[f]),%%rax \n" // f[4]
  172. "adcq %%rdx,%%r11 \n"
  173. "mulq %[x] \n" // f[4]*g[4]*19
  174. "imulq $19,%%rbp,%[x] \n" // g[3]*19
  175. "addq %%rax,%%r12 \n"
  176. "movq 8*1(%[f]),%%rax \n" // f[1]
  177. "adcq %%rdx,%%r13 \n"
  178. "mulq %%rbp \n" // f[1]*g[3]
  179. "movq %[g2],%%rbp \n" // g[2]
  180. "addq %%rax,%%r14 \n"
  181. "movq 8*2(%[f]),%%rax \n" // f[2]
  182. "adcq %%rdx,%%r15 \n"
  183. "mulq %[x] \n" // f[2]*g[3]*19
  184. "addq %%rax,%%rbx \n"
  185. "movq 8*3(%[f]),%%rax \n" // f[3]
  186. "adcq %%rdx,%[y] \n"
  187. "mulq %[x] \n" // f[3]*g[3]*19
  188. "addq %%rax,%%r8 \n"
  189. "movq 8*4(%[f]),%%rax \n" // f[4]
  190. "adcq %%rdx,%%r9 \n"
  191. "mulq %[x] \n" // f[4]*g[3]*19
  192. "imulq $19,%%rbp,%[x] \n" // g[2]*19
  193. "addq %%rax,%%r10 \n"
  194. "movq 8*1(%[f]),%%rax \n" // f[1]
  195. "adcq %%rdx,%%r11 \n"
  196. "mulq %%rbp \n" // f[1]*g[2]
  197. "addq %%rax,%%r12 \n"
  198. "movq 8*2(%[f]),%%rax \n" // f[2]
  199. "adcq %%rdx,%%r13 \n"
  200. "mulq %%rbp \n" // f[2]*g[2]
  201. "movq %[g1],%%rbp \n" // g[1]
  202. "addq %%rax,%%r14 \n"
  203. "movq 8*3(%[f]),%%rax \n" // f[3]
  204. "adcq %%rdx,%%r15 \n"
  205. "mulq %[x] \n" // f[3]*g[2]*19
  206. "addq %%rax,%%rbx \n"
  207. "movq 8*4(%[f]),%%rax \n" // f[3]
  208. "adcq %%rdx,%[y] \n"
  209. "mulq %[x] \n" // f[4]*g[2]*19
  210. "addq %%rax,%%r8 \n"
  211. "movq 8*1(%[f]),%%rax \n" // f[1]
  212. "adcq %%rdx,%%r9 \n"
  213. "mulq %%rbp \n" // f[1]*g[1]
  214. "imulq $19,%%rbp,%[x] \n"
  215. "addq %%rax,%%r10 \n"
  216. "movq 8*2(%[f]),%%rax \n" // f[2]
  217. "adcq %%rdx,%%r11 \n"
  218. "mulq %%rbp \n" // f[2]*g[1]
  219. "addq %%rax,%%r12 \n"
  220. "movq 8*3(%[f]),%%rax \n" // f[3]
  221. "adcq %%rdx,%%r13 \n"
  222. "mulq %%rbp \n" // f[3]*g[1]
  223. "movq %[g0],%%rbp \n" // g[0]
  224. "addq %%rax,%%r14 \n"
  225. "movq 8*4(%[f]),%%rax \n" // f[4]
  226. "adcq %%rdx,%%r15 \n"
  227. "mulq %[x] \n" // f[4]*g[1]*19
  228. "addq %%rax,%%rbx \n"
  229. "movq 8*1(%[f]),%%rax \n" // f[1]
  230. "adcq %%rdx,%[y] \n"
  231. "mulq %%rbp \n" // f[1]*g[0]
  232. "addq %%rax,%%r8 \n"
  233. "movq 8*2(%[f]),%%rax \n" // f[2]
  234. "adcq %%rdx,%%r9 \n"
  235. "mulq %%rbp \n" // f[2]*g[0]
  236. "addq %%rax,%%r10 \n"
  237. "movq 8*3(%[f]),%%rax \n" // f[3]
  238. "adcq %%rdx,%%r11 \n"
  239. "mulq %%rbp \n" // f[3]*g[0]
  240. "addq %%rax,%%r12 \n"
  241. "movq 8*4(%[f]),%%rax \n" // f[4]
  242. "adcq %%rdx,%%r13 \n"
  243. "mulq %%rbp \n" // f[4]*g[0]
  244. "addq %%rax,%%r14 \n"
  245. "adcq %%rdx,%%r15 \n"
  246. "movq %[ht],%[x] \n" // restore 1st argument
  247. REDUCE_ASM
  248. "movq %[yt],%[y] \n"
  249. : [g0]"+m"(g0), [g1]"+m"(g1), [g2]"+m"(g2), [ht]"+m"(ht), [yt]"+m"(yt)
  250. : [x]"r"(h), [f] "r"(f), [y] "r"(g)
  251. : "%rax", "%rbx", "%rdx", "%rbp",
  252. "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory"
  253. );
  254. }
  255. /*
  256. h = f * f
  257. Can overlap h with f.
  258. */
  259. static void fe25519_sq(fe25519 h, const fe25519 f)
  260. {
  261. uint64_t g2, ht, yt;
  262. __asm__ (
  263. "movq %[y], %%rsi \n"
  264. "movq 8*0(%%rsi),%%rax \n" // g[0]
  265. "movq 8*2(%%rsi),%%r15 \n" // g[2]
  266. "movq 8*4(%%rsi),%%rbp \n" // g[4]
  267. "movq %[x],%[ht] \n" // offload 1st argument
  268. "leaq (%%rax,%%rax),%%r14 \n"
  269. "mulq %%rax \n" // g[0]*g[0]
  270. "movq %%rax,%%rbx \n"
  271. "movq 8*1(%%rsi),%%rax \n" // g[1]
  272. "movq %[y],%[yt] \n"
  273. "movq %%rdx,%[y] \n"
  274. "mulq %%r14 \n" // 2*g[0]*g[1]
  275. "movq %%rax,%%r8 \n"
  276. "movq %%r15,%%rax \n"
  277. "movq %%r15,%[g2] \n" // offload g[2]
  278. "movq %%rdx,%%r9 \n"
  279. "mulq %%r14 \n" // 2*g[0]*g[2]
  280. "movq %%rax,%%r10 \n"
  281. "movq 8*3(%%rsi),%%rax \n"
  282. "movq %%rdx,%%r11 \n"
  283. "imulq $19,%%rbp,%[x] \n" // g[4]*19
  284. "mulq %%r14 \n" // 2*g[0]*g[3]
  285. "movq %%rax,%%r12 \n"
  286. "movq %%rbp,%%rax \n"
  287. "movq %%rdx,%%r13 \n"
  288. "mulq %%r14 \n" // 2*g[0]*g[4]
  289. "movq %%rax,%%r14 \n"
  290. "movq %%rbp,%%rax \n"
  291. "movq %%rdx,%%r15 \n"
  292. "mulq %[x] \n" // g[4]*g[4]*19
  293. "addq %%rax,%%r12 \n"
  294. "movq 8*1(%%rsi),%%rax \n" // g[1]
  295. "adcq %%rdx,%%r13 \n"
  296. "movq 8*3(%%rsi),%%rsi \n" // g[3]
  297. "leaq (%%rax,%%rax),%%rbp \n"
  298. "mulq %%rax \n" // g[1]*g[1]
  299. "addq %%rax,%%r10 \n"
  300. "movq %[g2],%%rax \n" // g[2]
  301. "adcq %%rdx,%%r11 \n"
  302. "mulq %%rbp \n" // 2*g[1]*g[2]
  303. "addq %%rax,%%r12 \n"
  304. "movq %%rbp,%%rax \n"
  305. "adcq %%rdx,%%r13 \n"
  306. "mulq %%rsi \n" // 2*g[1]*g[3]
  307. "addq %%rax,%%r14 \n"
  308. "movq %%rbp,%%rax \n"
  309. "adcq %%rdx,%%r15 \n"
  310. "imulq $19,%%rsi,%%rbp \n" // g[3]*19
  311. "mulq %[x] \n" // 2*g[1]*g[4]*19
  312. "addq %%rax,%%rbx \n"
  313. "leaq (%%rsi,%%rsi),%%rax \n"
  314. "adcq %%rdx,%[y] \n"
  315. "mulq %[x] \n" // 2*g[3]*g[4]*19
  316. "addq %%rax,%%r10 \n"
  317. "movq %%rsi,%%rax \n"
  318. "adcq %%rdx,%%r11 \n"
  319. "mulq %%rbp \n" // g[3]*g[3]*19
  320. "addq %%rax,%%r8 \n"
  321. "movq %[g2],%%rax \n" // g[2]
  322. "adcq %%rdx,%%r9 \n"
  323. "leaq (%%rax,%%rax),%%rsi \n"
  324. "mulq %%rax \n" // g[2]*g[2]
  325. "addq %%rax,%%r14 \n"
  326. "movq %%rbp,%%rax \n"
  327. "adcq %%rdx,%%r15 \n"
  328. "mulq %%rsi \n" // 2*g[2]*g[3]*19
  329. "addq %%rax,%%rbx \n"
  330. "movq %%rsi,%%rax \n"
  331. "adcq %%rdx,%[y] \n"
  332. "mulq %[x] \n" // 2*g[2]*g[4]*19
  333. "addq %%rax,%%r8 \n"
  334. "adcq %%rdx,%%r9 \n"
  335. "movq %[ht],%[x] \n" // restore 1st argument
  336. REDUCE_ASM
  337. "movq %[yt],%[y] \n"
  338. : [g2] "+m"(g2), [ht]"+m"(ht), [yt]"+m"(yt)
  339. : [x]"r"(h), [y] "r"(f)
  340. : "%rax", "%rbx", "%rdx", "%rsi", "%rbp",
  341. "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory"
  342. );
  343. }
  344. static void
  345. fe25519_invert(fe25519 out, const fe25519 z)
  346. {
  347. fe25519 t0;
  348. fe25519 t1;
  349. fe25519 t2;
  350. fe25519 t3;
  351. int i;
  352. fe25519_sq(t0, z);
  353. fe25519_sq(t1, t0);
  354. fe25519_sq(t1, t1);
  355. fe25519_mul(t1, z, t1);
  356. fe25519_mul(t0, t0, t1);
  357. fe25519_sq(t2, t0);
  358. fe25519_mul(t1, t1, t2);
  359. fe25519_sq(t2, t1);
  360. for (i = 1; i < 5; ++i) {
  361. fe25519_sq(t2, t2);
  362. }
  363. fe25519_mul(t1, t2, t1);
  364. fe25519_sq(t2, t1);
  365. for (i = 1; i < 10; ++i) {
  366. fe25519_sq(t2, t2);
  367. }
  368. fe25519_mul(t2, t2, t1);
  369. fe25519_sq(t3, t2);
  370. for (i = 1; i < 20; ++i) {
  371. fe25519_sq(t3, t3);
  372. }
  373. fe25519_mul(t2, t3, t2);
  374. for (i = 1; i < 11; ++i) {
  375. fe25519_sq(t2, t2);
  376. }
  377. fe25519_mul(t1, t2, t1);
  378. fe25519_sq(t2, t1);
  379. for (i = 1; i < 50; ++i) {
  380. fe25519_sq(t2, t2);
  381. }
  382. fe25519_mul(t2, t2, t1);
  383. fe25519_sq(t3, t2);
  384. for (i = 1; i < 100; ++i) {
  385. fe25519_sq(t3, t3);
  386. }
  387. fe25519_mul(t2, t3, t2);
  388. for (i = 1; i < 51; ++i) {
  389. fe25519_sq(t2, t2);
  390. }
  391. fe25519_mul(t1, t2, t1);
  392. for (i = 1; i < 6; ++i) {
  393. fe25519_sq(t1, t1);
  394. }
  395. fe25519_mul(out, t1, t0);
  396. }
  397. static void
  398. fe25519_reduce(fe25519 h, const fe25519 f)
  399. {
  400. const uint64_t mask = 0x7ffffffffffffULL;
  401. uint128_t t[5];
  402. t[0] = f[0];
  403. t[1] = f[1];
  404. t[2] = f[2];
  405. t[3] = f[3];
  406. t[4] = f[4];
  407. t[1] += t[0] >> 51;
  408. t[0] &= mask;
  409. t[2] += t[1] >> 51;
  410. t[1] &= mask;
  411. t[3] += t[2] >> 51;
  412. t[2] &= mask;
  413. t[4] += t[3] >> 51;
  414. t[3] &= mask;
  415. t[0] += 19 * (t[4] >> 51);
  416. t[4] &= mask;
  417. t[1] += t[0] >> 51;
  418. t[0] &= mask;
  419. t[2] += t[1] >> 51;
  420. t[1] &= mask;
  421. t[3] += t[2] >> 51;
  422. t[2] &= mask;
  423. t[4] += t[3] >> 51;
  424. t[3] &= mask;
  425. t[0] += 19 * (t[4] >> 51);
  426. t[4] &= mask;
  427. /* now t is between 0 and 2^255-1, properly carried. */
  428. /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
  429. t[0] += 19ULL;
  430. t[1] += t[0] >> 51;
  431. t[0] &= mask;
  432. t[2] += t[1] >> 51;
  433. t[1] &= mask;
  434. t[3] += t[2] >> 51;
  435. t[2] &= mask;
  436. t[4] += t[3] >> 51;
  437. t[3] &= mask;
  438. t[0] += 19ULL * (t[4] >> 51);
  439. t[4] &= mask;
  440. /* now between 19 and 2^255-1 in both cases, and offset by 19. */
  441. t[0] += 0x8000000000000 - 19ULL;
  442. t[1] += 0x8000000000000 - 1ULL;
  443. t[2] += 0x8000000000000 - 1ULL;
  444. t[3] += 0x8000000000000 - 1ULL;
  445. t[4] += 0x8000000000000 - 1ULL;
  446. /* now between 2^255 and 2^256-20, and offset by 2^255. */
  447. t[1] += t[0] >> 51;
  448. t[0] &= mask;
  449. t[2] += t[1] >> 51;
  450. t[1] &= mask;
  451. t[3] += t[2] >> 51;
  452. t[2] &= mask;
  453. t[4] += t[3] >> 51;
  454. t[3] &= mask;
  455. t[4] &= mask;
  456. h[0] = t[0];
  457. h[1] = t[1];
  458. h[2] = t[2];
  459. h[3] = t[3];
  460. h[4] = t[4];
  461. }
  462. static void
  463. fe25519_tobytes(unsigned char *s, const fe25519 h)
  464. {
  465. fe25519 t;
  466. uint64_t t0, t1, t2, t3;
  467. fe25519_reduce(t, h);
  468. t0 = t[0] | (t[1] << 51);
  469. t1 = (t[1] >> 13) | (t[2] << 38);
  470. t2 = (t[2] >> 26) | (t[3] << 25);
  471. t3 = (t[3] >> 39) | (t[4] << 12);
  472. memcpy(s + 0, &t0, sizeof t0);
  473. memcpy(s + 8, &t1, sizeof t1);
  474. memcpy(s + 16, &t2, sizeof t2);
  475. memcpy(s + 24, &t3, sizeof t3);
  476. }
  477. static void ge25519_add(ge25519* r, const ge25519* p, const ge25519* q)
  478. {
  479. static const fe25519 d2 = {
  480. 0x69b9426b2f159, 0x35050762add7a,
  481. 0x3cf44c0038052, 0x6738cc7407977, 0x2406d9dc56dff };
  482. fe25519 a, b, c, d, t, e, f, g, h;
  483. fe25519_sub(a, p->Y, p->X);
  484. fe25519_sub(t, q->Y, q->X);
  485. fe25519_mul(a, a, t);
  486. fe25519_add(b, p->X, p->Y);
  487. fe25519_add(t, q->X, q->Y);
  488. fe25519_mul(b, b, t);
  489. fe25519_mul(c, p->T, q->T);
  490. fe25519_mul(c, c, d2);
  491. fe25519_mul(d, p->Z, q->Z);
  492. fe25519_add(d, d, d);
  493. fe25519_sub(e, b, a);
  494. fe25519_sub(f, d, c);
  495. fe25519_add(g, d, c);
  496. fe25519_add(h, b, a);
  497. fe25519_mul(r->X, e, f);
  498. fe25519_mul(r->Y, h, g);
  499. fe25519_mul(r->Z, g, f);
  500. fe25519_mul(r->T, e, h);
  501. }
  502. void keys_block::calculate_public_keys(const uint8_t random_bytes[KEYSIZE])
  503. {
  504. state.clear();
  505. for (int i = 0; i < 256; i++)
  506. key_bits[i] = (random_bytes[i / 8] >> (i & 7)) & 1;
  507. key_bits[0] = 0;
  508. key_bits[1] = 0;
  509. key_bits[2] = 0;
  510. key_bits[255] = 0;
  511. key_bits[254] = 1;
  512. for (int i = 0; i < points.size(); i++)
  513. {
  514. ge25519& h = points[i];
  515. if (state.size() == 0)
  516. h = g_base_powers.get(254);
  517. else
  518. h = state.back();
  519. for (int j = 253 - state.size(); j >= 3; j--)
  520. {
  521. if (key_bits[j])
  522. ge25519_add(&h, &h, &g_base_powers.get(j));
  523. state.push_back(h);
  524. }
  525. fe25519_add(h.T, h.Z, h.Y);
  526. fe25519_sub(temp_z[i], h.Z, h.Y);
  527. for (int j = 3; ; j++)
  528. {
  529. key_bits[j]--;
  530. state.pop_back();
  531. if (key_bits[j] == 0)
  532. break;
  533. else
  534. key_bits[j] = 1;
  535. }
  536. }
  537. // Multiple inversion
  538. memcpy(points[0].Y, temp_z[0], sizeof(fe25519));
  539. for (int i = 1; i < points.size(); i++)
  540. fe25519_mul(points[i].Y, temp_z[i], points[i - 1].Y);
  541. fe25519_invert(points.back().Z, points.back().Y);
  542. for (int i = points.size() - 1; i >= 1; i--)
  543. {
  544. fe25519_mul(points[i - 1].Z, points[i].Z, temp_z[i]);
  545. fe25519_mul(temp_z[i], points[i].Z, points[i - 1].Y);
  546. }
  547. memcpy(temp_z[0], points[0].Z, sizeof(fe25519));
  548. for (int i = 0; i < points.size(); i++)
  549. fe25519_mul(points[i].X, points[i].T, temp_z[i]);
  550. }
  551. void keys_block::get_public_key(key25519 public_key, int index) const
  552. {
  553. fe25519_tobytes(public_key, points[index].X);
  554. }
  555. void keys_block::get_private_key(key25519 private_key, int index) const
  556. {
  557. for (int i = 0; i < 32; i++)
  558. private_key[i] = 0;
  559. for (int i = 0; i < 256; i++)
  560. private_key[i / 8] |= key_bits[i] << (i & 7);
  561. key25519 key_delta = { 0 };
  562. *(uint64_t*)key_delta = (points.size() - index) * 8;
  563. int carry = 0;
  564. for (int i = 0; i < 32; i++)
  565. {
  566. int temp = private_key[i] + key_delta[i] + carry;
  567. private_key[i] = (uint8_t)temp;
  568. carry = temp > 0xFF;
  569. }
  570. }
  571. keys_block::keys_block(int size)
  572. : points(size), temp_z(size)
  573. {
  574. }
  575. base_powers::base_powers()
  576. {
  577. ge25519 q = {
  578. { 0x62d608f25d51a, 0x412a4b4f6592a,
  579. 0x75b7171a4b31d, 0x1ff60527118fe, 0x216936d3cd6e5 },
  580. { 0x6666666666658, 0x4cccccccccccc,
  581. 0x1999999999999, 0x3333333333333, 0x6666666666666 },
  582. { 1, 0, 0, 0, 0 },
  583. { 0x68ab3a5b7dda3, 0x00eea2a5eadbb, 0x2af8df483c27e,
  584. 0x332b375274732, 0x67875f0fd78b7 }
  585. };
  586. for (int i = 0; i < 255; i++)
  587. {
  588. data[i] = q;
  589. ge25519_add(&q, &q, &q);
  590. }
  591. }
  592. const ge25519& base_powers::get(int index)
  593. {
  594. return data[index];
  595. }