x25519.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. // Source: https://github.com/jedisct1/libsodium/tree/1.0.18
  2. // Source: https://github.com/openssl/openssl/blob/OpenSSL_1_1_1g/crypto/ec/asm/x25519-x86_64.pl
  3. #include "x25519.h"
  4. #include <string.h> // memcpy
  5. typedef unsigned __int128 uint128_t;
  6. base_powers g_base_powers;
  7. /*
  8. h = f + g
  9. Can overlap h with f or g.
  10. */
  11. static inline void
  12. fe25519_add(fe25519 h, const fe25519 f, const fe25519 g)
  13. {
  14. uint64_t h0 = f[0] + g[0];
  15. uint64_t h1 = f[1] + g[1];
  16. uint64_t h2 = f[2] + g[2];
  17. uint64_t h3 = f[3] + g[3];
  18. uint64_t h4 = f[4] + g[4];
  19. h[0] = h0;
  20. h[1] = h1;
  21. h[2] = h2;
  22. h[3] = h3;
  23. h[4] = h4;
  24. }
  25. /*
  26. h = f - g
  27. */
  28. static void
  29. fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g)
  30. {
  31. const uint64_t mask = 0x7ffffffffffffULL;
  32. uint64_t h0, h1, h2, h3, h4;
  33. h0 = g[0];
  34. h1 = g[1];
  35. h2 = g[2];
  36. h3 = g[3];
  37. h4 = g[4];
  38. h1 += h0 >> 51;
  39. h0 &= mask;
  40. h2 += h1 >> 51;
  41. h1 &= mask;
  42. h3 += h2 >> 51;
  43. h2 &= mask;
  44. h4 += h3 >> 51;
  45. h3 &= mask;
  46. h0 += 19ULL * (h4 >> 51);
  47. h4 &= mask;
  48. h0 = (f[0] + 0xfffffffffffdaULL) - h0;
  49. h1 = (f[1] + 0xffffffffffffeULL) - h1;
  50. h2 = (f[2] + 0xffffffffffffeULL) - h2;
  51. h3 = (f[3] + 0xffffffffffffeULL) - h3;
  52. h4 = (f[4] + 0xffffffffffffeULL) - h4;
  53. h[0] = h0;
  54. h[1] = h1;
  55. h[2] = h2;
  56. h[3] = h3;
  57. h[4] = h4;
  58. }
  59. #define REDUCE_ASM \
  60. "movq $0x7ffffffffffff,%%rbp \n" \
  61. \
  62. "movq %%r10,%%rdx \n" \
  63. "shrq $51,%%r10 \n" \
  64. "shlq $13,%%r11 \n" \
  65. "andq %%rbp,%%rdx \n" /* %%rdx = g2 = h2 & mask */ \
  66. "orq %%r10,%%r11 \n" /* h2>>51 */ \
  67. "addq %%r11,%%r12 \n" \
  68. "adcq $0,%%r13 \n" /* h3 += h2>>51 */ \
  69. \
  70. "movq %%rbx,%%rax \n" \
  71. "shrq $51,%%rbx \n" \
  72. "shlq $13,%%rcx \n" \
  73. "andq %%rbp,%%rax \n" /* %%rax = g0 = h0 & mask */ \
  74. "orq %%rbx,%%rcx \n" /* h0>>51 */ \
  75. "addq %%rcx,%%r8 \n" /* h1 += h0>>51 */ \
  76. "adcq $0,%%r9 \n" \
  77. \
  78. "movq %%r12,%%rbx \n" \
  79. "shrq $51,%%r12 \n" \
  80. "shlq $13,%%r13 \n" \
  81. "andq %%rbp,%%rbx \n" /* %%rbx = g3 = h3 & mask */ \
  82. "orq %%r12,%%r13 \n" /* h3>>51 */ \
  83. "addq %%r13,%%r14 \n" /* h4 += h3>>51 */ \
  84. "adcq $0,%%r15 \n" \
  85. \
  86. "movq %%r8,%%rcx \n" \
  87. "shrq $51,%%r8 \n" \
  88. "shlq $13,%%r9 \n" \
  89. "andq %%rbp,%%rcx \n" /* %%rcx = g1 = h1 & mask */ \
  90. "orq %%r8,%%r9 \n" \
  91. "addq %%r9,%%rdx \n" /* g2 += h1>>51 */ \
  92. \
  93. "movq %%r14,%%r10 \n" \
  94. "shrq $51,%%r14 \n" \
  95. "shlq $13,%%r15 \n" \
  96. "andq %%rbp,%%r10 \n" /* %%r10 = g4 = h0 & mask */ \
  97. "orq %%r14,%%r15 \n" /* h0>>51 */ \
  98. \
  99. "leaq (%%r15,%%r15,8),%%r14 \n" \
  100. "leaq (%%r15,%%r14,2),%%r15 \n" \
  101. "addq %%r15,%%rax \n" /* g0 += (h0>>51)*19 */ \
  102. \
  103. "movq %%rdx,%%r8 \n" \
  104. "andq %%rbp,%%rdx \n" /* g2 &= mask */ \
  105. "shrq $51,%%r8 \n" \
  106. "addq %%r8,%%rbx \n" /* g3 += g2>>51 */ \
  107. \
  108. "movq %%rax,%%r9 \n" \
  109. "andq %%rbp,%%rax \n" /* g0 &= mask */ \
  110. "shrq $51,%%r9 \n" \
  111. "addq %%r9,%%rcx \n" /* g1 += g0>>51 */ \
  112. \
  113. "movq %%rax,8*0(%%rdi) \n" /* save the result */ \
  114. "movq %%rcx,8*1(%%rdi) \n" \
  115. "movq %%rdx,8*2(%%rdi) \n" \
  116. "movq %%rbx,8*3(%%rdi) \n" \
  117. "movq %%r10,8*4(%%rdi) \n"
  118. /*
  119. h = f * g
  120. Can overlap h with f or g.
  121. */
  122. static void
  123. fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g)
  124. {
  125. uint64_t t1, t2, t3, t4;
  126. __asm__ (
  127. "pushq %%r15 \n"
  128. "pushq %%r14 \n"
  129. "pushq %%r13 \n"
  130. "movq %4,%%rdi \n"
  131. "movq %5,%%rsi \n"
  132. "movq %6,%%rdx \n"
  133. "movq 8*0(%%rsi),%%rax \n" // f[0]
  134. "movq 8*0(%%rdx),%%r11 \n" // load g[0-4]
  135. "movq 8*1(%%rdx),%%r12 \n"
  136. "movq 8*2(%%rdx),%%r13 \n"
  137. "movq 8*3(%%rdx),%%rbp \n"
  138. "movq 8*4(%%rdx),%%r14 \n"
  139. "movq %%rdi,%3 \n" // offload 1st argument
  140. "movq %%rax,%%rdi \n"
  141. "mulq %%r11 \n" // f[0]*g[0]
  142. "movq %%r11,%0 \n" // offload g[0]
  143. "movq %%rax,%%rbx \n" // %%rbx:%%rcx = h0
  144. "movq %%rdi,%%rax \n"
  145. "movq %%rdx,%%rcx \n"
  146. "mulq %%r12 \n" // f[0]*g[1]
  147. "movq %%r12,%1 \n" // offload g[1]
  148. "movq %%rax,%%r8 \n" // %%r8:%%r9 = h1
  149. "movq %%rdi,%%rax \n"
  150. "leaq (%%r14,%%r14,8),%%r15 \n"
  151. "movq %%rdx,%%r9 \n"
  152. "mulq %%r13 \n" // f[0]*g[2]
  153. "movq %%r13,%2 \n" // offload g[2]
  154. "movq %%rax,%%r10 \n" // %%r10:%%r11 = h2
  155. "movq %%rdi,%%rax \n"
  156. "leaq (%%r14,%%r15,2),%%rdi \n" // g[4]*19
  157. "movq %%rdx,%%r11 \n"
  158. "mulq %%rbp \n" // f[0]*g[3]
  159. "movq %%rax,%%r12 \n" // %%r12:%%r13 = h3
  160. "movq 8*0(%%rsi),%%rax \n" // f[0]
  161. "movq %%rdx,%%r13 \n"
  162. "mulq %%r14 \n" // f[0]*g[4]
  163. "movq %%rax,%%r14 \n" // %%r14:%%r15 = h4
  164. "movq 8*1(%%rsi),%%rax \n" // f[1]
  165. "movq %%rdx,%%r15 \n"
  166. "mulq %%rdi \n" // f[1]*g[4]*19
  167. "addq %%rax,%%rbx \n"
  168. "movq 8*2(%%rsi),%%rax \n" // f[2]
  169. "adcq %%rdx,%%rcx \n"
  170. "mulq %%rdi \n" // f[2]*g[4]*19
  171. "addq %%rax,%%r8 \n"
  172. "movq 8*3(%%rsi),%%rax \n" // f[3]
  173. "adcq %%rdx,%%r9 \n"
  174. "mulq %%rdi \n" // f[3]*g[4]*19
  175. "addq %%rax,%%r10 \n"
  176. "movq 8*4(%%rsi),%%rax \n" // f[4]
  177. "adcq %%rdx,%%r11 \n"
  178. "mulq %%rdi \n" // f[4]*g[4]*19
  179. "imulq $19,%%rbp,%%rdi \n" // g[3]*19
  180. "addq %%rax,%%r12 \n"
  181. "movq 8*1(%%rsi),%%rax \n" // f[1]
  182. "adcq %%rdx,%%r13 \n"
  183. "mulq %%rbp \n" // f[1]*g[3]
  184. "movq %2,%%rbp \n" // g[2]
  185. "addq %%rax,%%r14 \n"
  186. "movq 8*2(%%rsi),%%rax \n" // f[2]
  187. "adcq %%rdx,%%r15 \n"
  188. "mulq %%rdi \n" // f[2]*g[3]*19
  189. "addq %%rax,%%rbx \n"
  190. "movq 8*3(%%rsi),%%rax \n" // f[3]
  191. "adcq %%rdx,%%rcx \n"
  192. "mulq %%rdi \n" // f[3]*g[3]*19
  193. "addq %%rax,%%r8 \n"
  194. "movq 8*4(%%rsi),%%rax \n" // f[4]
  195. "adcq %%rdx,%%r9 \n"
  196. "mulq %%rdi \n" // f[4]*g[3]*19
  197. "imulq $19,%%rbp,%%rdi \n" // g[2]*19
  198. "addq %%rax,%%r10 \n"
  199. "movq 8*1(%%rsi),%%rax \n" // f[1]
  200. "adcq %%rdx,%%r11 \n"
  201. "mulq %%rbp \n" // f[1]*g[2]
  202. "addq %%rax,%%r12 \n"
  203. "movq 8*2(%%rsi),%%rax \n" // f[2]
  204. "adcq %%rdx,%%r13 \n"
  205. "mulq %%rbp \n" // f[2]*g[2]
  206. "movq %1,%%rbp \n" // g[1]
  207. "addq %%rax,%%r14 \n"
  208. "movq 8*3(%%rsi),%%rax \n" // f[3]
  209. "adcq %%rdx,%%r15 \n"
  210. "mulq %%rdi \n" // f[3]*g[2]*19
  211. "addq %%rax,%%rbx \n"
  212. "movq 8*4(%%rsi),%%rax \n" // f[3]
  213. "adcq %%rdx,%%rcx \n"
  214. "mulq %%rdi \n" // f[4]*g[2]*19
  215. "addq %%rax,%%r8 \n"
  216. "movq 8*1(%%rsi),%%rax \n" // f[1]
  217. "adcq %%rdx,%%r9 \n"
  218. "mulq %%rbp \n" // f[1]*g[1]
  219. "imulq $19,%%rbp,%%rdi \n"
  220. "addq %%rax,%%r10 \n"
  221. "movq 8*2(%%rsi),%%rax \n" // f[2]
  222. "adcq %%rdx,%%r11 \n"
  223. "mulq %%rbp \n" // f[2]*g[1]
  224. "addq %%rax,%%r12 \n"
  225. "movq 8*3(%%rsi),%%rax \n" // f[3]
  226. "adcq %%rdx,%%r13 \n"
  227. "mulq %%rbp \n" // f[3]*g[1]
  228. "movq %0,%%rbp \n" // g[0]
  229. "addq %%rax,%%r14 \n"
  230. "movq 8*4(%%rsi),%%rax \n" // f[4]
  231. "adcq %%rdx,%%r15 \n"
  232. "mulq %%rdi \n" // f[4]*g[1]*19
  233. "addq %%rax,%%rbx \n"
  234. "movq 8*1(%%rsi),%%rax \n" // f[1]
  235. "adcq %%rdx,%%rcx \n"
  236. "mulq %%rbp \n" // f[1]*g[0]
  237. "addq %%rax,%%r8 \n"
  238. "movq 8*2(%%rsi),%%rax \n" // f[2]
  239. "adcq %%rdx,%%r9 \n"
  240. "mulq %%rbp \n" // f[2]*g[0]
  241. "addq %%rax,%%r10 \n"
  242. "movq 8*3(%%rsi),%%rax \n" // f[3]
  243. "adcq %%rdx,%%r11 \n"
  244. "mulq %%rbp \n" // f[3]*g[0]
  245. "addq %%rax,%%r12 \n"
  246. "movq 8*4(%%rsi),%%rax \n" // f[4]
  247. "adcq %%rdx,%%r13 \n"
  248. "mulq %%rbp \n" // f[4]*g[0]
  249. "addq %%rax,%%r14 \n"
  250. "adcq %%rdx,%%r15 \n"
  251. "movq %3,%%rdi \n" // restore 1st argument
  252. REDUCE_ASM
  253. "popq %%r13 \n"
  254. "popq %%r14 \n"
  255. "popq %%r15 \n"
  256. : "+m"(t1), "+m"(t2), "+m"(t3), "+m"(t4)
  257. : "r"(h), "r"(f), "r"(g)
  258. : "%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi", "%rbp",
  259. "%r8", "%r9", "%r10", "%r11", "%r12", "memory"
  260. );
  261. }
  262. /*
  263. h = f * f
  264. Can overlap h with f.
  265. */
  266. static void fe25519_sq(fe25519 h, const fe25519 f)
  267. {
  268. uint64_t t1, t2, t3, t4;
  269. __asm__ (
  270. "pushq %%r15 \n"
  271. "pushq %%r14 \n"
  272. "movq %4, %%rdi \n"
  273. "movq %5, %%rsi \n"
  274. "movq 8*0(%%rsi),%%rax \n" // g[0]
  275. "movq 8*2(%%rsi),%%r15 \n" // g[2]
  276. "movq 8*4(%%rsi),%%rbp \n" // g[4]
  277. "movq %%rdi,%3 \n" // offload 1st argument
  278. "leaq (%%rax,%%rax),%%r14 \n"
  279. "mulq %%rax \n" // g[0]*g[0]
  280. "movq %%rax,%%rbx \n"
  281. "movq 8*1(%%rsi),%%rax \n" // g[1]
  282. "movq %%rdx,%%rcx \n"
  283. "mulq %%r14 \n" // 2*g[0]*g[1]
  284. "movq %%rax,%%r8 \n"
  285. "movq %%r15,%%rax \n"
  286. "movq %%r15,%0 \n" // offload g[2]
  287. "movq %%rdx,%%r9 \n"
  288. "mulq %%r14 \n" // 2*g[0]*g[2]
  289. "movq %%rax,%%r10 \n"
  290. "movq 8*3(%%rsi),%%rax \n"
  291. "movq %%rdx,%%r11 \n"
  292. "imulq $19,%%rbp,%%rdi \n" // g[4]*19
  293. "mulq %%r14 \n" // 2*g[0]*g[3]
  294. "movq %%rax,%%r12 \n"
  295. "movq %%rbp,%%rax \n"
  296. "movq %%rdx,%%r13 \n"
  297. "mulq %%r14 \n" // 2*g[0]*g[4]
  298. "movq %%rax,%%r14 \n"
  299. "movq %%rbp,%%rax \n"
  300. "movq %%rdx,%%r15 \n"
  301. "mulq %%rdi \n" // g[4]*g[4]*19
  302. "addq %%rax,%%r12 \n"
  303. "movq 8*1(%%rsi),%%rax \n" // g[1]
  304. "adcq %%rdx,%%r13 \n"
  305. "movq 8*3(%%rsi),%%rsi \n" // g[3]
  306. "leaq (%%rax,%%rax),%%rbp \n"
  307. "mulq %%rax \n" // g[1]*g[1]
  308. "addq %%rax,%%r10 \n"
  309. "movq %0,%%rax \n" // g[2]
  310. "adcq %%rdx,%%r11 \n"
  311. "mulq %%rbp \n" // 2*g[1]*g[2]
  312. "addq %%rax,%%r12 \n"
  313. "movq %%rbp,%%rax \n"
  314. "adcq %%rdx,%%r13 \n"
  315. "mulq %%rsi \n" // 2*g[1]*g[3]
  316. "addq %%rax,%%r14 \n"
  317. "movq %%rbp,%%rax \n"
  318. "adcq %%rdx,%%r15 \n"
  319. "imulq $19,%%rsi,%%rbp \n" // g[3]*19
  320. "mulq %%rdi \n" // 2*g[1]*g[4]*19
  321. "addq %%rax,%%rbx \n"
  322. "leaq (%%rsi,%%rsi),%%rax \n"
  323. "adcq %%rdx,%%rcx \n"
  324. "mulq %%rdi \n" // 2*g[3]*g[4]*19
  325. "addq %%rax,%%r10 \n"
  326. "movq %%rsi,%%rax \n"
  327. "adcq %%rdx,%%r11 \n"
  328. "mulq %%rbp \n" // g[3]*g[3]*19
  329. "addq %%rax,%%r8 \n"
  330. "movq %0,%%rax \n" // g[2]
  331. "adcq %%rdx,%%r9 \n"
  332. "leaq (%%rax,%%rax),%%rsi \n"
  333. "mulq %%rax \n" // g[2]*g[2]
  334. "addq %%rax,%%r14 \n"
  335. "movq %%rbp,%%rax \n"
  336. "adcq %%rdx,%%r15 \n"
  337. "mulq %%rsi \n" // 2*g[2]*g[3]*19
  338. "addq %%rax,%%rbx \n"
  339. "movq %%rsi,%%rax \n"
  340. "adcq %%rdx,%%rcx \n"
  341. "mulq %%rdi \n" // 2*g[2]*g[4]*19
  342. "addq %%rax,%%r8 \n"
  343. "adcq %%rdx,%%r9 \n"
  344. "movq %3,%%rdi \n" // restore 1st argument
  345. REDUCE_ASM
  346. "popq %%r14 \n"
  347. "popq %%r15 \n"
  348. : "+m"(t1), "+m"(t2), "+m"(t3), "+m"(t4)
  349. : "r"(h), "r"(f)
  350. : "%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi", "%rbp",
  351. "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory"
  352. );
  353. }
  354. static void
  355. fe25519_invert(fe25519 out, const fe25519 z)
  356. {
  357. fe25519 t0;
  358. fe25519 t1;
  359. fe25519 t2;
  360. fe25519 t3;
  361. int i;
  362. fe25519_sq(t0, z);
  363. fe25519_sq(t1, t0);
  364. fe25519_sq(t1, t1);
  365. fe25519_mul(t1, z, t1);
  366. fe25519_mul(t0, t0, t1);
  367. fe25519_sq(t2, t0);
  368. fe25519_mul(t1, t1, t2);
  369. fe25519_sq(t2, t1);
  370. for (i = 1; i < 5; ++i) {
  371. fe25519_sq(t2, t2);
  372. }
  373. fe25519_mul(t1, t2, t1);
  374. fe25519_sq(t2, t1);
  375. for (i = 1; i < 10; ++i) {
  376. fe25519_sq(t2, t2);
  377. }
  378. fe25519_mul(t2, t2, t1);
  379. fe25519_sq(t3, t2);
  380. for (i = 1; i < 20; ++i) {
  381. fe25519_sq(t3, t3);
  382. }
  383. fe25519_mul(t2, t3, t2);
  384. for (i = 1; i < 11; ++i) {
  385. fe25519_sq(t2, t2);
  386. }
  387. fe25519_mul(t1, t2, t1);
  388. fe25519_sq(t2, t1);
  389. for (i = 1; i < 50; ++i) {
  390. fe25519_sq(t2, t2);
  391. }
  392. fe25519_mul(t2, t2, t1);
  393. fe25519_sq(t3, t2);
  394. for (i = 1; i < 100; ++i) {
  395. fe25519_sq(t3, t3);
  396. }
  397. fe25519_mul(t2, t3, t2);
  398. for (i = 1; i < 51; ++i) {
  399. fe25519_sq(t2, t2);
  400. }
  401. fe25519_mul(t1, t2, t1);
  402. for (i = 1; i < 6; ++i) {
  403. fe25519_sq(t1, t1);
  404. }
  405. fe25519_mul(out, t1, t0);
  406. }
  407. static void
  408. fe25519_reduce(fe25519 h, const fe25519 f)
  409. {
  410. const uint64_t mask = 0x7ffffffffffffULL;
  411. uint128_t t[5];
  412. t[0] = f[0];
  413. t[1] = f[1];
  414. t[2] = f[2];
  415. t[3] = f[3];
  416. t[4] = f[4];
  417. t[1] += t[0] >> 51;
  418. t[0] &= mask;
  419. t[2] += t[1] >> 51;
  420. t[1] &= mask;
  421. t[3] += t[2] >> 51;
  422. t[2] &= mask;
  423. t[4] += t[3] >> 51;
  424. t[3] &= mask;
  425. t[0] += 19 * (t[4] >> 51);
  426. t[4] &= mask;
  427. t[1] += t[0] >> 51;
  428. t[0] &= mask;
  429. t[2] += t[1] >> 51;
  430. t[1] &= mask;
  431. t[3] += t[2] >> 51;
  432. t[2] &= mask;
  433. t[4] += t[3] >> 51;
  434. t[3] &= mask;
  435. t[0] += 19 * (t[4] >> 51);
  436. t[4] &= mask;
  437. /* now t is between 0 and 2^255-1, properly carried. */
  438. /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
  439. t[0] += 19ULL;
  440. t[1] += t[0] >> 51;
  441. t[0] &= mask;
  442. t[2] += t[1] >> 51;
  443. t[1] &= mask;
  444. t[3] += t[2] >> 51;
  445. t[2] &= mask;
  446. t[4] += t[3] >> 51;
  447. t[3] &= mask;
  448. t[0] += 19ULL * (t[4] >> 51);
  449. t[4] &= mask;
  450. /* now between 19 and 2^255-1 in both cases, and offset by 19. */
  451. t[0] += 0x8000000000000 - 19ULL;
  452. t[1] += 0x8000000000000 - 1ULL;
  453. t[2] += 0x8000000000000 - 1ULL;
  454. t[3] += 0x8000000000000 - 1ULL;
  455. t[4] += 0x8000000000000 - 1ULL;
  456. /* now between 2^255 and 2^256-20, and offset by 2^255. */
  457. t[1] += t[0] >> 51;
  458. t[0] &= mask;
  459. t[2] += t[1] >> 51;
  460. t[1] &= mask;
  461. t[3] += t[2] >> 51;
  462. t[2] &= mask;
  463. t[4] += t[3] >> 51;
  464. t[3] &= mask;
  465. t[4] &= mask;
  466. h[0] = t[0];
  467. h[1] = t[1];
  468. h[2] = t[2];
  469. h[3] = t[3];
  470. h[4] = t[4];
  471. }
  472. static void
  473. fe25519_tobytes(unsigned char *s, const fe25519 h)
  474. {
  475. fe25519 t;
  476. uint64_t t0, t1, t2, t3;
  477. fe25519_reduce(t, h);
  478. t0 = t[0] | (t[1] << 51);
  479. t1 = (t[1] >> 13) | (t[2] << 38);
  480. t2 = (t[2] >> 26) | (t[3] << 25);
  481. t3 = (t[3] >> 39) | (t[4] << 12);
  482. memcpy(s + 0, &t0, sizeof t0);
  483. memcpy(s + 8, &t1, sizeof t1);
  484. memcpy(s + 16, &t2, sizeof t2);
  485. memcpy(s + 24, &t3, sizeof t3);
  486. }
  487. static void ge25519_add(ge25519* r, const ge25519* p, const ge25519* q)
  488. {
  489. static const fe25519 d2 = {
  490. 0x69b9426b2f159, 0x35050762add7a,
  491. 0x3cf44c0038052, 0x6738cc7407977, 0x2406d9dc56dff };
  492. fe25519 a, b, c, d, t, e, f, g, h;
  493. fe25519_sub(a, p->Y, p->X);
  494. fe25519_sub(t, q->Y, q->X);
  495. fe25519_mul(a, a, t);
  496. fe25519_add(b, p->X, p->Y);
  497. fe25519_add(t, q->X, q->Y);
  498. fe25519_mul(b, b, t);
  499. fe25519_mul(c, p->T, q->T);
  500. fe25519_mul(c, c, d2);
  501. fe25519_mul(d, p->Z, q->Z);
  502. fe25519_add(d, d, d);
  503. fe25519_sub(e, b, a);
  504. fe25519_sub(f, d, c);
  505. fe25519_add(g, d, c);
  506. fe25519_add(h, b, a);
  507. fe25519_mul(r->X, e, f);
  508. fe25519_mul(r->Y, h, g);
  509. fe25519_mul(r->Z, g, f);
  510. fe25519_mul(r->T, e, h);
  511. }
  512. void keys_block::calculate_public_keys(const uint8_t random_bytes[KEYSIZE])
  513. {
  514. state.clear();
  515. for (int i = 0; i < 256; i++)
  516. key_bits[i] = (random_bytes[i / 8] >> (i & 7)) & 1;
  517. key_bits[0] = 0;
  518. key_bits[1] = 0;
  519. key_bits[2] = 0;
  520. key_bits[255] = 0;
  521. key_bits[254] = 1;
  522. for (int i = 0; i < points.size(); i++)
  523. {
  524. ge25519& h = points[i];
  525. if (state.size() == 0)
  526. h = g_base_powers.get(254);
  527. else
  528. h = state.back();
  529. for (int j = 253 - state.size(); j >= 3; j--)
  530. {
  531. if (key_bits[j])
  532. ge25519_add(&h, &h, &g_base_powers.get(j));
  533. state.push_back(h);
  534. }
  535. fe25519_add(h.T, h.Z, h.Y);
  536. fe25519_sub(temp_z[i], h.Z, h.Y);
  537. for (int j = 3; ; j++)
  538. {
  539. key_bits[j]--;
  540. state.pop_back();
  541. if (key_bits[j] == 0)
  542. break;
  543. else
  544. key_bits[j] = 1;
  545. }
  546. }
  547. // Multiple inversion
  548. memcpy(points[0].Y, temp_z[0], sizeof(fe25519));
  549. for (int i = 1; i < points.size(); i++)
  550. fe25519_mul(points[i].Y, temp_z[i], points[i - 1].Y);
  551. fe25519_invert(points.back().Z, points.back().Y);
  552. for (int i = points.size() - 1; i >= 1; i--)
  553. {
  554. fe25519_mul(points[i - 1].Z, points[i].Z, temp_z[i]);
  555. fe25519_mul(temp_z[i], points[i].Z, points[i - 1].Y);
  556. }
  557. memcpy(temp_z[0], points[0].Z, sizeof(fe25519));
  558. for (int i = 0; i < points.size(); i++)
  559. fe25519_mul(points[i].X, points[i].T, temp_z[i]);
  560. }
  561. void keys_block::get_public_key(key25519 public_key, int index) const
  562. {
  563. fe25519_tobytes(public_key, points[index].X);
  564. }
  565. void keys_block::get_private_key(key25519 private_key, int index) const
  566. {
  567. for (int i = 0; i < 32; i++)
  568. private_key[i] = 0;
  569. for (int i = 0; i < 256; i++)
  570. private_key[i / 8] |= key_bits[i] << (i & 7);
  571. key25519 key_delta = { 0 };
  572. *(uint64_t*)key_delta = (points.size() - index) * 8;
  573. int carry = 0;
  574. for (int i = 0; i < 32; i++)
  575. {
  576. int temp = private_key[i] + key_delta[i] + carry;
  577. private_key[i] = (uint8_t)temp;
  578. carry = temp > 0xFF;
  579. }
  580. }
  581. keys_block::keys_block(int size)
  582. : points(size), temp_z(size)
  583. {
  584. }
  585. base_powers::base_powers()
  586. {
  587. ge25519 q = {
  588. { 0x62d608f25d51a, 0x412a4b4f6592a,
  589. 0x75b7171a4b31d, 0x1ff60527118fe, 0x216936d3cd6e5 },
  590. { 0x6666666666658, 0x4cccccccccccc,
  591. 0x1999999999999, 0x3333333333333, 0x6666666666666 },
  592. { 1, 0, 0, 0, 0 },
  593. { 0x68ab3a5b7dda3, 0x00eea2a5eadbb, 0x2af8df483c27e,
  594. 0x332b375274732, 0x67875f0fd78b7 }
  595. };
  596. for (int i = 0; i < 255; i++)
  597. {
  598. data[i] = q;
  599. ge25519_add(&q, &q, &q);
  600. }
  601. }
  602. const ge25519& base_powers::get(int index)
  603. {
  604. return data[index];
  605. }