/*
 * Copyright (c) 2013, Kenneth MacKay
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/swab.h>
#include <linux/fips.h>
#include <crypto/ecdh.h>

#include "ecc.h"
#include "ecc_curve_defs.h"

typedef struct {
	u64 m_low;
	u64 m_high;
} uint128_t;
static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
{
	switch (curve_id) {
	/* In FIPS mode only allow P256 and higher */
	case ECC_CURVE_NIST_P192:
		return fips_enabled ? NULL : &nist_p192;
	case ECC_CURVE_NIST_P256:
		return &nist_p256;
	default:
		return NULL;
	}
}
static u64 *ecc_alloc_digits_space(unsigned int ndigits)
{
	size_t len = ndigits * sizeof(u64);

	if (!len)
		return NULL;

	return kmalloc(len, GFP_KERNEL);
}
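/* kzfree() zeroes a buffer before handing it back to the allocator, so freed
 * digit arrays and points that may have held private key material do not
 * linger in slab memory.
 */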
static void ecc_free_digits_space(u64 *space)
{
	kzfree(space);
}

static struct ecc_point *ecc_alloc_point(unsigned int ndigits)
{
	struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;

	p->x = ecc_alloc_digits_space(ndigits);
	if (!p->x)
		goto err_alloc_x;

	p->y = ecc_alloc_digits_space(ndigits);
	if (!p->y)
		goto err_alloc_y;

	p->ndigits = ndigits;

	return p;

err_alloc_y:
	ecc_free_digits_space(p->x);
err_alloc_x:
	kfree(p);
	return NULL;
}

static void ecc_free_point(struct ecc_point *p)
{
	if (!p)
		return;

	kzfree(p->x);
	kzfree(p->y);
	kzfree(p);
}
static void vli_clear(u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		vli[i] = 0;
}

/* Returns true if vli == 0, false otherwise. */
static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++) {
		if (vli[i])
			return false;
	}

	return true;
}
/* Returns nonzero if bit 'bit' of vli is set. */
static u64 vli_test_bit(const u64 *vli, unsigned int bit)
{
	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
}
/* Counts the number of 64-bit "digits" in vli. */
static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
{
	int i;

	/* Search from the end until we find a non-zero digit.
	 * We do it in reverse because we expect that most digits will
	 * be nonzero.
	 */
	for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);

	return (i + 1);
}
/* Counts the number of bits required for vli. */
static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
{
	unsigned int i, num_digits;
	u64 digit;

	num_digits = vli_num_digits(vli, ndigits);
	if (num_digits == 0)
		return 0;

	digit = vli[num_digits - 1];
	for (i = 0; digit; i++)
		digit >>= 1;

	return ((num_digits - 1) * 64 + i);
}
/* Sets dest = src. */
static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		dest[i] = src[i];
}

/* Returns sign of left - right. */
static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
{
	int i;

	for (i = ndigits - 1; i >= 0; i--) {
		if (left[i] > right[i])
			return 1;
		else if (left[i] < right[i])
			return -1;
	}

	return 0;
}
/* Computes result = in << shift, returning carry. Can modify in place
 * (if result == in). 0 < shift < 64.
 */
static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
		      unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 temp = in[i];

		result[i] = (temp << shift) | carry;
		carry = temp >> (64 - shift);
	}

	return carry;
}

/* Computes vli = vli >> 1. */
static void vli_rshift1(u64 *vli, unsigned int ndigits)
{
	u64 *end = vli;
	u64 carry = 0;

	vli += ndigits;
	while (vli-- > end) {
		u64 temp = *vli;
		*vli = (temp >> 1) | carry;
		carry = temp << 63;
	}
}
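/* Note on the conditional carry/borrow updates in vli_add() and vli_sub()
 * below: when the 64-bit sum (or difference) equals left[i], the quantity
 * right[i] + carry (or right[i] + borrow) is an exact multiple of 2^64,
 * which is only possible for right[i] == 0 with carry == 0, or
 * right[i] == ~0ull with carry == 1. In both cases the existing carry or
 * borrow value is already correct, so it is deliberately left unchanged.
 */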
/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + right[i] + carry;
		if (sum != left[i])
			carry = (sum < left[i]);

		result[i] = sum;
	}

	return carry;
}

/* Computes result = left - right, returning borrow. Can modify in place. */
static u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 borrow = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - right[i] - borrow;
		if (diff != left[i])
			borrow = (diff > left[i]);

		result[i] = diff;
	}

	return borrow;
}
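/* 64 x 64 -> 128 bit multiplication built from four 32 x 32 -> 64 bit
 * partial products: with left = a1 * 2^32 + a0 and right = b1 * 2^32 + b0,
 *
 *	left * right = a1*b1 * 2^64 + (a1*b0 + a0*b1) * 2^32 + a0*b0,
 *
 * where any carry out of the middle terms is folded into the high word.
 */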
static uint128_t mul_64_64(u64 left, u64 right)
{
	u64 a0 = left & 0xffffffffull;
	u64 a1 = left >> 32;
	u64 b0 = right & 0xffffffffull;
	u64 b1 = right >> 32;
	u64 m0 = a0 * b0;
	u64 m1 = a0 * b1;
	u64 m2 = a1 * b0;
	u64 m3 = a1 * b1;
	uint128_t result;

	m2 += (m0 >> 32);
	m2 += m1;

	/* Overflow */
	if (m2 < m1)
		m3 += 0x100000000ull;

	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
	result.m_high = m3 + (m2 >> 32);

	return result;
}

static uint128_t add_128_128(uint128_t a, uint128_t b)
{
	uint128_t result;

	result.m_low = a.m_low + b.m_low;
	result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);

	return result;
}
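/* Schoolbook multiplication in product-scanning order: the outer loop walks
 * the digits of the result and the inner loop accumulates every partial
 * product left[i] * right[k - i] that contributes to digit k, carrying the
 * overflow in the three-word accumulator (r2, r01.m_high, r01.m_low).
 */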
static void vli_mult(u64 *result, const u64 *left, const u64 *right,
		     unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };
	u64 r2 = 0;
	unsigned int i, k;

	/* Compute each digit of result in sequence, maintaining the
	 * carries.
	 */
	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		for (i = min; i <= k && i < ndigits; i++) {
			uint128_t product;

			product = mul_64_64(left[i], right[k - i]);

			r01 = add_128_128(r01, product);
			r2 += (r01.m_high < product.m_high);
		}

		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
/* Same product-scanning loop as vli_mult(), but each cross product
 * left[i] * left[k - i] with i < k - i is computed once and doubled.
 */
static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };
	u64 r2 = 0;
	int i, k;

	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		for (i = min; i <= k && i <= k - i; i++) {
			uint128_t product;

			product = mul_64_64(left[i], left[k - i]);

			if (i < k - i) {
				r2 += product.m_high >> 63;
				product.m_high = (product.m_high << 1) |
						 (product.m_low >> 63);
				product.m_low <<= 1;
			}

			r01 = add_128_128(r01, product);
			r2 += (r01.m_high < product.m_high);
		}

		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
/* Computes result = (left + right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
			const u64 *mod, unsigned int ndigits)
{
	u64 carry;

	carry = vli_add(result, left, right, ndigits);

	/* result > mod (result = mod + remainder), so subtract mod to
	 * get remainder.
	 */
	if (carry || vli_cmp(result, mod, ndigits) >= 0)
		vli_sub(result, result, mod, ndigits);
}

/* Computes result = (left - right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
			const u64 *mod, unsigned int ndigits)
{
	u64 borrow = vli_sub(result, left, right, ndigits);

	/* In this case, result == -diff == (max int) - diff.
	 * Since -x % d == d - x, we can get the correct result from
	 * result + mod (with overflow).
	 */
	if (borrow)
		vli_add(result, result, mod, ndigits);
}
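/* For the NIST P-192 prime p = 2^192 - 2^64 - 1 we have
 * 2^192 == 2^64 + 1 (mod p), so the upper three 64-bit digits of the
 * product fold back onto the lower half with a handful of additions.
 */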
/* Computes result = product % curve_prime.
 * See algorithm 5 and 6 from
 * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
 */
static void vli_mmod_fast_192(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	const unsigned int ndigits = 3;
	int carry;

	vli_set(result, product, ndigits);

	vli_set(tmp, &product[3], ndigits);
	carry = vli_add(result, result, tmp, ndigits);

	tmp[0] = 0;
	tmp[1] = product[3];
	tmp[2] = product[4];
	carry += vli_add(result, result, tmp, ndigits);

	tmp[0] = tmp[1] = product[5];
	tmp[2] = 0;
	carry += vli_add(result, result, tmp, ndigits);

	while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
		carry -= vli_sub(result, result, curve_prime, ndigits);
}
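/* For the NIST P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1, the powers
 * 2^256 through 2^448 reduce to fixed sums and differences of the 32-bit
 * words of the product; the s1-s4 additions and d1-d4 subtractions below are
 * exactly those folded terms, followed by a final correction that brings the
 * result back into the range [0, p).
 */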
/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
static void vli_mmod_fast_256(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	int carry;
	const unsigned int ndigits = 4;

	/* t */
	vli_set(result, product, ndigits);

	/* s1 */
	tmp[0] = 0;
	tmp[1] = product[5] & 0xffffffff00000000ull;
	tmp[2] = product[6];
	tmp[3] = product[7];
	carry = vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s2 */
	tmp[1] = product[6] << 32;
	tmp[2] = (product[6] >> 32) | (product[7] << 32);
	tmp[3] = product[7] >> 32;
	carry += vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s3 */
	tmp[0] = product[4];
	tmp[1] = product[5] & 0xffffffff;
	tmp[2] = 0;
	tmp[3] = product[7];
	carry += vli_add(result, result, tmp, ndigits);

	/* s4 */
	tmp[0] = (product[4] >> 32) | (product[5] << 32);
	tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
	tmp[2] = product[7];
	tmp[3] = (product[6] >> 32) | (product[4] << 32);
	carry += vli_add(result, result, tmp, ndigits);

	/* d1 */
	tmp[0] = (product[5] >> 32) | (product[6] << 32);
	tmp[1] = (product[6] >> 32);
	tmp[2] = 0;
	tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d2 */
	tmp[0] = product[6];
	tmp[1] = product[7];
	tmp[2] = 0;
	tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d3 */
	tmp[0] = (product[6] >> 32) | (product[7] << 32);
	tmp[1] = (product[7] >> 32) | (product[4] << 32);
	tmp[2] = (product[4] >> 32) | (product[5] << 32);
	tmp[3] = (product[6] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d4 */
	tmp[0] = product[7];
	tmp[1] = product[4] & 0xffffffff00000000ull;
	tmp[2] = product[5];
	tmp[3] = product[6] & 0xffffffff00000000ull;
	carry -= vli_sub(result, result, tmp, ndigits);

	if (carry < 0) {
		do {
			carry += vli_add(result, result, curve_prime, ndigits);
		} while (carry < 0);
	} else {
		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
			carry -= vli_sub(result, result, curve_prime, ndigits);
	}
}
/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
static bool vli_mmod_fast(u64 *result, u64 *product,
			  const u64 *curve_prime, unsigned int ndigits)
{
	u64 tmp[2 * ndigits];

	switch (ndigits) {
	case 3:
		vli_mmod_fast_192(result, product, curve_prime, tmp);
		break;
	case 4:
		vli_mmod_fast_256(result, product, curve_prime, tmp);
		break;
	default:
		pr_err("unsupported digits size!\n");
		return false;
	}

	return true;
}
/* Computes result = (left * right) % curve_prime. */
static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
			      const u64 *curve_prime, unsigned int ndigits)
{
	u64 product[2 * ndigits];

	vli_mult(product, left, right, ndigits);
	vli_mmod_fast(result, product, curve_prime, ndigits);
}

/* Computes result = left^2 % curve_prime. */
static void vli_mod_square_fast(u64 *result, const u64 *left,
				const u64 *curve_prime, unsigned int ndigits)
{
	u64 product[2 * ndigits];

	vli_square(product, left, ndigits);
	vli_mmod_fast(result, product, curve_prime, ndigits);
}

#define EVEN(vli) (!(vli[0] & 1))
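/* Binary extended Euclidean inversion: throughout the loop below the
 * invariants u * input == a (mod mod) and v * input == b (mod mod) hold, so
 * once a and b converge to gcd(input, mod) == 1, u is the inverse. Each
 * right shift is a division by two modulo mod; an odd value is first made
 * even by adding mod, which works because the curve primes used here are
 * odd.
 */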
/* Computes result = (1 / input) % mod. All VLIs are the same size.
 * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
 * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
 */
static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
			unsigned int ndigits)
{
	u64 a[ndigits], b[ndigits];
	u64 u[ndigits], v[ndigits];
	u64 carry;
	int cmp_result;

	if (vli_is_zero(input, ndigits)) {
		vli_clear(result, ndigits);
		return;
	}

	vli_set(a, input, ndigits);
	vli_set(b, mod, ndigits);
	vli_clear(u, ndigits);
	u[0] = 1;
	vli_clear(v, ndigits);

	while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
		carry = 0;

		if (EVEN(a)) {
			vli_rshift1(a, ndigits);

			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);

			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else if (EVEN(b)) {
			vli_rshift1(b, ndigits);

			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);

			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		} else if (cmp_result > 0) {
			vli_sub(a, a, b, ndigits);
			vli_rshift1(a, ndigits);

			if (vli_cmp(u, v, ndigits) < 0)
				vli_add(u, u, mod, ndigits);

			vli_sub(u, u, v, ndigits);
			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);

			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else {
			vli_sub(b, b, a, ndigits);
			vli_rshift1(b, ndigits);

			if (vli_cmp(v, u, ndigits) < 0)
				vli_add(v, v, mod, ndigits);

			vli_sub(v, v, u, ndigits);
			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);

			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		}
	}

	vli_set(result, u, ndigits);
}
/* ------ Point operations ------ */

/* Returns true if point is the point at infinity, false otherwise. */
static bool ecc_point_is_zero(const struct ecc_point *point)
{
	return (vli_is_zero(point->x, point->ndigits) &&
		vli_is_zero(point->y, point->ndigits));
}
/* Point multiplication algorithm using Montgomery's ladder with co-Z
 * coordinates. From http://eprint.iacr.org/2011/338.pdf
 */

/* Double in place */
static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
				      u64 *curve_prime, unsigned int ndigits)
{
	/* t1 = x, t2 = y, t3 = z */
	u64 t4[ndigits];
	u64 t5[ndigits];

	if (vli_is_zero(z1, ndigits))
		return;

	/* t4 = y1^2 */
	vli_mod_square_fast(t4, y1, curve_prime, ndigits);
	/* t5 = x1*y1^2 = A */
	vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits);
	/* t4 = y1^4 */
	vli_mod_square_fast(t4, t4, curve_prime, ndigits);
	/* t2 = y1*z1 = z3 */
	vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits);
	/* t3 = z1^2 */
	vli_mod_square_fast(z1, z1, curve_prime, ndigits);

	/* t1 = x1 + z1^2 */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
	/* t3 = 2*z1^2 */
	vli_mod_add(z1, z1, z1, curve_prime, ndigits);
	/* t3 = x1 - z1^2 */
	vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
	/* t1 = x1^2 - z1^4 */
	vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits);

	/* t3 = 2*(x1^2 - z1^4) */
	vli_mod_add(z1, x1, x1, curve_prime, ndigits);
	/* t1 = 3*(x1^2 - z1^4) */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);

	/* Halve t1 modulo the prime: if it is odd, add the (odd) prime first
	 * so the shift is exact, folding the carry back into the top bit.
	 */
	if (vli_test_bit(x1, 0)) {
		u64 carry = vli_add(x1, x1, curve_prime, ndigits);

		vli_rshift1(x1, ndigits);
		x1[ndigits - 1] |= carry << 63;
	} else {
		vli_rshift1(x1, ndigits);
	}
	/* t1 = 3/2*(x1^2 - z1^4) = B */

	/* t3 = B^2 */
	vli_mod_square_fast(z1, x1, curve_prime, ndigits);
	/* t3 = B^2 - A */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t3 = B^2 - 2A = x3 */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t5 = A - x3 */
	vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
	/* t1 = B * (A - x3) */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t4 = B * (A - x3) - y1^4 = y3 */
	vli_mod_sub(t4, x1, t4, curve_prime, ndigits);

	vli_set(x1, z1, ndigits);
	vli_set(z1, y1, ndigits);
	vli_set(y1, t4, ndigits);
}
/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
		    unsigned int ndigits)
{
	u64 t1[ndigits];

	vli_mod_square_fast(t1, z, curve_prime, ndigits);    /* z^2 */
	vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
	vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits);  /* z^3 */
	vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */
}

/* P = (x1, y1) => 2P, (x2, y2) => P' */
static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
				u64 *p_initial_z, u64 *curve_prime,
				unsigned int ndigits)
{
	u64 z[ndigits];

	vli_set(x2, x1, ndigits);
	vli_set(y2, y1, ndigits);

	vli_clear(z, ndigits);
	z[0] = 1;

	if (p_initial_z)
		vli_set(z, p_initial_z, ndigits);

	apply_z(x1, y1, z, curve_prime, ndigits);

	ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits);

	apply_z(x2, y2, z, curve_prime, ndigits);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
 * or P => P', Q => P + Q
 */
static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
		     unsigned int ndigits)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ndigits];

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
	/* t5 = (y2 - y1)^2 = D */
	vli_mod_square_fast(t5, y2, curve_prime, ndigits);

	/* t5 = D - B */
	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
	/* t5 = D - B - C = x3 */
	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
	/* t3 = C - B */
	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
	/* t2 = y1*(C - B) */
	vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits);
	/* t3 = B - x3 */
	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	vli_set(x2, t5, ndigits);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
 * or P => P - Q, Q => P + Q
 */
static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
		       unsigned int ndigits)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ndigits];
	u64 t6[ndigits];
	u64 t7[ndigits];

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
	/* t4 = y2 + y1 */
	vli_mod_add(t5, y2, y1, curve_prime, ndigits);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t6 = C - B */
	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
	/* t2 = y1 * (C - B) */
	vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits);
	/* t6 = B + C */
	vli_mod_add(t6, x1, x2, curve_prime, ndigits);
	/* t3 = (y2 - y1)^2 */
	vli_mod_square_fast(x2, y2, curve_prime, ndigits);
	/* t3 = x3 */
	vli_mod_sub(x2, x2, t6, curve_prime, ndigits);

	/* t7 = B - x3 */
	vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t7 = (y2 + y1)^2 = F */
	vli_mod_square_fast(t7, t5, curve_prime, ndigits);
	/* t7 = x3' */
	vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
	/* t6 = x3' - B */
	vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
	/* t6 = (y2 + y1)*(x3' - B) */
	vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits);
	/* t2 = y3' */
	vli_mod_sub(y1, t6, y1, curve_prime, ndigits);

	vli_set(x1, t7, ndigits);
}
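/* Montgomery ladder over the scalar bits, processed from the most
 * significant set bit downwards. rx[]/ry[] hold the two ladder points in
 * co-Z form; each iteration performs one conjugate addition and one
 * addition regardless of the bit value, only the operand order depends on
 * the bit. The block after the loop recovers the final 1/Z value so the
 * result can be mapped back to affine coordinates with apply_z().
 */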
static void ecc_point_mult(struct ecc_point *result,
			   const struct ecc_point *point, const u64 *scalar,
			   u64 *initial_z, u64 *curve_prime,
			   unsigned int ndigits)
{
	/* R0 and R1 */
	u64 rx[2][ndigits];
	u64 ry[2][ndigits];
	u64 z[ndigits];
	int i, nb;
	int num_bits = vli_num_bits(scalar, ndigits);

	vli_set(rx[1], point->x, ndigits);
	vli_set(ry[1], point->y, ndigits);

	xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve_prime,
			    ndigits);

	for (i = num_bits - 2; i > 0; i--) {
		nb = !vli_test_bit(scalar, i);
		xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime,
			   ndigits);
		xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime,
			 ndigits);
	}

	nb = !vli_test_bit(scalar, 0);
	xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime,
		   ndigits);

	/* Find final 1/Z value. */
	/* X1 - X0 */
	vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
	/* Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits);
	/* xP * Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits);
	/* 1 / (xP * Yb * (X1 - X0)) */
	vli_mod_inv(z, z, curve_prime, point->ndigits);
	/* yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits);
	/* Xb * yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits);
	/* End 1/Z calculation */

	xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, ndigits);

	apply_z(rx[0], ry[0], z, curve_prime, ndigits);

	vli_set(result->x, rx[0], ndigits);
	vli_set(result->y, ry[0], ndigits);
}
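/* Convert between the big-endian byte order used at the API boundary and the
 * little-endian array of 64-bit digits (least significant digit first) used
 * internally. For example, with ndigits == 2,
 * in = { 0x0011223344556677, 0x8899aabbccddeeff } becomes
 * out = { 0xffeeddccbbaa9988, 0x7766554433221100 }.
 */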
static inline void ecc_swap_digits(const u64 *in, u64 *out,
				   unsigned int ndigits)
{
	int i;

	for (i = 0; i < ndigits; i++)
		out[i] = __swab64(in[ndigits - 1 - i]);
}
int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
		     const u8 *private_key, unsigned int private_key_len)
{
	int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !curve)
		return -EINVAL;

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

	if (private_key_len != nbytes)
		return -EINVAL;

	if (vli_is_zero((const u64 *)&private_key[0], ndigits))
		return -EINVAL;

	/* Make sure the private key is in the range [1, n-1]. */
	if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1)
		return -EINVAL;

	return 0;
}
int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
		      const u8 *private_key, unsigned int private_key_len,
		      u8 *public_key, unsigned int public_key_len)
{
	int ret = 0;
	struct ecc_point *pk;
	u64 priv[ndigits];
	unsigned int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !curve) {
		ret = -EINVAL;
		goto out;
	}

	ecc_swap_digits((const u64 *)private_key, priv, ndigits);

	pk = ecc_alloc_point(ndigits);
	if (!pk) {
		ret = -ENOMEM;
		goto out;
	}

	ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
	if (ecc_point_is_zero(pk)) {
		ret = -EAGAIN;
		goto err_free_point;
	}

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);

err_free_point:
	ecc_free_point(pk);
out:
	return ret;
}
int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
			      const u8 *private_key, unsigned int private_key_len,
			      const u8 *public_key, unsigned int public_key_len,
			      u8 *secret, unsigned int secret_len)
{
	int ret = 0;
	struct ecc_point *product, *pk;
	u64 priv[ndigits];
	u64 rand_z[ndigits];
	unsigned int nbytes;
	const struct ecc_curve *curve = ecc_get_curve(curve_id);

	if (!private_key || !public_key || !curve) {
		ret = -EINVAL;
		goto out;
	}

	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

	get_random_bytes(rand_z, nbytes);

	pk = ecc_alloc_point(ndigits);
	if (!pk) {
		ret = -ENOMEM;
		goto out;
	}

	product = ecc_alloc_point(ndigits);
	if (!product) {
		ret = -ENOMEM;
		goto err_alloc_product;
	}

	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
	ecc_swap_digits((const u64 *)private_key, priv, ndigits);

	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);

	ecc_swap_digits(product->x, (u64 *)secret, ndigits);

	if (ecc_point_is_zero(product))
		ret = -EFAULT;

	ecc_free_point(product);
err_alloc_product:
	ecc_free_point(pk);
out:
	return ret;
}
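/* Illustrative call sequence for the three exported helpers above, roughly
 * what a caller such as crypto/ecdh.c would do. The variable names
 * (curve_id, ndigits, priv, pub, peer_pub, secret, nbytes) are hypothetical;
 * only the function signatures come from this file:
 *
 *	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 *
 *	if (ecc_is_key_valid(curve_id, ndigits, priv, nbytes))
 *		return -EINVAL;
 *
 *	ret = ecdh_make_pub_key(curve_id, ndigits, priv, nbytes,
 *				pub, 2 * nbytes);
 *	if (ret)
 *		return ret;
 *
 *	ret = crypto_ecdh_shared_secret(curve_id, ndigits, priv, nbytes,
 *					peer_pub, 2 * nbytes,
 *					secret, nbytes);
 */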