padlock-sha.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. /*
  2. * Cryptographic API.
  3. *
  4. * Support for VIA PadLock hardware crypto engine.
  5. *
  6. * Copyright (c) 2006 Michal Ludvig <michal@logix.cz>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. */
  14. #include <crypto/internal/hash.h>
  15. #include <crypto/padlock.h>
  16. #include <crypto/sha.h>
  17. #include <linux/err.h>
  18. #include <linux/module.h>
  19. #include <linux/init.h>
  20. #include <linux/errno.h>
  21. #include <linux/interrupt.h>
  22. #include <linux/kernel.h>
  23. #include <linux/scatterlist.h>
  24. #include <asm/cpu_device_id.h>
  25. #include <asm/fpu/api.h>
  26. struct padlock_sha_desc {
  27. struct shash_desc fallback;
  28. };
  /*
   * Per-transform context of the fallback-backed PadLock hashes.
   *
   * Holds the fallback shash that padlock_cra_init() allocates and
   * padlock_cra_exit() frees.
   */
  struct padlock_sha_ctx {
      struct crypto_shash *fallback;
  };
  32. static int padlock_sha_init(struct shash_desc *desc)
  33. {
  34. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  35. struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  36. dctx->fallback.tfm = ctx->fallback;
  37. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  38. return crypto_shash_init(&dctx->fallback);
  39. }
  40. static int padlock_sha_update(struct shash_desc *desc,
  41. const u8 *data, unsigned int length)
  42. {
  43. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  44. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  45. return crypto_shash_update(&dctx->fallback, data, length);
  46. }
  47. static int padlock_sha_export(struct shash_desc *desc, void *out)
  48. {
  49. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  50. return crypto_shash_export(&dctx->fallback, out);
  51. }
  52. static int padlock_sha_import(struct shash_desc *desc, const void *in)
  53. {
  54. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  55. struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  56. dctx->fallback.tfm = ctx->fallback;
  57. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  58. return crypto_shash_import(&dctx->fallback, in);
  59. }
  /*
   * Copy @count 32-bit words from @src to @dst, byte-swapping each word
   * with swab32() on the way.
   */
  static inline void padlock_output_block(uint32_t *src,
                                          uint32_t *dst, size_t count)
  {
      size_t i;

      for (i = 0; i < count; i++)
          dst[i] = swab32(src[i]);
  }
  66. static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
  67. unsigned int count, u8 *out)
  68. {
  69. /* We can't store directly to *out as it may be unaligned. */
  70. /* BTW Don't reduce the buffer size below 128 Bytes!
  71. * PadLock microcode needs it that big. */
  72. char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  73. ((aligned(STACK_ALIGN)));
  74. char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  75. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  76. struct sha1_state state;
  77. unsigned int space;
  78. unsigned int leftover;
  79. int err;
  80. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  81. err = crypto_shash_export(&dctx->fallback, &state);
  82. if (err)
  83. goto out;
  84. if (state.count + count > ULONG_MAX)
  85. return crypto_shash_finup(&dctx->fallback, in, count, out);
  86. leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
  87. space = SHA1_BLOCK_SIZE - leftover;
  88. if (space) {
  89. if (count > space) {
  90. err = crypto_shash_update(&dctx->fallback, in, space) ?:
  91. crypto_shash_export(&dctx->fallback, &state);
  92. if (err)
  93. goto out;
  94. count -= space;
  95. in += space;
  96. } else {
  97. memcpy(state.buffer + leftover, in, count);
  98. in = state.buffer;
  99. count += leftover;
  100. state.count &= ~(SHA1_BLOCK_SIZE - 1);
  101. }
  102. }
  103. memcpy(result, &state.state, SHA1_DIGEST_SIZE);
  104. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
  105. : \
  106. : "c"((unsigned long)state.count + count), \
  107. "a"((unsigned long)state.count), \
  108. "S"(in), "D"(result));
  109. padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
  110. out:
  111. return err;
  112. }
  113. static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
  114. {
  115. u8 buf[4];
  116. return padlock_sha1_finup(desc, buf, 0, out);
  117. }
  118. static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
  119. unsigned int count, u8 *out)
  120. {
  121. /* We can't store directly to *out as it may be unaligned. */
  122. /* BTW Don't reduce the buffer size below 128 Bytes!
  123. * PadLock microcode needs it that big. */
  124. char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  125. ((aligned(STACK_ALIGN)));
  126. char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  127. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  128. struct sha256_state state;
  129. unsigned int space;
  130. unsigned int leftover;
  131. int err;
  132. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  133. err = crypto_shash_export(&dctx->fallback, &state);
  134. if (err)
  135. goto out;
  136. if (state.count + count > ULONG_MAX)
  137. return crypto_shash_finup(&dctx->fallback, in, count, out);
  138. leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
  139. space = SHA256_BLOCK_SIZE - leftover;
  140. if (space) {
  141. if (count > space) {
  142. err = crypto_shash_update(&dctx->fallback, in, space) ?:
  143. crypto_shash_export(&dctx->fallback, &state);
  144. if (err)
  145. goto out;
  146. count -= space;
  147. in += space;
  148. } else {
  149. memcpy(state.buf + leftover, in, count);
  150. in = state.buf;
  151. count += leftover;
  152. state.count &= ~(SHA1_BLOCK_SIZE - 1);
  153. }
  154. }
  155. memcpy(result, &state.state, SHA256_DIGEST_SIZE);
  156. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
  157. : \
  158. : "c"((unsigned long)state.count + count), \
  159. "a"((unsigned long)state.count), \
  160. "S"(in), "D"(result));
  161. padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
  162. out:
  163. return err;
  164. }
  165. static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
  166. {
  167. u8 buf[4];
  168. return padlock_sha256_finup(desc, buf, 0, out);
  169. }
  170. static int padlock_cra_init(struct crypto_tfm *tfm)
  171. {
  172. struct crypto_shash *hash = __crypto_shash_cast(tfm);
  173. const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
  174. struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
  175. struct crypto_shash *fallback_tfm;
  176. int err = -ENOMEM;
  177. /* Allocate a fallback and abort if it failed. */
  178. fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
  179. CRYPTO_ALG_NEED_FALLBACK);
  180. if (IS_ERR(fallback_tfm)) {
  181. printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
  182. fallback_driver_name);
  183. err = PTR_ERR(fallback_tfm);
  184. goto out;
  185. }
  186. ctx->fallback = fallback_tfm;
  187. hash->descsize += crypto_shash_descsize(fallback_tfm);
  188. return 0;
  189. out:
  190. return err;
  191. }
  192. static void padlock_cra_exit(struct crypto_tfm *tfm)
  193. {
  194. struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
  195. crypto_free_shash(ctx->fallback);
  196. }
  197. static struct shash_alg sha1_alg = {
  198. .digestsize = SHA1_DIGEST_SIZE,
  199. .init = padlock_sha_init,
  200. .update = padlock_sha_update,
  201. .finup = padlock_sha1_finup,
  202. .final = padlock_sha1_final,
  203. .export = padlock_sha_export,
  204. .import = padlock_sha_import,
  205. .descsize = sizeof(struct padlock_sha_desc),
  206. .statesize = sizeof(struct sha1_state),
  207. .base = {
  208. .cra_name = "sha1",
  209. .cra_driver_name = "sha1-padlock",
  210. .cra_priority = PADLOCK_CRA_PRIORITY,
  211. .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
  212. .cra_blocksize = SHA1_BLOCK_SIZE,
  213. .cra_ctxsize = sizeof(struct padlock_sha_ctx),
  214. .cra_module = THIS_MODULE,
  215. .cra_init = padlock_cra_init,
  216. .cra_exit = padlock_cra_exit,
  217. }
  218. };
  219. static struct shash_alg sha256_alg = {
  220. .digestsize = SHA256_DIGEST_SIZE,
  221. .init = padlock_sha_init,
  222. .update = padlock_sha_update,
  223. .finup = padlock_sha256_finup,
  224. .final = padlock_sha256_final,
  225. .export = padlock_sha_export,
  226. .import = padlock_sha_import,
  227. .descsize = sizeof(struct padlock_sha_desc),
  228. .statesize = sizeof(struct sha256_state),
  229. .base = {
  230. .cra_name = "sha256",
  231. .cra_driver_name = "sha256-padlock",
  232. .cra_priority = PADLOCK_CRA_PRIORITY,
  233. .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
  234. .cra_blocksize = SHA256_BLOCK_SIZE,
  235. .cra_ctxsize = sizeof(struct padlock_sha_ctx),
  236. .cra_module = THIS_MODULE,
  237. .cra_init = padlock_cra_init,
  238. .cra_exit = padlock_cra_exit,
  239. }
  240. };
  /*
   * Two additional shash_alg instances for the hardware-implemented
   * multi-part hashing supported by the VIA Nano processor.
   */
  243. static int padlock_sha1_init_nano(struct shash_desc *desc)
  244. {
  245. struct sha1_state *sctx = shash_desc_ctx(desc);
  246. *sctx = (struct sha1_state){
  247. .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
  248. };
  249. return 0;
  250. }
  251. static int padlock_sha1_update_nano(struct shash_desc *desc,
  252. const u8 *data, unsigned int len)
  253. {
  254. struct sha1_state *sctx = shash_desc_ctx(desc);
  255. unsigned int partial, done;
  256. const u8 *src;
  257. /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
  258. u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  259. ((aligned(STACK_ALIGN)));
  260. u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  261. partial = sctx->count & 0x3f;
  262. sctx->count += len;
  263. done = 0;
  264. src = data;
  265. memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
  266. if ((partial + len) >= SHA1_BLOCK_SIZE) {
  267. /* Append the bytes in state's buffer to a block to handle */
  268. if (partial) {
  269. done = -partial;
  270. memcpy(sctx->buffer + partial, data,
  271. done + SHA1_BLOCK_SIZE);
  272. src = sctx->buffer;
  273. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
  274. : "+S"(src), "+D"(dst) \
  275. : "a"((long)-1), "c"((unsigned long)1));
  276. done += SHA1_BLOCK_SIZE;
  277. src = data + done;
  278. }
  279. /* Process the left bytes from the input data */
  280. if (len - done >= SHA1_BLOCK_SIZE) {
  281. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
  282. : "+S"(src), "+D"(dst)
  283. : "a"((long)-1),
  284. "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
  285. done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
  286. src = data + done;
  287. }
  288. partial = 0;
  289. }
  290. memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
  291. memcpy(sctx->buffer + partial, src, len - done);
  292. return 0;
  293. }
  294. static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
  295. {
  296. struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
  297. unsigned int partial, padlen;
  298. __be64 bits;
  299. static const u8 padding[64] = { 0x80, };
  300. bits = cpu_to_be64(state->count << 3);
  301. /* Pad out to 56 mod 64 */
  302. partial = state->count & 0x3f;
  303. padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
  304. padlock_sha1_update_nano(desc, padding, padlen);
  305. /* Append length field bytes */
  306. padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
  307. /* Swap to output */
  308. padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
  309. return 0;
  310. }
  311. static int padlock_sha256_init_nano(struct shash_desc *desc)
  312. {
  313. struct sha256_state *sctx = shash_desc_ctx(desc);
  314. *sctx = (struct sha256_state){
  315. .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
  316. SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
  317. };
  318. return 0;
  319. }
  320. static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
  321. unsigned int len)
  322. {
  323. struct sha256_state *sctx = shash_desc_ctx(desc);
  324. unsigned int partial, done;
  325. const u8 *src;
  326. /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
  327. u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  328. ((aligned(STACK_ALIGN)));
  329. u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  330. partial = sctx->count & 0x3f;
  331. sctx->count += len;
  332. done = 0;
  333. src = data;
  334. memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
  335. if ((partial + len) >= SHA256_BLOCK_SIZE) {
  336. /* Append the bytes in state's buffer to a block to handle */
  337. if (partial) {
  338. done = -partial;
  339. memcpy(sctx->buf + partial, data,
  340. done + SHA256_BLOCK_SIZE);
  341. src = sctx->buf;
  342. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
  343. : "+S"(src), "+D"(dst)
  344. : "a"((long)-1), "c"((unsigned long)1));
  345. done += SHA256_BLOCK_SIZE;
  346. src = data + done;
  347. }
  348. /* Process the left bytes from input data*/
  349. if (len - done >= SHA256_BLOCK_SIZE) {
  350. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
  351. : "+S"(src), "+D"(dst)
  352. : "a"((long)-1),
  353. "c"((unsigned long)((len - done) / 64)));
  354. done += ((len - done) - (len - done) % 64);
  355. src = data + done;
  356. }
  357. partial = 0;
  358. }
  359. memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
  360. memcpy(sctx->buf + partial, src, len - done);
  361. return 0;
  362. }
  363. static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
  364. {
  365. struct sha256_state *state =
  366. (struct sha256_state *)shash_desc_ctx(desc);
  367. unsigned int partial, padlen;
  368. __be64 bits;
  369. static const u8 padding[64] = { 0x80, };
  370. bits = cpu_to_be64(state->count << 3);
  371. /* Pad out to 56 mod 64 */
  372. partial = state->count & 0x3f;
  373. padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
  374. padlock_sha256_update_nano(desc, padding, padlen);
  375. /* Append length field bytes */
  376. padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
  377. /* Swap to output */
  378. padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
  379. return 0;
  380. }
  381. static int padlock_sha_export_nano(struct shash_desc *desc,
  382. void *out)
  383. {
  384. int statesize = crypto_shash_statesize(desc->tfm);
  385. void *sctx = shash_desc_ctx(desc);
  386. memcpy(out, sctx, statesize);
  387. return 0;
  388. }
  389. static int padlock_sha_import_nano(struct shash_desc *desc,
  390. const void *in)
  391. {
  392. int statesize = crypto_shash_statesize(desc->tfm);
  393. void *sctx = shash_desc_ctx(desc);
  394. memcpy(sctx, in, statesize);
  395. return 0;
  396. }
  397. static struct shash_alg sha1_alg_nano = {
  398. .digestsize = SHA1_DIGEST_SIZE,
  399. .init = padlock_sha1_init_nano,
  400. .update = padlock_sha1_update_nano,
  401. .final = padlock_sha1_final_nano,
  402. .export = padlock_sha_export_nano,
  403. .import = padlock_sha_import_nano,
  404. .descsize = sizeof(struct sha1_state),
  405. .statesize = sizeof(struct sha1_state),
  406. .base = {
  407. .cra_name = "sha1",
  408. .cra_driver_name = "sha1-padlock-nano",
  409. .cra_priority = PADLOCK_CRA_PRIORITY,
  410. .cra_blocksize = SHA1_BLOCK_SIZE,
  411. .cra_module = THIS_MODULE,
  412. }
  413. };
  414. static struct shash_alg sha256_alg_nano = {
  415. .digestsize = SHA256_DIGEST_SIZE,
  416. .init = padlock_sha256_init_nano,
  417. .update = padlock_sha256_update_nano,
  418. .final = padlock_sha256_final_nano,
  419. .export = padlock_sha_export_nano,
  420. .import = padlock_sha_import_nano,
  421. .descsize = sizeof(struct sha256_state),
  422. .statesize = sizeof(struct sha256_state),
  423. .base = {
  424. .cra_name = "sha256",
  425. .cra_driver_name = "sha256-padlock-nano",
  426. .cra_priority = PADLOCK_CRA_PRIORITY,
  427. .cra_blocksize = SHA256_BLOCK_SIZE,
  428. .cra_module = THIS_MODULE,
  429. }
  430. };
  431. static const struct x86_cpu_id padlock_sha_ids[] = {
  432. X86_FEATURE_MATCH(X86_FEATURE_PHE),
  433. {}
  434. };
  435. MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
  436. static int __init padlock_init(void)
  437. {
  438. int rc = -ENODEV;
  439. struct cpuinfo_x86 *c = &cpu_data(0);
  440. struct shash_alg *sha1;
  441. struct shash_alg *sha256;
  442. if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
  443. return -ENODEV;
  444. /* Register the newly added algorithm module if on *
  445. * VIA Nano processor, or else just do as before */
  446. if (c->x86_model < 0x0f) {
  447. sha1 = &sha1_alg;
  448. sha256 = &sha256_alg;
  449. } else {
  450. sha1 = &sha1_alg_nano;
  451. sha256 = &sha256_alg_nano;
  452. }
  453. rc = crypto_register_shash(sha1);
  454. if (rc)
  455. goto out;
  456. rc = crypto_register_shash(sha256);
  457. if (rc)
  458. goto out_unreg1;
  459. printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
  460. return 0;
  461. out_unreg1:
  462. crypto_unregister_shash(sha1);
  463. out:
  464. printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
  465. return rc;
  466. }
  467. static void __exit padlock_fini(void)
  468. {
  469. struct cpuinfo_x86 *c = &cpu_data(0);
  470. if (c->x86_model >= 0x0f) {
  471. crypto_unregister_shash(&sha1_alg_nano);
  472. crypto_unregister_shash(&sha256_alg_nano);
  473. } else {
  474. crypto_unregister_shash(&sha1_alg);
  475. crypto_unregister_shash(&sha256_alg);
  476. }
  477. }
  478. module_init(padlock_init);
  479. module_exit(padlock_fini);
  480. MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
  481. MODULE_LICENSE("GPL");
  482. MODULE_AUTHOR("Michal Ludvig");
  483. MODULE_ALIAS_CRYPTO("sha1-all");
  484. MODULE_ALIAS_CRYPTO("sha256-all");
  485. MODULE_ALIAS_CRYPTO("sha1-padlock");
  486. MODULE_ALIAS_CRYPTO("sha256-padlock");