sun4i-ss-hash.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
  4. *
  5. * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
  6. *
  7. * This file add support for MD5 and SHA1.
  8. *
  9. * You could find the datasheet in Documentation/arm/sunxi.rst
  10. */
  11. #include "sun4i-ss.h"
  12. #include <linux/scatterlist.h>
  13. /* This is a totally arbitrary value */
  14. #define SS_TIMEOUT 100
  15. int sun4i_hash_crainit(struct crypto_tfm *tfm)
  16. {
  17. struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
  18. struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
  19. struct sun4i_ss_alg_template *algt;
  20. memset(op, 0, sizeof(struct sun4i_tfm_ctx));
  21. algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  22. op->ss = algt->ss;
  23. crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
  24. sizeof(struct sun4i_req_ctx));
  25. return 0;
  26. }
  27. /* sun4i_hash_init: initialize request context */
  28. int sun4i_hash_init(struct ahash_request *areq)
  29. {
  30. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  31. struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
  32. struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
  33. struct sun4i_ss_alg_template *algt;
  34. memset(op, 0, sizeof(struct sun4i_req_ctx));
  35. algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  36. op->mode = algt->mode;
  37. return 0;
  38. }
/*
 * sun4i_hash_export_md5 - export the partial MD5 state into a struct md5_state
 *
 * The total processed length is the bytes already hashed by the engine
 * (op->byte_count, always a multiple of 64) plus the bytes still waiting
 * in the linearization buffer (op->len).
 */
int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct md5_state *octx = out;
	int i;

	octx->byte_count = op->byte_count + op->len;
	memcpy(octx->block, op->buf, op->len);

	if (op->byte_count) {
		/* the engine produced an intermediate digest: export it */
		for (i = 0; i < 4; i++)
			octx->hash[i] = op->hash[i];
	} else {
		/*
		 * No full block hashed yet, so export the MD5 initial
		 * values. NOTE: SHA1_H0..SHA1_H3 are used deliberately:
		 * they are numerically identical to the four MD5 initial
		 * words (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476),
		 * so this is correct despite the SHA1-named constants.
		 */
		octx->hash[0] = SHA1_H0;
		octx->hash[1] = SHA1_H1;
		octx->hash[2] = SHA1_H2;
		octx->hash[3] = SHA1_H3;
	}

	return 0;
}
  57. int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
  58. {
  59. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  60. const struct md5_state *ictx = in;
  61. int i;
  62. sun4i_hash_init(areq);
  63. op->byte_count = ictx->byte_count & ~0x3F;
  64. op->len = ictx->byte_count & 0x3F;
  65. memcpy(op->buf, ictx->block, op->len);
  66. for (i = 0; i < 4; i++)
  67. op->hash[i] = ictx->hash[i];
  68. return 0;
  69. }
  70. int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
  71. {
  72. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  73. struct sha1_state *octx = out;
  74. int i;
  75. octx->count = op->byte_count + op->len;
  76. memcpy(octx->buffer, op->buf, op->len);
  77. if (op->byte_count) {
  78. for (i = 0; i < 5; i++)
  79. octx->state[i] = op->hash[i];
  80. } else {
  81. octx->state[0] = SHA1_H0;
  82. octx->state[1] = SHA1_H1;
  83. octx->state[2] = SHA1_H2;
  84. octx->state[3] = SHA1_H3;
  85. octx->state[4] = SHA1_H4;
  86. }
  87. return 0;
  88. }
  89. int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
  90. {
  91. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  92. const struct sha1_state *ictx = in;
  93. int i;
  94. sun4i_hash_init(areq);
  95. op->byte_count = ictx->count & ~0x3F;
  96. op->len = ictx->count & 0x3F;
  97. memcpy(op->buf, ictx->buffer, op->len);
  98. for (i = 0; i < 5; i++)
  99. op->hash[i] = ictx->state[i];
  100. return 0;
  101. }
  102. #define SS_HASH_UPDATE 1
  103. #define SS_HASH_FINAL 2
/*
 * sun4i_hash: feed data to the hash engine and drive it to completion
 *
 * Could be used for both SHA1 and MD5 (op->mode selects the operation).
 * Write data by step of 32bits and put then in the SS.
 *
 * Since we cannot leave partial data and hash state in the engine,
 * we need to get the hash state at the end of this function.
 * We can get the hash state every 64 bytes
 *
 * So the first work is to get the number of bytes to write to SS modulo 64
 * The extra bytes will go to a temporary buffer op->buf storing op->len bytes
 *
 * So at the begin of update()
 * if op->len + areq->nbytes < 64
 * => all data will be written to wait buffer (op->buf) and end=0
 * if not, write all data from op->buf to the device and position end to
 * complete to 64bytes
 *
 * example 1:
 * update1 60o => op->len=60
 * update2 60o => need one more word to have 64 bytes
 * end=4
 * so write all data from op->buf and one word of SGs
 * write remaining data in op->buf
 * final state op->len=56
 */
static int sun4i_hash(struct ahash_request *areq)
{
	/*
	 * i is the total bytes read from SGs, to be compared to areq->nbytes
	 * i is important because we cannot rely on SG length since the sum of
	 * SG->length could be greater than areq->nbytes
	 *
	 * end is the position when we need to stop writing to the device,
	 * to be compared to i
	 *
	 * in_i: advancement in the current SG
	 */
	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
	unsigned int in_i = 0;
	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
	struct sun4i_ss_ctx *ss = tfmctx->ss;
	struct scatterlist *in_sg = areq->src;
	struct sg_mapping_iter mi;
	int in_r, err = 0;
	size_t copied = 0;
	__le32 wb = 0;

	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
		__func__, crypto_tfm_alg_name(areq->base.tfm),
		op->byte_count, areq->nbytes, op->mode,
		op->len, op->hash[0]);

	/* an empty non-final update is a no-op */
	if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
		return 0;

	/* protect against overflow of op->len + areq->nbytes below */
	if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
		dev_err(ss->dev, "Cannot process too large request\n");
		return -EINVAL;
	}

	if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
		/*
		 * Less than one 64-byte block and not final: just
		 * linearize the data into the wait buffer op->buf and
		 * return without touching the hardware.
		 */
		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
					    op->buf + op->len, areq->nbytes, 0);
		op->len += copied;
		return 0;
	}

	/* the SS is a single shared engine: serialize all access to it */
	spin_lock_bh(&ss->slock);

	/*
	 * if some data have been processed before,
	 * we need to restore the partial hash state
	 * by loading it into the IV registers before enabling the engine
	 */
	if (op->byte_count) {
		ivmode = SS_IV_ARBITRARY;
		for (i = 0; i < 5; i++)
			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
	}
	/* Enable the device */
	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);

	/* final-only request (no new data to feed): go pad and finish */
	if (!(op->flags & SS_HASH_UPDATE))
		goto hash_final;

	/* start of handling data */
	if (!(op->flags & SS_HASH_FINAL)) {
		/*
		 * Not final: we may only feed whole 64-byte blocks to the
		 * engine, so stop at the largest multiple of 64 reachable
		 * with op->len already-buffered bytes counted in.
		 */
		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;

		if (end > areq->nbytes || areq->nbytes - end > 63) {
			dev_err(ss->dev, "ERROR: Bound error %u %u\n",
				end, areq->nbytes);
			err = -EINVAL;
			goto release_ss;
		}
	} else {
		/* Since we have the flag final, we can go up to modulo 4 */
		if (areq->nbytes < 4)
			end = 0;
		else
			end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
	}

	/* TODO if SGlen % 4 and !op->len then DMA */
	i = 1;
	while (in_sg && i == 1) {
		if (in_sg->length % 4)
			i = 0;
		in_sg = sg_next(in_sg);
	}
	if (i == 1 && !op->len && areq->nbytes)
		dev_dbg(ss->dev, "We can DMA\n");

	i = 0;
	sg_miter_start(&mi, areq->src, sg_nents(areq->src),
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	sg_miter_next(&mi);
	in_i = 0;

	do {
		/*
		 * we need to linearize in two case:
		 * - the buffer is already used
		 * - the SG does not have enough byte remaining ( < 4)
		 */
		if (op->len || (mi.length - in_i) < 4) {
			/*
			 * if we have entered here we have two reason to stop
			 * - the buffer is full
			 * - reach the end
			 */
			while (op->len < 64 && i < end) {
				/* how many bytes we can read from current SG */
				in_r = min(end - i, 64 - op->len);
				in_r = min_t(size_t, mi.length - in_i, in_r);
				memcpy(op->buf + op->len, mi.addr + in_i, in_r);
				op->len += in_r;
				i += in_r;
				in_i += in_r;
				if (in_i == mi.length) {
					sg_miter_next(&mi);
					in_i = 0;
				}
			}
			if (op->len > 3 && !(op->len % 4)) {
				/* write buf to the device */
				writesl(ss->base + SS_RXFIFO, op->buf,
					op->len / 4);
				op->byte_count += op->len;
				op->len = 0;
			}
		}
		if (mi.length - in_i > 3 && i < end) {
			/* how many bytes we can read from current SG */
			in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i);
			in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r);
			/*
			 * how many words we can write in the device, bounded
			 * by the remaining data, the free FIFO slots (rx_cnt)
			 * and the bytes available in the current SG
			 */
			todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
			writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
			op->byte_count += todo * 4;
			i += todo * 4;
			in_i += todo * 4;
			rx_cnt -= todo;
			if (!rx_cnt) {
				/* FIFO full: re-read how much space is free */
				spaces = readl(ss->base + SS_FCSR);
				rx_cnt = SS_RXFIFO_SPACES(spaces);
			}
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	} while (i < end);
	/*
	 * Now we have written to the device all that we can,
	 * store the remaining bytes (at most 63) in op->buf
	 */
	if ((areq->nbytes - i) < 64) {
		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
			/* how many bytes we can read from current SG */
			in_r = min(areq->nbytes - i, 64 - op->len);
			in_r = min_t(size_t, mi.length - in_i, in_r);
			memcpy(op->buf + op->len, mi.addr + in_i, in_r);
			op->len += in_r;
			i += in_r;
			in_i += in_r;
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	}

	sg_miter_stop(&mi);

	/*
	 * End of data process
	 * Now if we have the flag final go to finalize part
	 * If not, store the partial hash
	 */
	if (op->flags & SS_HASH_FINAL)
		goto hash_final;

	/* ask the engine to compute the intermediate digest */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	/* save the intermediate state for the next update()/final() */
	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);

	goto release_ss;

/*
 * hash_final: finalize hashing operation
 *
 * If we have some remaining bytes, we write them.
 * Then ask the SS for finalizing the hashing operation
 *
 * I do not check RX FIFO size in this function since the size is 32
 * after each enabling and this function neither write more than 32 words.
 * If we come from the update part, we cannot have more than
 * 3 remaining bytes to write and SS is fast enough to not care about it.
 */
hash_final:

	/* write the remaining words of the wait buffer */
	if (op->len) {
		nwait = op->len / 4;
		if (nwait) {
			writesl(ss->base + SS_RXFIFO, op->buf, nwait);
			op->byte_count += 4 * nwait;
		}

		/* keep the 0-3 trailing bytes in wb, masked to nbw bytes */
		nbw = op->len - 4 * nwait;
		if (nbw) {
			wb = cpu_to_le32(*(u32 *)(op->buf + nwait * 4));
			wb &= GENMASK((nbw * 8) - 1, 0);

			op->byte_count += nbw;
		}
	}

	/* append the mandatory padding bit (0x80) after the last data byte */
	wb |= ((1 << 7) << (nbw * 8));
	bf[j++] = le32_to_cpu(wb);

	/*
	 * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1)
	 * I take the operations from other MD5/SHA1 implementations
	 */

	/* last block size */
	fill = 64 - (op->byte_count % 64);
	min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));

	/* if we can't fill all data, jump to the next 64 block */
	if (fill < min_fill)
		fill += 64;

	j += (fill - min_fill) / sizeof(u32);

	/* write the length of data: big-endian for SHA1, little for MD5 */
	if (op->mode == SS_OP_SHA1) {
		__be64 *bits = (__be64 *)&bf[j];
		*bits = cpu_to_be64(op->byte_count << 3);
		j += 2;
	} else {
		__le64 *bits = (__le64 *)&bf[j];
		*bits = cpu_to_le64(op->byte_count << 3);
		j += 2;
	}
	writesl(ss->base + SS_RXFIFO, bf, j);

	/* Tell the SS to stop the hashing */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);

	/*
	 * Wait for SS to finish the hash.
	 * The timeout could happen only in case of bad overclocking
	 * or driver bug.
	 */
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	/* Get the hash from the device, in the digest's wire byte order */
	if (op->mode == SS_OP_SHA1) {
		for (i = 0; i < 5; i++) {
			v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
			memcpy(areq->result + i * 4, &v, 4);
		}
	} else {
		for (i = 0; i < 4; i++) {
			v = cpu_to_le32(readl(ss->base + SS_MD0 + i * 4));
			memcpy(areq->result + i * 4, &v, 4);
		}
	}

release_ss:
	/* disable the engine and release the lock on all exit paths */
	writel(0, ss->base + SS_CTL);
	spin_unlock_bh(&ss->slock);
	return err;
}
  417. int sun4i_hash_final(struct ahash_request *areq)
  418. {
  419. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  420. op->flags = SS_HASH_FINAL;
  421. return sun4i_hash(areq);
  422. }
  423. int sun4i_hash_update(struct ahash_request *areq)
  424. {
  425. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  426. op->flags = SS_HASH_UPDATE;
  427. return sun4i_hash(areq);
  428. }
  429. /* sun4i_hash_finup: finalize hashing operation after an update */
  430. int sun4i_hash_finup(struct ahash_request *areq)
  431. {
  432. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  433. op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
  434. return sun4i_hash(areq);
  435. }
  436. /* combo of init/update/final functions */
  437. int sun4i_hash_digest(struct ahash_request *areq)
  438. {
  439. int err;
  440. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  441. err = sun4i_hash_init(areq);
  442. if (err)
  443. return err;
  444. op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
  445. return sun4i_hash(areq);
  446. }