nx-842.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. /*
  2. * Cryptographic API for the NX-842 hardware compression.
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * Copyright (C) IBM Corporation, 2011-2015
  15. *
  16. * Designer of the Power data compression engine:
  17. * Bulent Abali <abali@us.ibm.com>
  18. *
  19. * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
  20. * Seth Jennings <sjenning@linux.vnet.ibm.com>
  21. *
  22. * Rewrite: Dan Streetman <ddstreet@ieee.org>
  23. *
  24. * This is an interface to the NX-842 compression hardware in PowerPC
  25. * processors. Most of the complexity of this driver is due to the fact that
  26. * the NX-842 compression hardware requires the input and output data buffers
  27. * to be specifically aligned, to be a specific multiple in length, and within
  28. * specific minimum and maximum lengths. Those restrictions, provided by the
  29. * nx-842 driver via nx842_constraints, mean this driver must use bounce
  30. * buffers and headers to correct misaligned in or out buffers, and to split
  31. * input buffers that are too large.
  32. *
  33. * This driver will fall back to software decompression if the hardware
  34. * decompression fails, so this driver's decompression should never fail as
  35. * long as the provided compressed buffer is valid. Any compressed buffer
  36. * created by this driver will have a header (except ones where the input
  37. * perfectly matches the constraints); so users of this driver cannot simply
  38. * pass a compressed buffer created by this driver over to the 842 software
  39. * decompression library. Instead, users must use this driver to decompress;
  40. * if the hardware fails or is unavailable, the compressed buffer will be
  41. * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
  42. * software decompression library.
  43. *
  44. * This does not fall back to software compression, however, since the caller
  45. * of this function is specifically requesting hardware compression; if the
  46. * hardware compression fails, the caller can fall back to software
  47. * compression, and the raw 842 compressed buffer that the software compressor
  48. * creates can be passed to this driver for hardware decompression; any
  49. * buffer without our specific header magic is assumed to be a raw 842 buffer
  50. * and passed directly to the hardware. Note that the software compression
  51. * library will produce a compressed buffer that is incompatible with the
  52. * hardware decompressor if the original input buffer length is not a multiple
  53. * of 8; if such a compressed buffer is passed to this driver for
  54. * decompression, the hardware will reject it and this driver will then pass
  55. * it over to the software library for decompression.
  56. */
  57. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  58. #include <linux/vmalloc.h>
  59. #include <linux/sw842.h>
  60. #include <linux/spinlock.h>
  61. #include "nx-842.h"
  62. /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
  63. * template (see lib/842/842.h), so this magic number will never appear at
  64. * the start of a raw 842 compressed buffer. That is important, as any buffer
  65. * passed to us without this magic is assumed to be a raw 842 compressed
  66. * buffer, and passed directly to the hardware to decompress.
  67. */
  68. #define NX842_CRYPTO_MAGIC (0xf842)
  69. #define NX842_CRYPTO_HEADER_SIZE(g) \
  70. (sizeof(struct nx842_crypto_header) + \
  71. sizeof(struct nx842_crypto_header_group) * (g))
  72. #define NX842_CRYPTO_HEADER_MAX_SIZE \
  73. NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
  74. /* bounce buffer size */
  75. #define BOUNCE_BUFFER_ORDER (2)
  76. #define BOUNCE_BUFFER_SIZE \
  77. ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
  78. /* try longer on comp because we can fallback to sw decomp if hw is busy */
  79. #define COMP_BUSY_TIMEOUT (250) /* ms */
  80. #define DECOMP_BUSY_TIMEOUT (50) /* ms */
  81. struct nx842_crypto_param {
  82. u8 *in;
  83. unsigned int iremain;
  84. u8 *out;
  85. unsigned int oremain;
  86. unsigned int ototal;
  87. };
  88. static int update_param(struct nx842_crypto_param *p,
  89. unsigned int slen, unsigned int dlen)
  90. {
  91. if (p->iremain < slen)
  92. return -EOVERFLOW;
  93. if (p->oremain < dlen)
  94. return -ENOSPC;
  95. p->in += slen;
  96. p->iremain -= slen;
  97. p->out += dlen;
  98. p->oremain -= dlen;
  99. p->ototal += dlen;
  100. return 0;
  101. }
  102. int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
  103. {
  104. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  105. spin_lock_init(&ctx->lock);
  106. ctx->driver = driver;
  107. ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
  108. ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
  109. ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
  110. if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
  111. kfree(ctx->wmem);
  112. free_page((unsigned long)ctx->sbounce);
  113. free_page((unsigned long)ctx->dbounce);
  114. return -ENOMEM;
  115. }
  116. return 0;
  117. }
  118. EXPORT_SYMBOL_GPL(nx842_crypto_init);
  119. void nx842_crypto_exit(struct crypto_tfm *tfm)
  120. {
  121. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  122. kfree(ctx->wmem);
  123. free_page((unsigned long)ctx->sbounce);
  124. free_page((unsigned long)ctx->dbounce);
  125. }
  126. EXPORT_SYMBOL_GPL(nx842_crypto_exit);
  127. static void check_constraints(struct nx842_constraints *c)
  128. {
  129. /* limit maximum, to always have enough bounce buffer to decompress */
  130. if (c->maximum > BOUNCE_BUFFER_SIZE)
  131. c->maximum = BOUNCE_BUFFER_SIZE;
  132. }
  133. static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
  134. {
  135. int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
  136. /* compress should have added space for header */
  137. if (s > be16_to_cpu(hdr->group[0].padding)) {
  138. pr_err("Internal error: no space for header\n");
  139. return -EINVAL;
  140. }
  141. memcpy(buf, hdr, s);
  142. print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
  143. return 0;
  144. }
  145. static int compress(struct nx842_crypto_ctx *ctx,
  146. struct nx842_crypto_param *p,
  147. struct nx842_crypto_header_group *g,
  148. struct nx842_constraints *c,
  149. u16 *ignore,
  150. unsigned int hdrsize)
  151. {
  152. unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
  153. unsigned int adj_slen = slen;
  154. u8 *src = p->in, *dst = p->out;
  155. int ret, dskip = 0;
  156. ktime_t timeout;
  157. if (p->iremain == 0)
  158. return -EOVERFLOW;
  159. if (p->oremain == 0 || hdrsize + c->minimum > dlen)
  160. return -ENOSPC;
  161. if (slen % c->multiple)
  162. adj_slen = round_up(slen, c->multiple);
  163. if (slen < c->minimum)
  164. adj_slen = c->minimum;
  165. if (slen > c->maximum)
  166. adj_slen = slen = c->maximum;
  167. if (adj_slen > slen || (u64)src % c->alignment) {
  168. adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
  169. slen = min(slen, BOUNCE_BUFFER_SIZE);
  170. if (adj_slen > slen)
  171. memset(ctx->sbounce + slen, 0, adj_slen - slen);
  172. memcpy(ctx->sbounce, src, slen);
  173. src = ctx->sbounce;
  174. slen = adj_slen;
  175. pr_debug("using comp sbounce buffer, len %x\n", slen);
  176. }
  177. dst += hdrsize;
  178. dlen -= hdrsize;
  179. if ((u64)dst % c->alignment) {
  180. dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
  181. dst += dskip;
  182. dlen -= dskip;
  183. }
  184. if (dlen % c->multiple)
  185. dlen = round_down(dlen, c->multiple);
  186. if (dlen < c->minimum) {
  187. nospc:
  188. dst = ctx->dbounce;
  189. dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
  190. dlen = round_down(dlen, c->multiple);
  191. dskip = 0;
  192. pr_debug("using comp dbounce buffer, len %x\n", dlen);
  193. }
  194. if (dlen > c->maximum)
  195. dlen = c->maximum;
  196. tmplen = dlen;
  197. timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
  198. do {
  199. dlen = tmplen; /* reset dlen, if we're retrying */
  200. ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
  201. /* possibly we should reduce the slen here, instead of
  202. * retrying with the dbounce buffer?
  203. */
  204. if (ret == -ENOSPC && dst != ctx->dbounce)
  205. goto nospc;
  206. } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
  207. if (ret)
  208. return ret;
  209. dskip += hdrsize;
  210. if (dst == ctx->dbounce)
  211. memcpy(p->out + dskip, dst, dlen);
  212. g->padding = cpu_to_be16(dskip);
  213. g->compressed_length = cpu_to_be32(dlen);
  214. g->uncompressed_length = cpu_to_be32(slen);
  215. if (p->iremain < slen) {
  216. *ignore = slen - p->iremain;
  217. slen = p->iremain;
  218. }
  219. pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
  220. slen, *ignore, dlen, dskip);
  221. return update_param(p, slen, dskip + dlen);
  222. }
  223. int nx842_crypto_compress(struct crypto_tfm *tfm,
  224. const u8 *src, unsigned int slen,
  225. u8 *dst, unsigned int *dlen)
  226. {
  227. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  228. struct nx842_crypto_header *hdr = &ctx->header;
  229. struct nx842_crypto_param p;
  230. struct nx842_constraints c = *ctx->driver->constraints;
  231. unsigned int groups, hdrsize, h;
  232. int ret, n;
  233. bool add_header;
  234. u16 ignore = 0;
  235. check_constraints(&c);
  236. p.in = (u8 *)src;
  237. p.iremain = slen;
  238. p.out = dst;
  239. p.oremain = *dlen;
  240. p.ototal = 0;
  241. *dlen = 0;
  242. groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
  243. DIV_ROUND_UP(p.iremain, c.maximum));
  244. hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
  245. spin_lock_bh(&ctx->lock);
  246. /* skip adding header if the buffers meet all constraints */
  247. add_header = (p.iremain % c.multiple ||
  248. p.iremain < c.minimum ||
  249. p.iremain > c.maximum ||
  250. (u64)p.in % c.alignment ||
  251. p.oremain % c.multiple ||
  252. p.oremain < c.minimum ||
  253. p.oremain > c.maximum ||
  254. (u64)p.out % c.alignment);
  255. hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
  256. hdr->groups = 0;
  257. hdr->ignore = 0;
  258. while (p.iremain > 0) {
  259. n = hdr->groups++;
  260. ret = -ENOSPC;
  261. if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
  262. goto unlock;
  263. /* header goes before first group */
  264. h = !n && add_header ? hdrsize : 0;
  265. if (ignore)
  266. pr_warn("internal error, ignore is set %x\n", ignore);
  267. ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
  268. if (ret)
  269. goto unlock;
  270. }
  271. if (!add_header && hdr->groups > 1) {
  272. pr_err("Internal error: No header but multiple groups\n");
  273. ret = -EINVAL;
  274. goto unlock;
  275. }
  276. /* ignore indicates the input stream needed to be padded */
  277. hdr->ignore = cpu_to_be16(ignore);
  278. if (ignore)
  279. pr_debug("marked %d bytes as ignore\n", ignore);
  280. if (add_header)
  281. ret = nx842_crypto_add_header(hdr, dst);
  282. if (ret)
  283. goto unlock;
  284. *dlen = p.ototal;
  285. pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
  286. unlock:
  287. spin_unlock_bh(&ctx->lock);
  288. return ret;
  289. }
  290. EXPORT_SYMBOL_GPL(nx842_crypto_compress);
  291. static int decompress(struct nx842_crypto_ctx *ctx,
  292. struct nx842_crypto_param *p,
  293. struct nx842_crypto_header_group *g,
  294. struct nx842_constraints *c,
  295. u16 ignore)
  296. {
  297. unsigned int slen = be32_to_cpu(g->compressed_length);
  298. unsigned int required_len = be32_to_cpu(g->uncompressed_length);
  299. unsigned int dlen = p->oremain, tmplen;
  300. unsigned int adj_slen = slen;
  301. u8 *src = p->in, *dst = p->out;
  302. u16 padding = be16_to_cpu(g->padding);
  303. int ret, spadding = 0, dpadding = 0;
  304. ktime_t timeout;
  305. if (!slen || !required_len)
  306. return -EINVAL;
  307. if (p->iremain <= 0 || padding + slen > p->iremain)
  308. return -EOVERFLOW;
  309. if (p->oremain <= 0 || required_len - ignore > p->oremain)
  310. return -ENOSPC;
  311. src += padding;
  312. if (slen % c->multiple)
  313. adj_slen = round_up(slen, c->multiple);
  314. if (slen < c->minimum)
  315. adj_slen = c->minimum;
  316. if (slen > c->maximum)
  317. goto usesw;
  318. if (slen < adj_slen || (u64)src % c->alignment) {
  319. /* we can append padding bytes because the 842 format defines
  320. * an "end" template (see lib/842/842_decompress.c) and will
  321. * ignore any bytes following it.
  322. */
  323. if (slen < adj_slen)
  324. memset(ctx->sbounce + slen, 0, adj_slen - slen);
  325. memcpy(ctx->sbounce, src, slen);
  326. src = ctx->sbounce;
  327. spadding = adj_slen - slen;
  328. slen = adj_slen;
  329. pr_debug("using decomp sbounce buffer, len %x\n", slen);
  330. }
  331. if (dlen % c->multiple)
  332. dlen = round_down(dlen, c->multiple);
  333. if (dlen < required_len || (u64)dst % c->alignment) {
  334. dst = ctx->dbounce;
  335. dlen = min(required_len, BOUNCE_BUFFER_SIZE);
  336. pr_debug("using decomp dbounce buffer, len %x\n", dlen);
  337. }
  338. if (dlen < c->minimum)
  339. goto usesw;
  340. if (dlen > c->maximum)
  341. dlen = c->maximum;
  342. tmplen = dlen;
  343. timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
  344. do {
  345. dlen = tmplen; /* reset dlen, if we're retrying */
  346. ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
  347. } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
  348. if (ret) {
  349. usesw:
  350. /* reset everything, sw doesn't have constraints */
  351. src = p->in + padding;
  352. slen = be32_to_cpu(g->compressed_length);
  353. spadding = 0;
  354. dst = p->out;
  355. dlen = p->oremain;
  356. dpadding = 0;
  357. if (dlen < required_len) { /* have ignore bytes */
  358. dst = ctx->dbounce;
  359. dlen = BOUNCE_BUFFER_SIZE;
  360. }
  361. pr_info_ratelimited("using software 842 decompression\n");
  362. ret = sw842_decompress(src, slen, dst, &dlen);
  363. }
  364. if (ret)
  365. return ret;
  366. slen -= spadding;
  367. dlen -= ignore;
  368. if (ignore)
  369. pr_debug("ignoring last %x bytes\n", ignore);
  370. if (dst == ctx->dbounce)
  371. memcpy(p->out, dst, dlen);
  372. pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
  373. slen, padding, dlen, ignore);
  374. return update_param(p, slen + padding, dlen);
  375. }
  376. int nx842_crypto_decompress(struct crypto_tfm *tfm,
  377. const u8 *src, unsigned int slen,
  378. u8 *dst, unsigned int *dlen)
  379. {
  380. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  381. struct nx842_crypto_header *hdr;
  382. struct nx842_crypto_param p;
  383. struct nx842_constraints c = *ctx->driver->constraints;
  384. int n, ret, hdr_len;
  385. u16 ignore = 0;
  386. check_constraints(&c);
  387. p.in = (u8 *)src;
  388. p.iremain = slen;
  389. p.out = dst;
  390. p.oremain = *dlen;
  391. p.ototal = 0;
  392. *dlen = 0;
  393. hdr = (struct nx842_crypto_header *)src;
  394. spin_lock_bh(&ctx->lock);
  395. /* If it doesn't start with our header magic number, assume it's a raw
  396. * 842 compressed buffer and pass it directly to the hardware driver
  397. */
  398. if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
  399. struct nx842_crypto_header_group g = {
  400. .padding = 0,
  401. .compressed_length = cpu_to_be32(p.iremain),
  402. .uncompressed_length = cpu_to_be32(p.oremain),
  403. };
  404. ret = decompress(ctx, &p, &g, &c, 0);
  405. if (ret)
  406. goto unlock;
  407. goto success;
  408. }
  409. if (!hdr->groups) {
  410. pr_err("header has no groups\n");
  411. ret = -EINVAL;
  412. goto unlock;
  413. }
  414. if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
  415. pr_err("header has too many groups %x, max %x\n",
  416. hdr->groups, NX842_CRYPTO_GROUP_MAX);
  417. ret = -EINVAL;
  418. goto unlock;
  419. }
  420. hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
  421. if (hdr_len > slen) {
  422. ret = -EOVERFLOW;
  423. goto unlock;
  424. }
  425. memcpy(&ctx->header, src, hdr_len);
  426. hdr = &ctx->header;
  427. for (n = 0; n < hdr->groups; n++) {
  428. /* ignore applies to last group */
  429. if (n + 1 == hdr->groups)
  430. ignore = be16_to_cpu(hdr->ignore);
  431. ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
  432. if (ret)
  433. goto unlock;
  434. }
  435. success:
  436. *dlen = p.ototal;
  437. pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
  438. ret = 0;
  439. unlock:
  440. spin_unlock_bh(&ctx->lock);
  441. return ret;
  442. }
  443. EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
  444. MODULE_LICENSE("GPL");
  445. MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
  446. MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");