/*
 * 842 Software Compression
 *
 * Copyright (C) 2015 Dan Streetman, IBM Corp
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See 842.h for details of the 842 compressed format.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#define MODULE_NAME "842_compress"

#include <linux/hashtable.h>

#include "842.h"
#include "842_debugfs.h"

#define SW842_HASHTABLE8_BITS   (10)
#define SW842_HASHTABLE4_BITS   (11)
#define SW842_HASHTABLE2_BITS   (10)

/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 compressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 */
static bool sw842_strict;
module_param_named(strict, sw842_strict, bool, 0644);

static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
        { I8, N0, N0, N0, 0x19 }, /* 8 */
        { I4, I4, N0, N0, 0x18 }, /* 18 */
        { I4, I2, I2, N0, 0x17 }, /* 25 */
        { I2, I2, I4, N0, 0x13 }, /* 25 */
        { I2, I2, I2, I2, 0x12 }, /* 32 */
        { I4, I2, D2, N0, 0x16 }, /* 33 */
        { I4, D2, I2, N0, 0x15 }, /* 33 */
        { I2, D2, I4, N0, 0x0e }, /* 33 */
        { D2, I2, I4, N0, 0x09 }, /* 33 */
        { I2, I2, I2, D2, 0x11 }, /* 40 */
        { I2, I2, D2, I2, 0x10 }, /* 40 */
        { I2, D2, I2, I2, 0x0d }, /* 40 */
        { D2, I2, I2, I2, 0x08 }, /* 40 */
        { I4, D4, N0, N0, 0x14 }, /* 41 */
        { D4, I4, N0, N0, 0x04 }, /* 41 */
        { I2, I2, D4, N0, 0x0f }, /* 48 */
        { I2, D2, I2, D2, 0x0c }, /* 48 */
        { I2, D4, I2, N0, 0x0b }, /* 48 */
        { D2, I2, I2, D2, 0x07 }, /* 48 */
        { D2, I2, D2, I2, 0x06 }, /* 48 */
        { D4, I2, I2, N0, 0x03 }, /* 48 */
        { I2, D2, D4, N0, 0x0a }, /* 56 */
        { D2, I2, D4, N0, 0x05 }, /* 56 */
        { D4, I2, D2, N0, 0x02 }, /* 56 */
        { D4, D2, I2, N0, 0x01 }, /* 56 */
        { D8, N0, N0, N0, 0x00 }, /* 64 */
};

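/* Nodes for the three hash tables used to remember where 8, 4 and 2 byte
 * values were last seen in the input, so templates can refer to repeated
 * values by index (I8/I4/I2) instead of emitting the raw data.
 */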
struct sw842_hlist_node8 {
        struct hlist_node node;
        u64 data;
        u8 index;
};

struct sw842_hlist_node4 {
        struct hlist_node node;
        u32 data;
        u16 index;
};

struct sw842_hlist_node2 {
        struct hlist_node node;
        u16 data;
        u8 index;
};

#define INDEX_NOT_FOUND         (-1)
#define INDEX_NOT_CHECKED       (-2)

struct sw842_param {
        u8 *in;
        u8 *instart;
        u64 ilen;
        u8 *out;
        u64 olen;
        u8 bit;
        u64 data8[1];
        u32 data4[2];
        u16 data2[4];
        int index8[1];
        int index4[2];
        int index2[4];
        DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
        DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
        DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
        struct sw842_hlist_node8 node8[1 << I8_BITS];
        struct sw842_hlist_node4 node4[1 << I4_BITS];
        struct sw842_hlist_node2 node2[1 << I2_BITS];
};

#define get_input_data(p, o, b) \
        be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))

#define init_hashtable_nodes(p, b)      do {                    \
        int _i;                                                 \
        hash_init((p)->htable##b);                              \
        for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {     \
                (p)->node##b[_i].index = _i;                    \
                (p)->node##b[_i].data = 0;                      \
                INIT_HLIST_NODE(&(p)->node##b[_i].node);        \
        }                                                       \
} while (0)

#define find_index(p, b, n)     ({                                      \
        struct sw842_hlist_node##b *_n;                                 \
        p->index##b[n] = INDEX_NOT_FOUND;                               \
        hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
                if (p->data##b[n] == _n->data) {                        \
                        p->index##b[n] = _n->index;                     \
                        break;                                          \
                }                                                       \
        }                                                               \
        p->index##b[n] >= 0;                                            \
})

#define check_index(p, b, n)                    \
        ((p)->index##b[n] == INDEX_NOT_CHECKED  \
         ? find_index(p, b, n)                  \
         : (p)->index##b[n] >= 0)

#define replace_hash(p, b, i, d)        do {                            \
        struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];        \
        hash_del(&_n->node);                                            \
        _n->data = (p)->data##b[d];                                     \
        pr_debug("add hash index%x %x pos %x data %lx\n", b,            \
                 (unsigned int)_n->index,                               \
                 (unsigned int)((p)->in - (p)->instart),                \
                 (unsigned long)_n->data);                              \
        hash_add((p)->htable##b, &_n->node, _n->data);                  \
} while (0)

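/* bmask[b] selects the b bits already written in the current output byte,
 * so add_bits() preserves them when OR-ing in new data
 */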
static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };

static int add_bits(struct sw842_param *p, u64 d, u8 n);

static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
{
        int ret;

        if (n <= s)
                return -EINVAL;

        ret = add_bits(p, d >> s, n - s);
        if (ret)
                return ret;

        return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
}

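/* append the low n bits of d to the output stream, most significant bit
 * first, starting at the current bit position; returns -ENOSPC when the
 * remaining output space is too small
 */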
static int add_bits(struct sw842_param *p, u64 d, u8 n)
{
        int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
        u64 o;
        u8 *out = p->out;

        pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);

        if (n > 64)
                return -EINVAL;

        /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
         * or if we're at the end of the output buffer and would write past end
         */
        if (bits > 64)
                return __split_add_bits(p, d, n, 32);
        else if (p->olen < 8 && bits > 32 && bits <= 56)
                return __split_add_bits(p, d, n, 16);
        else if (p->olen < 4 && bits > 16 && bits <= 24)
                return __split_add_bits(p, d, n, 8);

        if (DIV_ROUND_UP(bits, 8) > p->olen)
                return -ENOSPC;

        o = *out & bmask[b];
        d <<= s;

        if (bits <= 8)
                *out = o | d;
        else if (bits <= 16)
                put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
        else if (bits <= 24)
                put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
        else if (bits <= 32)
                put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
        else if (bits <= 40)
                put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
        else if (bits <= 48)
                put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
        else if (bits <= 56)
                put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
        else
                put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);

        p->bit += n;

        if (p->bit > 7) {
                p->out += p->bit / 8;
                p->olen -= p->bit / 8;
                p->bit %= 8;
        }

        return 0;
}

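/* emit template c: the 5-bit opcode from comp_ops[c][4] followed by the
 * index and/or data fields its four actions describe; b tracks how many
 * of the 8 input bytes have been covered so far
 */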
static int add_template(struct sw842_param *p, u8 c)
{
        int ret, i, b = 0;
        u8 *t = comp_ops[c];
        bool inv = false;

        if (c >= OPS_MAX)
                return -EINVAL;

        pr_debug("template %x\n", t[4]);

        ret = add_bits(p, t[4], OP_BITS);
        if (ret)
                return ret;

        for (i = 0; i < 4; i++) {
                pr_debug("op %x\n", t[i]);

                switch (t[i] & OP_AMOUNT) {
                case OP_AMOUNT_8:
                        if (b)
                                inv = true;
                        else if (t[i] & OP_ACTION_INDEX)
                                ret = add_bits(p, p->index8[0], I8_BITS);
                        else if (t[i] & OP_ACTION_DATA)
                                ret = add_bits(p, p->data8[0], 64);
                        else
                                inv = true;
                        break;
                case OP_AMOUNT_4:
                        if (b == 2 && t[i] & OP_ACTION_DATA)
                                ret = add_bits(p, get_input_data(p, 2, 32), 32);
                        else if (b != 0 && b != 4)
                                inv = true;
                        else if (t[i] & OP_ACTION_INDEX)
                                ret = add_bits(p, p->index4[b >> 2], I4_BITS);
                        else if (t[i] & OP_ACTION_DATA)
                                ret = add_bits(p, p->data4[b >> 2], 32);
                        else
                                inv = true;
                        break;
                case OP_AMOUNT_2:
                        if (b != 0 && b != 2 && b != 4 && b != 6)
                                inv = true;
                        else if (t[i] & OP_ACTION_INDEX)
                                ret = add_bits(p, p->index2[b >> 1], I2_BITS);
                        else if (t[i] & OP_ACTION_DATA)
                                ret = add_bits(p, p->data2[b >> 1], 16);
                        else
                                inv = true;
                        break;
                case OP_AMOUNT_0:
                        inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
                        break;
                default:
                        inv = true;
                        break;
                }

                if (ret)
                        return ret;

                if (inv) {
                        pr_err("Invalid templ %x op %d : %x %x %x %x\n",
                               c, i, t[0], t[1], t[2], t[3]);
                        return -EINVAL;
                }

                b += t[i] & OP_AMOUNT;
        }

        if (b != 8) {
                pr_err("Invalid template %x len %x : %x %x %x %x\n",
                       c, b, t[0], t[1], t[2], t[3]);
                return -EINVAL;
        }

        if (sw842_template_counts)
                atomic_inc(&template_count[t[4]]);

        return 0;
}

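/* emit an OP_REPEAT template indicating the previous 8 byte block is
 * repeated r times (the count is stored 0-based in REPEAT_BITS bits)
 */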
static int add_repeat_template(struct sw842_param *p, u8 r)
{
        int ret;

        /* repeat param is 0-based */
        if (!r || --r > REPEAT_BITS_MAX)
                return -EINVAL;

        ret = add_bits(p, OP_REPEAT, OP_BITS);
        if (ret)
                return ret;

        ret = add_bits(p, r, REPEAT_BITS);
        if (ret)
                return ret;

        if (sw842_template_counts)
                atomic_inc(&template_repeat_count);

        return 0;
}

static int add_short_data_template(struct sw842_param *p, u8 b)
{
        int ret, i;

        if (!b || b > SHORT_DATA_BITS_MAX)
                return -EINVAL;

        ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
        if (ret)
                return ret;

        ret = add_bits(p, b, SHORT_DATA_BITS);
        if (ret)
                return ret;

        for (i = 0; i < b; i++) {
                ret = add_bits(p, p->in[i], 8);
                if (ret)
                        return ret;
        }

        if (sw842_template_counts)
                atomic_inc(&template_short_data_count);

        return 0;
}

static int add_zeros_template(struct sw842_param *p)
{
        int ret = add_bits(p, OP_ZEROS, OP_BITS);

        if (ret)
                return ret;

        if (sw842_template_counts)
                atomic_inc(&template_zeros_count);

        return 0;
}

static int add_end_template(struct sw842_param *p)
{
        int ret = add_bits(p, OP_END, OP_BITS);

        if (ret)
                return ret;

        if (sw842_template_counts)
                atomic_inc(&template_end_count);

        return 0;
}

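/* return true if template c can encode the current 8 byte block, i.e.
 * every index reference it makes is present in the hash tables; data
 * references always succeed, so only indexes need checking
 */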
static bool check_template(struct sw842_param *p, u8 c)
{
        u8 *t = comp_ops[c];
        int i, match, b = 0;

        if (c >= OPS_MAX)
                return false;

        for (i = 0; i < 4; i++) {
                if (t[i] & OP_ACTION_INDEX) {
                        if (t[i] & OP_AMOUNT_2)
                                match = check_index(p, 2, b >> 1);
                        else if (t[i] & OP_AMOUNT_4)
                                match = check_index(p, 4, b >> 2);
                        else if (t[i] & OP_AMOUNT_8)
                                match = check_index(p, 8, 0);
                        else
                                return false;
                        if (!match)
                                return false;
                }

                b += t[i] & OP_AMOUNT;
        }

        return true;
}

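/* split the current 8 byte input block into one u64, two u32s and four
 * u16s (read big-endian) for template matching and emission
 */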
static void get_next_data(struct sw842_param *p)
{
        p->data8[0] = get_input_data(p, 0, 64);
        p->data4[0] = get_input_data(p, 0, 32);
        p->data4[1] = get_input_data(p, 4, 32);
        p->data2[0] = get_input_data(p, 0, 16);
        p->data2[1] = get_input_data(p, 2, 16);
        p->data2[2] = get_input_data(p, 4, 16);
        p->data2[3] = get_input_data(p, 6, 16);
}

/* update the hashtable entries.
 * only call this after finding/adding the current template
 * the dataN fields for the current 8 byte block must be already updated
 */
static void update_hashtables(struct sw842_param *p)
{
        u64 pos = p->in - p->instart;
        u64 n8 = (pos >> 3) % (1 << I8_BITS);
        u64 n4 = (pos >> 2) % (1 << I4_BITS);
        u64 n2 = (pos >> 1) % (1 << I2_BITS);

        replace_hash(p, 8, n8, 0);
        replace_hash(p, 4, n4, 0);
        replace_hash(p, 4, n4, 1);
        replace_hash(p, 2, n2, 0);
        replace_hash(p, 2, n2, 1);
        replace_hash(p, 2, n2, 2);
        replace_hash(p, 2, n2, 3);
}

/* find the next template to use, and add it
 * the p->dataN fields must already be set for the current 8 byte block
 */
static int process_next(struct sw842_param *p)
{
        int ret, i;

        p->index8[0] = INDEX_NOT_CHECKED;
        p->index4[0] = INDEX_NOT_CHECKED;
        p->index4[1] = INDEX_NOT_CHECKED;
        p->index2[0] = INDEX_NOT_CHECKED;
        p->index2[1] = INDEX_NOT_CHECKED;
        p->index2[2] = INDEX_NOT_CHECKED;
        p->index2[3] = INDEX_NOT_CHECKED;

        /* check up to OPS_MAX - 1; last op is our fallback */
        for (i = 0; i < OPS_MAX - 1; i++) {
                if (check_template(p, i))
                        break;
        }

        ret = add_template(p, i);
        if (ret)
                return ret;

        return 0;
}

/**
 * sw842_compress
 *
 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
 * @out, using no more than @olen bytes, using the 842 compression format.
 *
 * Returns: 0 on success, error on failure.  The @olen parameter
 * will contain the number of output bytes written on success, or
 * 0 on error.
 */
int sw842_compress(const u8 *in, unsigned int ilen,
                   u8 *out, unsigned int *olen, void *wmem)
{
        struct sw842_param *p = (struct sw842_param *)wmem;
        int ret;
        u64 last, next, pad, total;
        u8 repeat_count = 0;
        u32 crc;

        BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);

        init_hashtable_nodes(p, 8);
        init_hashtable_nodes(p, 4);
        init_hashtable_nodes(p, 2);

        p->in = (u8 *)in;
        p->instart = p->in;
        p->ilen = ilen;
        p->out = out;
        p->olen = *olen;
        p->bit = 0;

        total = p->olen;

        *olen = 0;

        /* if using strict mode, we can only compress a multiple of 8 */
        if (sw842_strict && (ilen % 8)) {
                pr_err("Using strict mode, can't compress len %d\n", ilen);
                return -EINVAL;
        }

        /* let's compress at least 8 bytes, mkay? */
        if (unlikely(ilen < 8))
                goto skip_comp;

        /* make initial 'last' different so we don't match the first time */
        last = ~get_unaligned((u64 *)p->in);

        while (p->ilen > 7) {
                next = get_unaligned((u64 *)p->in);

                /* must get the next data, as we need to update the hashtable
                 * entries with the new data every time
                 */
                get_next_data(p);

                /* we don't care about endianness in last or next;
                 * we're just comparing 8 bytes to another 8 bytes,
                 * they're both the same endianness
                 */
                if (next == last) {
                        /* repeat count bits are 0-based, so we stop at +1 */
                        if (++repeat_count <= REPEAT_BITS_MAX)
                                goto repeat;
                }
                if (repeat_count) {
                        ret = add_repeat_template(p, repeat_count);
                        repeat_count = 0;
                        if (next == last) /* reached max repeat bits */
                                goto repeat;
                }

                if (next == 0)
                        ret = add_zeros_template(p);
                else
                        ret = process_next(p);

                if (ret)
                        return ret;

repeat:
                last = next;
                update_hashtables(p);
                p->in += 8;
                p->ilen -= 8;
        }

        if (repeat_count) {
                ret = add_repeat_template(p, repeat_count);
                if (ret)
                        return ret;
        }

skip_comp:
        if (p->ilen > 0) {
                ret = add_short_data_template(p, p->ilen);
                if (ret)
                        return ret;

                p->in += p->ilen;
                p->ilen = 0;
        }

        ret = add_end_template(p);
        if (ret)
                return ret;

        /*
         * crc(0:31) is appended to target data starting with the next
         * bit after End of stream template.
         * nx842 calculates CRC for data in big-endian format. So doing
         * same here so that sw842 decompression can be used for both
         * compressed data.
         */
        crc = crc32_be(0, in, ilen);
        ret = add_bits(p, crc, CRC_BITS);
        if (ret)
                return ret;

        if (p->bit) {
                p->out++;
                p->olen--;
                p->bit = 0;
        }

        /* pad compressed length to multiple of 8 */
        pad = (8 - ((total - p->olen) % 8)) % 8;
        if (pad) {
                if (pad > p->olen) /* we were so close! */
                        return -ENOSPC;
                memset(p->out, 0, pad);
                p->out += pad;
                p->olen -= pad;
        }

        if (unlikely((total - p->olen) > UINT_MAX))
                return -ENOSPC;

        *olen = total - p->olen;

        return 0;
}
EXPORT_SYMBOL_GPL(sw842_compress);

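/*
 * Illustrative usage sketch (not from this file; the names src, slen, dst
 * and dlen are hypothetical).  The caller must provide a scratch buffer of
 * at least SW842_MEM_COMPRESS bytes as @wmem:
 *
 *      void *wmem = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL);
 *      unsigned int olen = dlen;
 *      int err;
 *
 *      if (!wmem)
 *              return -ENOMEM;
 *      err = sw842_compress(src, slen, dst, &olen, wmem);
 *      kfree(wmem);
 *
 * On success, olen holds the number of compressed bytes written to dst.
 */
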
static int __init sw842_init(void)
{
        if (sw842_template_counts)
                sw842_debugfs_create();

        return 0;
}
module_init(sw842_init);

static void __exit sw842_exit(void)
{
        if (sw842_template_counts)
                sw842_debugfs_remove();
}
module_exit(sw842_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Software 842 Compressor");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");