lz4hc_compress.c
/*
 * LZ4 HC - High Compression Mode of LZ4
 * Copyright (C) 2011-2012, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 *
 * Changed for kernel use by:
 * Chanho Min <chanho.min@lge.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/lz4.h>
#include <asm/unaligned.h>
#include "lz4defs.h"
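
/*
 * Match-finder state, carved out of the caller-provided workspace:
 * hashtable[] holds the most recent position seen for each hash bucket,
 * chaintable[] links that position back to earlier occurrences of the
 * same hash inside the MAX_DISTANCE window, and nexttoupdate tracks how
 * far the tables have been filled in.
 */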
struct lz4hc_data {
	const u8 *base;
	HTYPE hashtable[HASHTABLESIZE];
	u16 chaintable[MAXD];
	const u8 *nexttoupdate;
} __attribute__((__packed__));

static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
{
	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));

#if LZ4_ARCH64
	hc4->nexttoupdate = base + 1;
#else
	hc4->nexttoupdate = base;
#endif
	hc4->base = base;
	return 1;
}

/* Update chains up to ip (excluded) */
static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
{
	u16 *chaintable = hc4->chaintable;
	HTYPE *hashtable = hc4->hashtable;
#if LZ4_ARCH64
	const BYTE * const base = hc4->base;
#else
	const int base = 0;
#endif

	while (hc4->nexttoupdate < ip) {
		const u8 *p = hc4->nexttoupdate;
		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);

		if (delta > MAX_DISTANCE)
			delta = MAX_DISTANCE;
		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
		hashtable[HASH_VALUE(p)] = (p) - base;
		hc4->nexttoupdate++;
	}
}
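
/*
 * Count how many bytes match between p1 and p2, comparing one word
 * (STEPSIZE bytes) at a time and never reading past matchlimit.
 * Returns the length of the common prefix starting at p1.
 */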
static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
		const u8 *const matchlimit)
{
	const u8 *p1t = p1;

	while (p1t < matchlimit - (STEPSIZE - 1)) {
#if LZ4_ARCH64
		u64 diff = A64(p2) ^ A64(p1t);
#else
		u32 diff = A32(p2) ^ A32(p1t);
#endif
		if (!diff) {
			p1t += STEPSIZE;
			p2 += STEPSIZE;
			continue;
		}
		p1t += LZ4_NBCOMMONBYTES(diff);
		return p1t - p1;
	}
#if LZ4_ARCH64
	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
		p1t += 4;
		p2 += 4;
	}
#endif
	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
		p1t += 2;
		p2 += 2;
	}
	if ((p1t < matchlimit) && (*p2 == *p1t))
		p1t++;
	return p1t - p1;
}
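
/*
 * Insert ip into the tables, then walk its hash chain (bounded by
 * MAX_NB_ATTEMPTS) looking for the longest match within MAX_DISTANCE.
 * A short-range repetition (offset of at most 4 bytes) is checked first,
 * and on a hit the tables are pre-filled over the repeated span so the
 * chain stays consistent. Returns the best match length (0 if none) and
 * stores its position in *matchpos.
 */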
static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
{
	u16 *const chaintable = hc4->chaintable;
	HTYPE *const hashtable = hc4->hashtable;
	const u8 *ref;
#if LZ4_ARCH64
	const BYTE * const base = hc4->base;
#else
	const int base = 0;
#endif
	int nbattempts = MAX_NB_ATTEMPTS;
	size_t repl = 0, ml = 0;
	u16 delta;

	/* HC4 match finder */
	lz4hc_insert(hc4, ip);
	ref = hashtable[HASH_VALUE(ip)] + base;

	/* potential repetition */
	if (ref >= ip-4) {
		/* confirmed */
		if (A32(ref) == A32(ip)) {
			delta = (u16)(ip-ref);
			repl = ml = lz4hc_commonlength(ip + MINMATCH,
					ref + MINMATCH, matchlimit) + MINMATCH;
			*matchpos = ref;
		}
		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
	}

	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
		nbattempts--;
		if (*(ref + ml) == *(ip + ml)) {
			if (A32(ref) == A32(ip)) {
				size_t mlt =
					lz4hc_commonlength(ip + MINMATCH,
					ref + MINMATCH, matchlimit) + MINMATCH;
				if (mlt > ml) {
					ml = mlt;
					*matchpos = ref;
				}
			}
		}
		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
	}

	/* Complete table */
	if (repl) {
		const BYTE *ptr = ip;
		const BYTE *end;

		end = ip + repl - (MINMATCH-1);
		/* Pre-Load */
		while (ptr < end - delta) {
			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
			ptr++;
		}
		do {
			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
			/* Head of chain */
			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
			ptr++;
		} while (ptr < end);
		hc4->nexttoupdate = end;
	}
	return (int)ml;
}
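
/*
 * Like lz4hc_insertandfindbestmatch(), but tries to grow a candidate
 * match in both directions: forward up to matchlimit and backward down
 * to startlimit. Only matches strictly longer than 'longest' are
 * reported; the winning start and reference positions are returned via
 * *startpos and *matchpos.
 */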
static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
	const u8 **matchpos, const u8 **startpos)
{
	u16 *const chaintable = hc4->chaintable;
	HTYPE *const hashtable = hc4->hashtable;
#if LZ4_ARCH64
	const BYTE * const base = hc4->base;
#else
	const int base = 0;
#endif
	const u8 *ref;
	int nbattempts = MAX_NB_ATTEMPTS;
	int delta = (int)(ip - startlimit);

	/* First Match */
	lz4hc_insert(hc4, ip);
	ref = hashtable[HASH_VALUE(ip)] + base;

	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
		&& (nbattempts)) {
		nbattempts--;
		if (*(startlimit + longest) == *(ref - delta + longest)) {
			if (A32(ref) == A32(ip)) {
				const u8 *reft = ref + MINMATCH;
				const u8 *ipt = ip + MINMATCH;
				const u8 *startt = ip;

				while (ipt < matchlimit-(STEPSIZE - 1)) {
#if LZ4_ARCH64
					u64 diff = A64(reft) ^ A64(ipt);
#else
					u32 diff = A32(reft) ^ A32(ipt);
#endif
					if (!diff) {
						ipt += STEPSIZE;
						reft += STEPSIZE;
						continue;
					}
					ipt += LZ4_NBCOMMONBYTES(diff);
					goto _endcount;
				}
#if LZ4_ARCH64
				if ((ipt < (matchlimit - 3))
					&& (A32(reft) == A32(ipt))) {
					ipt += 4;
					reft += 4;
				}
				ipt += 2;
#endif
				if ((ipt < (matchlimit - 1))
					&& (A16(reft) == A16(ipt))) {
					reft += 2;
				}
				if ((ipt < matchlimit) && (*reft == *ipt))
					ipt++;
_endcount:
				reft = ref;

				while ((startt > startlimit)
					&& (reft > hc4->base)
					&& (startt[-1] == reft[-1])) {
					startt--;
					reft--;
				}

				if ((ipt - startt) > longest) {
					longest = (int)(ipt - startt);
					*matchpos = reft;
					*startpos = startt;
				}
			}
		}
		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
	}
	return longest;
}
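
/*
 * Emit one LZ4 sequence at *op: a token, the literal run since *anchor
 * (with 255-byte length continuation as needed), the 2-byte little-endian
 * offset to ref, and the extra match-length bytes for matches of at least
 * ML_MASK + MINMATCH bytes. Advances *ip past the match and moves the
 * anchor up to it.
 */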
static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
		int ml, const u8 *ref)
{
	int length, len;
	u8 *token;

	/* Encode Literal length */
	length = (int)(*ip - *anchor);
	token = (*op)++;
	if (length >= (int)RUN_MASK) {
		*token = (RUN_MASK << ML_BITS);
		len = length - RUN_MASK;
		for (; len > 254 ; len -= 255)
			*(*op)++ = 255;
		*(*op)++ = (u8)len;
	} else
		*token = (length << ML_BITS);

	/* Copy Literals */
	LZ4_BLINDCOPY(*anchor, *op, length);

	/* Encode Offset */
	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));

	/* Encode MatchLength */
	len = (int)(ml - MINMATCH);
	if (len >= (int)ML_MASK) {
		*token += ML_MASK;
		len -= ML_MASK;
		for (; len > 509 ; len -= 510) {
			*(*op)++ = 255;
			*(*op)++ = 255;
		}
		if (len > 254) {
			len -= 255;
			*(*op)++ = 255;
		}
		*(*op)++ = (u8)len;
	} else
		*token += len;

	/* Prepare next loop */
	*ip += ml;
	*anchor = *ip;

	return 0;
}
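
/*
 * Core HC compression loop. For each position it finds the best match,
 * then speculatively looks for overlapping, longer matches starting a
 * little further on (_search2/_search3) and arbitrates between up to
 * three candidates before emitting sequences, trading extra search work
 * for a denser parse than the greedy LZ4 fast path.
 */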
static int lz4_compresshcctx(struct lz4hc_data *ctx,
		const char *source,
		char *dest,
		int isize)
{
	const u8 *ip = (const u8 *)source;
	const u8 *anchor = ip;
	const u8 *const iend = ip + isize;
	const u8 *const mflimit = iend - MFLIMIT;
	const u8 *const matchlimit = (iend - LASTLITERALS);

	u8 *op = (u8 *)dest;

	int ml, ml2, ml3, ml0;
	const u8 *ref = NULL;
	const u8 *start2 = NULL;
	const u8 *ref2 = NULL;
	const u8 *start3 = NULL;
	const u8 *ref3 = NULL;
	const u8 *start0;
	const u8 *ref0;
	int lastrun;

	ip++;

	/* Main Loop */
	while (ip < mflimit) {
		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
		if (!ml) {
			ip++;
			continue;
		}

		/* saved, in case we would skip too much */
		start0 = ip;
		ref0 = ref;
		ml0 = ml;
_search2:
		if (ip+ml < mflimit)
			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
				ip + 1, matchlimit, ml, &ref2, &start2);
		else
			ml2 = ml;

		/* No better match */
		if (ml2 == ml) {
			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
			continue;
		}

		if (start0 < ip) {
			/* empirical */
			if (start2 < ip + ml0) {
				ip = start0;
				ref = ref0;
				ml = ml0;
			}
		}
		/*
		 * Here, start0==ip
		 * First Match too small : removed
		 */
		if ((start2 - ip) < 3) {
			ml = ml2;
			ip = start2;
			ref = ref2;
			goto _search2;
		}

_search3:
		/*
		 * Currently we have :
		 * ml2 > ml1, and
		 * ip1+3 <= ip2 (usually < ip1+ml1)
		 */
		if ((start2 - ip) < OPTIMAL_ML) {
			int correction;
			int new_ml = ml;

			if (new_ml > OPTIMAL_ML)
				new_ml = OPTIMAL_ML;
			if (ip + new_ml > start2 + ml2 - MINMATCH)
				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
			correction = new_ml - (int)(start2 - ip);
			if (correction > 0) {
				start2 += correction;
				ref2 += correction;
				ml2 -= correction;
			}
		}
		/*
		 * Now, we have start2 = ip+new_ml,
		 * with new_ml=min(ml, OPTIMAL_ML=18)
		 */
		if (start2 + ml2 < mflimit)
			ml3 = lz4hc_insertandgetwidermatch(ctx,
				start2 + ml2 - 3, start2, matchlimit,
				ml2, &ref3, &start3);
		else
			ml3 = ml2;

		/* No better match : 2 sequences to encode */
		if (ml3 == ml2) {
			/* ip & ref are known; Now for ml */
			if (start2 < ip+ml)
				ml = (int)(start2 - ip);

			/* Now, encode 2 sequences */
			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
			ip = start2;
			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
			continue;
		}

		/* Not enough space for match 2 : remove it */
		if (start3 < ip + ml + 3) {
			/*
			 * can write Seq1 immediately ==> Seq2 is removed,
			 * so Seq3 becomes Seq1
			 */
			if (start3 >= (ip + ml)) {
				if (start2 < ip + ml) {
					int correction =
						(int)(ip + ml - start2);
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
					if (ml2 < MINMATCH) {
						start2 = start3;
						ref2 = ref3;
						ml2 = ml3;
					}
				}

				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
				ip = start3;
				ref = ref3;
				ml = ml3;

				start0 = start2;
				ref0 = ref2;
				ml0 = ml2;
				goto _search2;
			}

			start2 = start3;
			ref2 = ref3;
			ml2 = ml3;
			goto _search3;
		}

		/*
		 * OK, now we have 3 ascending matches; let's write at least
		 * the first one ip & ref are known; Now for ml
		 */
		if (start2 < ip + ml) {
			if ((start2 - ip) < (int)ML_MASK) {
				int correction;

				if (ml > OPTIMAL_ML)
					ml = OPTIMAL_ML;
				if (ip + ml > start2 + ml2 - MINMATCH)
					ml = (int)(start2 - ip) + ml2
						- MINMATCH;
				correction = ml - (int)(start2 - ip);
				if (correction > 0) {
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
				}
			} else
				ml = (int)(start2 - ip);
		}
		lz4_encodesequence(&ip, &op, &anchor, ml, ref);

		ip = start2;
		ref = ref2;
		ml = ml2;

		start2 = start3;
		ref2 = ref3;
		ml2 = ml3;

		goto _search3;
	}

	/* Encode Last Literals */
	lastrun = (int)(iend - anchor);
	if (lastrun >= (int)RUN_MASK) {
		*op++ = (RUN_MASK << ML_BITS);
		lastrun -= RUN_MASK;
		for (; lastrun > 254 ; lastrun -= 255)
			*op++ = 255;
		*op++ = (u8) lastrun;
	} else
		*op++ = (lastrun << ML_BITS);
	memcpy(op, anchor, iend - anchor);
	op += iend - anchor;

	/* End */
	return (int) (((char *)op) - dest);
}
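
/*
 * Exported entry point. The caller provides the scratch workspace; a
 * minimal usage sketch (assuming the LZ4HC_MEM_COMPRESS constant and
 * lz4_compressbound() helper declared in <linux/lz4.h>, and omitting
 * allocation-failure handling) might look like:
 *
 *	void *wrkmem = vmalloc(LZ4HC_MEM_COMPRESS);
 *	size_t out_len = lz4_compressbound(src_len);
 *	unsigned char *dst = vmalloc(out_len);
 *
 *	if (!lz4hc_compress(src, src_len, dst, &out_len, wrkmem))
 *		pr_info("compressed %zu -> %zu bytes\n", src_len, out_len);
 *
 * Returns 0 on success with *dst_len set to the compressed size, or a
 * negative value on failure.
 */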
int lz4hc_compress(const unsigned char *src, size_t src_len,
			unsigned char *dst, size_t *dst_len, void *wrkmem)
{
	int ret = -1;
	int out_len = 0;
	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;

	lz4hc_init(hc4, (const u8 *)src);
	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
		(char *)dst, (int)src_len);

	if (out_len < 0)
		goto exit;

	*dst_len = out_len;
	return 0;

exit:
	return ret;
}
EXPORT_SYMBOL(lz4hc_compress);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4HC compressor");