lz4_compress.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /*
  2. * LZ4 - Fast LZ compression algorithm
  3. * Copyright (C) 2011-2012, Yann Collet.
  4. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are
  7. * met:
  8. *
  9. * * Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * * Redistributions in binary form must reproduce the above
  12. * copyright notice, this list of conditions and the following disclaimer
  13. * in the documentation and/or other materials provided with the
  14. * distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *
  28. * You can contact the author at :
  29. * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  30. * - LZ4 source repository : http://code.google.com/p/lz4/
  31. *
  32. * Changed for kernel use by:
  33. * Chanho Min <chanho.min@lge.com>
  34. */
  35. #include <linux/module.h>
  36. #include <linux/kernel.h>
  37. #include <linux/lz4.h>
  38. #include <asm/unaligned.h>
  39. #include "lz4defs.h"
  40. /*
  41. * LZ4_compressCtx :
  42. * -----------------
  43. * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
  44. * maximum size 'maxOutputSize'. * If it cannot achieve it, compression
  45. * will stop, and result of the function will be zero.
  46. * return : the number of bytes written in buffer 'dest', or 0 if the
  47. * compression fails
  48. */
  49. static inline int lz4_compressctx(void *ctx,
  50. const char *source,
  51. char *dest,
  52. int isize,
  53. int maxoutputsize)
  54. {
  55. HTYPE *hashtable = (HTYPE *)ctx;
  56. const u8 *ip = (u8 *)source;
  57. #if LZ4_ARCH64
  58. const BYTE * const base = ip;
  59. #else
  60. const int base = 0;
  61. #endif
  62. const u8 *anchor = ip;
  63. const u8 *const iend = ip + isize;
  64. const u8 *const mflimit = iend - MFLIMIT;
  65. #define MATCHLIMIT (iend - LASTLITERALS)
  66. u8 *op = (u8 *) dest;
  67. u8 *const oend = op + maxoutputsize;
  68. int length;
  69. const int skipstrength = SKIPSTRENGTH;
  70. u32 forwardh;
  71. int lastrun;
  72. /* Init */
  73. if (isize < MINLENGTH)
  74. goto _last_literals;
  75. memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
  76. /* First Byte */
  77. hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
  78. ip++;
  79. forwardh = LZ4_HASH_VALUE(ip);
  80. /* Main Loop */
  81. for (;;) {
  82. int findmatchattempts = (1U << skipstrength) + 3;
  83. const u8 *forwardip = ip;
  84. const u8 *ref;
  85. u8 *token;
  86. /* Find a match */
  87. do {
  88. u32 h = forwardh;
  89. int step = findmatchattempts++ >> skipstrength;
  90. ip = forwardip;
  91. forwardip = ip + step;
  92. if (unlikely(forwardip > mflimit))
  93. goto _last_literals;
  94. forwardh = LZ4_HASH_VALUE(forwardip);
  95. ref = base + hashtable[h];
  96. hashtable[h] = ip - base;
  97. } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
  98. /* Catch up */
  99. while ((ip > anchor) && (ref > (u8 *)source) &&
  100. unlikely(ip[-1] == ref[-1])) {
  101. ip--;
  102. ref--;
  103. }
  104. /* Encode Literal length */
  105. length = (int)(ip - anchor);
  106. token = op++;
  107. /* check output limit */
  108. if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
  109. (length >> 8) > oend))
  110. return 0;
  111. if (length >= (int)RUN_MASK) {
  112. int len;
  113. *token = (RUN_MASK << ML_BITS);
  114. len = length - RUN_MASK;
  115. for (; len > 254 ; len -= 255)
  116. *op++ = 255;
  117. *op++ = (u8)len;
  118. } else
  119. *token = (length << ML_BITS);
  120. /* Copy Literals */
  121. LZ4_BLINDCOPY(anchor, op, length);
  122. _next_match:
  123. /* Encode Offset */
  124. LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
  125. /* Start Counting */
  126. ip += MINMATCH;
  127. /* MinMatch verified */
  128. ref += MINMATCH;
  129. anchor = ip;
  130. while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
  131. #if LZ4_ARCH64
  132. u64 diff = A64(ref) ^ A64(ip);
  133. #else
  134. u32 diff = A32(ref) ^ A32(ip);
  135. #endif
  136. if (!diff) {
  137. ip += STEPSIZE;
  138. ref += STEPSIZE;
  139. continue;
  140. }
  141. ip += LZ4_NBCOMMONBYTES(diff);
  142. goto _endcount;
  143. }
  144. #if LZ4_ARCH64
  145. if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
  146. ip += 4;
  147. ref += 4;
  148. }
  149. #endif
  150. if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
  151. ip += 2;
  152. ref += 2;
  153. }
  154. if ((ip < MATCHLIMIT) && (*ref == *ip))
  155. ip++;
  156. _endcount:
  157. /* Encode MatchLength */
  158. length = (int)(ip - anchor);
  159. /* Check output limit */
  160. if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
  161. return 0;
  162. if (length >= (int)ML_MASK) {
  163. *token += ML_MASK;
  164. length -= ML_MASK;
  165. for (; length > 509 ; length -= 510) {
  166. *op++ = 255;
  167. *op++ = 255;
  168. }
  169. if (length > 254) {
  170. length -= 255;
  171. *op++ = 255;
  172. }
  173. *op++ = (u8)length;
  174. } else
  175. *token += length;
  176. /* Test end of chunk */
  177. if (ip > mflimit) {
  178. anchor = ip;
  179. break;
  180. }
  181. /* Fill table */
  182. hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
  183. /* Test next position */
  184. ref = base + hashtable[LZ4_HASH_VALUE(ip)];
  185. hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
  186. if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
  187. token = op++;
  188. *token = 0;
  189. goto _next_match;
  190. }
  191. /* Prepare next loop */
  192. anchor = ip++;
  193. forwardh = LZ4_HASH_VALUE(ip);
  194. }
  195. _last_literals:
  196. /* Encode Last Literals */
  197. lastrun = (int)(iend - anchor);
  198. if (((char *)op - dest) + lastrun + 1
  199. + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
  200. return 0;
  201. if (lastrun >= (int)RUN_MASK) {
  202. *op++ = (RUN_MASK << ML_BITS);
  203. lastrun -= RUN_MASK;
  204. for (; lastrun > 254 ; lastrun -= 255)
  205. *op++ = 255;
  206. *op++ = (u8)lastrun;
  207. } else
  208. *op++ = (lastrun << ML_BITS);
  209. memcpy(op, anchor, iend - anchor);
  210. op += iend - anchor;
  211. /* End */
  212. return (int)(((char *)op) - dest);
  213. }
  214. static inline int lz4_compress64kctx(void *ctx,
  215. const char *source,
  216. char *dest,
  217. int isize,
  218. int maxoutputsize)
  219. {
  220. u16 *hashtable = (u16 *)ctx;
  221. const u8 *ip = (u8 *) source;
  222. const u8 *anchor = ip;
  223. const u8 *const base = ip;
  224. const u8 *const iend = ip + isize;
  225. const u8 *const mflimit = iend - MFLIMIT;
  226. #define MATCHLIMIT (iend - LASTLITERALS)
  227. u8 *op = (u8 *) dest;
  228. u8 *const oend = op + maxoutputsize;
  229. int len, length;
  230. const int skipstrength = SKIPSTRENGTH;
  231. u32 forwardh;
  232. int lastrun;
  233. /* Init */
  234. if (isize < MINLENGTH)
  235. goto _last_literals;
  236. memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
  237. /* First Byte */
  238. ip++;
  239. forwardh = LZ4_HASH64K_VALUE(ip);
  240. /* Main Loop */
  241. for (;;) {
  242. int findmatchattempts = (1U << skipstrength) + 3;
  243. const u8 *forwardip = ip;
  244. const u8 *ref;
  245. u8 *token;
  246. /* Find a match */
  247. do {
  248. u32 h = forwardh;
  249. int step = findmatchattempts++ >> skipstrength;
  250. ip = forwardip;
  251. forwardip = ip + step;
  252. if (forwardip > mflimit)
  253. goto _last_literals;
  254. forwardh = LZ4_HASH64K_VALUE(forwardip);
  255. ref = base + hashtable[h];
  256. hashtable[h] = (u16)(ip - base);
  257. } while (A32(ref) != A32(ip));
  258. /* Catch up */
  259. while ((ip > anchor) && (ref > (u8 *)source)
  260. && (ip[-1] == ref[-1])) {
  261. ip--;
  262. ref--;
  263. }
  264. /* Encode Literal length */
  265. length = (int)(ip - anchor);
  266. token = op++;
  267. /* Check output limit */
  268. if (unlikely(op + length + (2 + 1 + LASTLITERALS)
  269. + (length >> 8) > oend))
  270. return 0;
  271. if (length >= (int)RUN_MASK) {
  272. *token = (RUN_MASK << ML_BITS);
  273. len = length - RUN_MASK;
  274. for (; len > 254 ; len -= 255)
  275. *op++ = 255;
  276. *op++ = (u8)len;
  277. } else
  278. *token = (length << ML_BITS);
  279. /* Copy Literals */
  280. LZ4_BLINDCOPY(anchor, op, length);
  281. _next_match:
  282. /* Encode Offset */
  283. LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
  284. /* Start Counting */
  285. ip += MINMATCH;
  286. /* MinMatch verified */
  287. ref += MINMATCH;
  288. anchor = ip;
  289. while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
  290. #if LZ4_ARCH64
  291. u64 diff = A64(ref) ^ A64(ip);
  292. #else
  293. u32 diff = A32(ref) ^ A32(ip);
  294. #endif
  295. if (!diff) {
  296. ip += STEPSIZE;
  297. ref += STEPSIZE;
  298. continue;
  299. }
  300. ip += LZ4_NBCOMMONBYTES(diff);
  301. goto _endcount;
  302. }
  303. #if LZ4_ARCH64
  304. if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
  305. ip += 4;
  306. ref += 4;
  307. }
  308. #endif
  309. if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
  310. ip += 2;
  311. ref += 2;
  312. }
  313. if ((ip < MATCHLIMIT) && (*ref == *ip))
  314. ip++;
  315. _endcount:
  316. /* Encode MatchLength */
  317. len = (int)(ip - anchor);
  318. /* Check output limit */
  319. if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
  320. return 0;
  321. if (len >= (int)ML_MASK) {
  322. *token += ML_MASK;
  323. len -= ML_MASK;
  324. for (; len > 509 ; len -= 510) {
  325. *op++ = 255;
  326. *op++ = 255;
  327. }
  328. if (len > 254) {
  329. len -= 255;
  330. *op++ = 255;
  331. }
  332. *op++ = (u8)len;
  333. } else
  334. *token += len;
  335. /* Test end of chunk */
  336. if (ip > mflimit) {
  337. anchor = ip;
  338. break;
  339. }
  340. /* Fill table */
  341. hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
  342. /* Test next position */
  343. ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
  344. hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
  345. if (A32(ref) == A32(ip)) {
  346. token = op++;
  347. *token = 0;
  348. goto _next_match;
  349. }
  350. /* Prepare next loop */
  351. anchor = ip++;
  352. forwardh = LZ4_HASH64K_VALUE(ip);
  353. }
  354. _last_literals:
  355. /* Encode Last Literals */
  356. lastrun = (int)(iend - anchor);
  357. if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
  358. return 0;
  359. if (lastrun >= (int)RUN_MASK) {
  360. *op++ = (RUN_MASK << ML_BITS);
  361. lastrun -= RUN_MASK;
  362. for (; lastrun > 254 ; lastrun -= 255)
  363. *op++ = 255;
  364. *op++ = (u8)lastrun;
  365. } else
  366. *op++ = (lastrun << ML_BITS);
  367. memcpy(op, anchor, iend - anchor);
  368. op += iend - anchor;
  369. /* End */
  370. return (int)(((char *)op) - dest);
  371. }
  372. int lz4_compress(const unsigned char *src, size_t src_len,
  373. unsigned char *dst, size_t *dst_len, void *wrkmem)
  374. {
  375. int ret = -1;
  376. int out_len = 0;
  377. if (src_len < LZ4_64KLIMIT)
  378. out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
  379. lz4_compressbound(src_len));
  380. else
  381. out_len = lz4_compressctx(wrkmem, src, dst, src_len,
  382. lz4_compressbound(src_len));
  383. if (out_len < 0)
  384. goto exit;
  385. *dst_len = out_len;
  386. return 0;
  387. exit:
  388. return ret;
  389. }
  390. EXPORT_SYMBOL(lz4_compress);
  391. MODULE_LICENSE("Dual BSD/GPL");
  392. MODULE_DESCRIPTION("LZ4 compressor");