lzma2_encoder.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lzma2_encoder.c
  4. /// \brief LZMA2 encoder
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #include "lz_encoder.h"
  14. #include "lzma_encoder.h"
  15. #include "fastpos.h"
  16. #include "lzma2_encoder.h"
  17. typedef struct {
  18. enum {
  19. SEQ_INIT,
  20. SEQ_LZMA_ENCODE,
  21. SEQ_LZMA_COPY,
  22. SEQ_UNCOMPRESSED_HEADER,
  23. SEQ_UNCOMPRESSED_COPY,
  24. } sequence;
  25. /// LZMA encoder
  26. void *lzma;
  27. /// LZMA options currently in use.
  28. lzma_options_lzma opt_cur;
  29. bool need_properties;
  30. bool need_state_reset;
  31. bool need_dictionary_reset;
  32. /// Uncompressed size of a chunk
  33. size_t uncompressed_size;
  34. /// Compressed size of a chunk (excluding headers); this is also used
  35. /// to indicate the end of buf[] in SEQ_LZMA_COPY.
  36. size_t compressed_size;
  37. /// Read position in buf[]
  38. size_t buf_pos;
  39. /// Buffer to hold the chunk header and LZMA compressed data
  40. uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
  41. } lzma_lzma2_coder;
  42. static void
  43. lzma2_header_lzma(lzma_lzma2_coder *coder)
  44. {
  45. assert(coder->uncompressed_size > 0);
  46. assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
  47. assert(coder->compressed_size > 0);
  48. assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
  49. size_t pos;
  50. if (coder->need_properties) {
  51. pos = 0;
  52. if (coder->need_dictionary_reset)
  53. coder->buf[pos] = 0x80 + (3 << 5);
  54. else
  55. coder->buf[pos] = 0x80 + (2 << 5);
  56. } else {
  57. pos = 1;
  58. if (coder->need_state_reset)
  59. coder->buf[pos] = 0x80 + (1 << 5);
  60. else
  61. coder->buf[pos] = 0x80;
  62. }
  63. // Set the start position for copying.
  64. coder->buf_pos = pos;
  65. // Uncompressed size
  66. size_t size = coder->uncompressed_size - 1;
  67. coder->buf[pos++] += size >> 16;
  68. coder->buf[pos++] = (size >> 8) & 0xFF;
  69. coder->buf[pos++] = size & 0xFF;
  70. // Compressed size
  71. size = coder->compressed_size - 1;
  72. coder->buf[pos++] = size >> 8;
  73. coder->buf[pos++] = size & 0xFF;
  74. // Properties, if needed
  75. if (coder->need_properties)
  76. lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
  77. coder->need_properties = false;
  78. coder->need_state_reset = false;
  79. coder->need_dictionary_reset = false;
  80. // The copying code uses coder->compressed_size to indicate the end
  81. // of coder->buf[], so we need add the maximum size of the header here.
  82. coder->compressed_size += LZMA2_HEADER_MAX;
  83. return;
  84. }
  85. static void
  86. lzma2_header_uncompressed(lzma_lzma2_coder *coder)
  87. {
  88. assert(coder->uncompressed_size > 0);
  89. assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
  90. // If this is the first chunk, we need to include dictionary
  91. // reset indicator.
  92. if (coder->need_dictionary_reset)
  93. coder->buf[0] = 1;
  94. else
  95. coder->buf[0] = 2;
  96. coder->need_dictionary_reset = false;
  97. // "Compressed" size
  98. coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
  99. coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
  100. // Set the start position for copying.
  101. coder->buf_pos = 0;
  102. return;
  103. }
  104. static lzma_ret
  105. lzma2_encode(void *coder_ptr, lzma_mf *restrict mf,
  106. uint8_t *restrict out, size_t *restrict out_pos,
  107. size_t out_size)
  108. {
  109. lzma_lzma2_coder *restrict coder = coder_ptr;
  110. while (*out_pos < out_size)
  111. switch (coder->sequence) {
  112. case SEQ_INIT:
  113. // If there's no input left and we are flushing or finishing,
  114. // don't start a new chunk.
  115. if (mf_unencoded(mf) == 0) {
  116. // Write end of payload marker if finishing.
  117. if (mf->action == LZMA_FINISH)
  118. out[(*out_pos)++] = 0;
  119. return mf->action == LZMA_RUN
  120. ? LZMA_OK : LZMA_STREAM_END;
  121. }
  122. if (coder->need_state_reset)
  123. return_if_error(lzma_lzma_encoder_reset(
  124. coder->lzma, &coder->opt_cur));
  125. coder->uncompressed_size = 0;
  126. coder->compressed_size = 0;
  127. coder->sequence = SEQ_LZMA_ENCODE;
  128. // Fall through
  129. case SEQ_LZMA_ENCODE: {
  130. // Calculate how much more uncompressed data this chunk
  131. // could accept.
  132. const uint32_t left = LZMA2_UNCOMPRESSED_MAX
  133. - coder->uncompressed_size;
  134. uint32_t limit;
  135. if (left < mf->match_len_max) {
  136. // Must flush immediately since the next LZMA symbol
  137. // could make the uncompressed size of the chunk too
  138. // big.
  139. limit = 0;
  140. } else {
  141. // Calculate maximum read_limit that is OK from point
  142. // of view of LZMA2 chunk size.
  143. limit = mf->read_pos - mf->read_ahead
  144. + left - mf->match_len_max;
  145. }
  146. // Save the start position so that we can update
  147. // coder->uncompressed_size.
  148. const uint32_t read_start = mf->read_pos - mf->read_ahead;
  149. // Call the LZMA encoder until the chunk is finished.
  150. const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
  151. coder->buf + LZMA2_HEADER_MAX,
  152. &coder->compressed_size,
  153. LZMA2_CHUNK_MAX, limit);
  154. coder->uncompressed_size += mf->read_pos - mf->read_ahead
  155. - read_start;
  156. assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
  157. assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
  158. if (ret != LZMA_STREAM_END)
  159. return LZMA_OK;
  160. // See if the chunk compressed. If it didn't, we encode it
  161. // as uncompressed chunk. This saves a few bytes of space
  162. // and makes decoding faster.
  163. if (coder->compressed_size >= coder->uncompressed_size) {
  164. coder->uncompressed_size += mf->read_ahead;
  165. assert(coder->uncompressed_size
  166. <= LZMA2_UNCOMPRESSED_MAX);
  167. mf->read_ahead = 0;
  168. lzma2_header_uncompressed(coder);
  169. coder->need_state_reset = true;
  170. coder->sequence = SEQ_UNCOMPRESSED_HEADER;
  171. break;
  172. }
  173. // The chunk did compress at least by one byte, so we store
  174. // the chunk as LZMA.
  175. lzma2_header_lzma(coder);
  176. coder->sequence = SEQ_LZMA_COPY;
  177. }
  178. // Fall through
  179. case SEQ_LZMA_COPY:
  180. // Copy the compressed chunk along its headers to the
  181. // output buffer.
  182. lzma_bufcpy(coder->buf, &coder->buf_pos,
  183. coder->compressed_size,
  184. out, out_pos, out_size);
  185. if (coder->buf_pos != coder->compressed_size)
  186. return LZMA_OK;
  187. coder->sequence = SEQ_INIT;
  188. break;
  189. case SEQ_UNCOMPRESSED_HEADER:
  190. // Copy the three-byte header to indicate uncompressed chunk.
  191. lzma_bufcpy(coder->buf, &coder->buf_pos,
  192. LZMA2_HEADER_UNCOMPRESSED,
  193. out, out_pos, out_size);
  194. if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
  195. return LZMA_OK;
  196. coder->sequence = SEQ_UNCOMPRESSED_COPY;
  197. // Fall through
  198. case SEQ_UNCOMPRESSED_COPY:
  199. // Copy the uncompressed data as is from the dictionary
  200. // to the output buffer.
  201. mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
  202. if (coder->uncompressed_size != 0)
  203. return LZMA_OK;
  204. coder->sequence = SEQ_INIT;
  205. break;
  206. }
  207. return LZMA_OK;
  208. }
  209. static void
  210. lzma2_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
  211. {
  212. lzma_lzma2_coder *coder = coder_ptr;
  213. lzma_free(coder->lzma, allocator);
  214. lzma_free(coder, allocator);
  215. return;
  216. }
  217. static lzma_ret
  218. lzma2_encoder_options_update(void *coder_ptr, const lzma_filter *filter)
  219. {
  220. lzma_lzma2_coder *coder = coder_ptr;
  221. // New options can be set only when there is no incomplete chunk.
  222. // This is the case at the beginning of the raw stream and right
  223. // after LZMA_SYNC_FLUSH.
  224. if (filter->options == NULL || coder->sequence != SEQ_INIT)
  225. return LZMA_PROG_ERROR;
  226. // Look if there are new options. At least for now,
  227. // only lc/lp/pb can be changed.
  228. const lzma_options_lzma *opt = filter->options;
  229. if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp
  230. || coder->opt_cur.pb != opt->pb) {
  231. // Validate the options.
  232. if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX
  233. || opt->lc + opt->lp > LZMA_LCLP_MAX
  234. || opt->pb > LZMA_PB_MAX)
  235. return LZMA_OPTIONS_ERROR;
  236. // The new options will be used when the encoder starts
  237. // a new LZMA2 chunk.
  238. coder->opt_cur.lc = opt->lc;
  239. coder->opt_cur.lp = opt->lp;
  240. coder->opt_cur.pb = opt->pb;
  241. coder->need_properties = true;
  242. coder->need_state_reset = true;
  243. }
  244. return LZMA_OK;
  245. }
  246. static lzma_ret
  247. lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
  248. const void *options, lzma_lz_options *lz_options)
  249. {
  250. if (options == NULL)
  251. return LZMA_PROG_ERROR;
  252. lzma_lzma2_coder *coder = lz->coder;
  253. if (coder == NULL) {
  254. coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator);
  255. if (coder == NULL)
  256. return LZMA_MEM_ERROR;
  257. lz->coder = coder;
  258. lz->code = &lzma2_encode;
  259. lz->end = &lzma2_encoder_end;
  260. lz->options_update = &lzma2_encoder_options_update;
  261. coder->lzma = NULL;
  262. }
  263. coder->opt_cur = *(const lzma_options_lzma *)(options);
  264. coder->sequence = SEQ_INIT;
  265. coder->need_properties = true;
  266. coder->need_state_reset = false;
  267. coder->need_dictionary_reset
  268. = coder->opt_cur.preset_dict == NULL
  269. || coder->opt_cur.preset_dict_size == 0;
  270. // Initialize LZMA encoder
  271. return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator,
  272. &coder->opt_cur, lz_options));
  273. // Make sure that we will always have enough history available in
  274. // case we need to use uncompressed chunks. They are used when the
  275. // compressed size of a chunk is not smaller than the uncompressed
  276. // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes
  277. // history available.
  278. if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX)
  279. lz_options->before_size
  280. = LZMA2_CHUNK_MAX - lz_options->dict_size;
  281. return LZMA_OK;
  282. }
  283. extern lzma_ret
  284. lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
  285. const lzma_filter_info *filters)
  286. {
  287. return lzma_lz_encoder_init(
  288. next, allocator, filters, &lzma2_encoder_init);
  289. }
  290. extern uint64_t
  291. lzma_lzma2_encoder_memusage(const void *options)
  292. {
  293. const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options);
  294. if (lzma_mem == UINT64_MAX)
  295. return UINT64_MAX;
  296. return sizeof(lzma_lzma2_coder) + lzma_mem;
  297. }
  298. extern lzma_ret
  299. lzma_lzma2_props_encode(const void *options, uint8_t *out)
  300. {
  301. const lzma_options_lzma *const opt = options;
  302. uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN);
  303. // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
  304. // on which one is the next:
  305. --d;
  306. d |= d >> 2;
  307. d |= d >> 3;
  308. d |= d >> 4;
  309. d |= d >> 8;
  310. d |= d >> 16;
  311. // Get the highest two bits using the proper encoding:
  312. if (d == UINT32_MAX)
  313. out[0] = 40;
  314. else
  315. out[0] = get_dist_slot(d + 1) - 24;
  316. return LZMA_OK;
  317. }
  318. extern uint64_t
  319. lzma_lzma2_block_size(const void *options)
  320. {
  321. const lzma_options_lzma *const opt = options;
  322. // Use at least 1 MiB to keep compression ratio better.
  323. return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20);
  324. }