archive_read_support_compression_compress.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. /*
  26. * This code borrows heavily from "compress" source code, which is
  27. * protected by the following copyright. (Clause 3 dropped by request
  28. * of the Regents.)
  29. */
  30. /*-
  31. * Copyright (c) 1985, 1986, 1992, 1993
  32. * The Regents of the University of California. All rights reserved.
  33. *
  34. * This code is derived from software contributed to Berkeley by
  35. * Diomidis Spinellis and James A. Woods, derived from original
  36. * work by Spencer Thomas and Joseph Orost.
  37. *
  38. * Redistribution and use in source and binary forms, with or without
  39. * modification, are permitted provided that the following conditions
  40. * are met:
  41. * 1. Redistributions of source code must retain the above copyright
  42. * notice, this list of conditions and the following disclaimer.
  43. * 2. Redistributions in binary form must reproduce the above copyright
  44. * notice, this list of conditions and the following disclaimer in the
  45. * documentation and/or other materials provided with the distribution.
  46. * 4. Neither the name of the University nor the names of its contributors
  47. * may be used to endorse or promote products derived from this software
  48. * without specific prior written permission.
  49. *
  50. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  51. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  52. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  53. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  54. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  55. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  56. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  57. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  58. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  59. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  60. * SUCH DAMAGE.
  61. */
  62. #include "archive_platform.h"
  63. __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.11 2008/12/06 06:45:15 kientzle Exp $");
  64. #ifdef HAVE_ERRNO_H
  65. #include <errno.h>
  66. #endif
  67. #ifdef HAVE_STDLIB_H
  68. #include <stdlib.h>
  69. #endif
  70. #ifdef HAVE_STRING_H
  71. #include <string.h>
  72. #endif
  73. #ifdef HAVE_UNISTD_H
  74. #include <unistd.h>
  75. #endif
  76. #include "archive.h"
  77. #include "archive_private.h"
  78. #include "archive_read_private.h"
  79. /*
  80. * Because LZW decompression is pretty simple, I've just implemented
  81. * the whole decompressor here (cribbing from "compress" source code,
  82. * of course), rather than relying on an external library. I have
  83. * made an effort to clarify and simplify the algorithm, so the
  84. * names and structure here don't exactly match those used by compress.
  85. */
  86. struct private_data {
  87. /* Input variables. */
  88. const unsigned char *next_in;
  89. size_t avail_in;
  90. int bit_buffer;
  91. int bits_avail;
  92. size_t bytes_in_section;
  93. /* Output variables. */
  94. size_t out_block_size;
  95. void *out_block;
  96. /* Decompression status variables. */
  97. int use_reset_code;
  98. int end_of_stream; /* EOF status. */
  99. int maxcode; /* Largest code. */
  100. int maxcode_bits; /* Length of largest code. */
  101. int section_end_code; /* When to increase bits. */
  102. int bits; /* Current code length. */
  103. int oldcode; /* Previous code. */
  104. int finbyte; /* Last byte of prev code. */
  105. /* Dictionary. */
  106. int free_ent; /* Next dictionary entry. */
  107. unsigned char suffix[65536];
  108. uint16_t prefix[65536];
  109. /*
  110. * Scratch area for expanding dictionary entries. Note:
  111. * "worst" case here comes from compressing /dev/zero: the
  112. * last code in the dictionary will code a sequence of
  113. * 65536-256 zero bytes. Thus, we need stack space to expand
  114. * a 65280-byte dictionary entry. (Of course, 32640:1
  115. * compression could also be considered the "best" case. ;-)
  116. */
  117. unsigned char *stackp;
  118. unsigned char stack[65300];
  119. };
  120. static int compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
  121. static int compress_bidder_init(struct archive_read_filter *);
  122. static int compress_bidder_free(struct archive_read_filter_bidder *);
  123. static ssize_t compress_filter_read(struct archive_read_filter *, const void **);
  124. static int compress_filter_close(struct archive_read_filter *);
  125. static int getbits(struct archive_read_filter *, int n);
  126. static int next_code(struct archive_read_filter *);
  127. int
  128. archive_read_support_compression_compress(struct archive *_a)
  129. {
  130. struct archive_read *a = (struct archive_read *)_a;
  131. struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
  132. if (bidder == NULL)
  133. return (ARCHIVE_FATAL);
  134. bidder->data = NULL;
  135. bidder->bid = compress_bidder_bid;
  136. bidder->init = compress_bidder_init;
  137. bidder->options = NULL;
  138. bidder->free = compress_bidder_free;
  139. return (ARCHIVE_OK);
  140. }
  141. /*
  142. * Test whether we can handle this data.
  143. *
  144. * This logic returns zero if any part of the signature fails. It
  145. * also tries to Do The Right Thing if a very short buffer prevents us
  146. * from verifying as much as we would like.
  147. */
  148. static int
  149. compress_bidder_bid(struct archive_read_filter_bidder *self,
  150. struct archive_read_filter *filter)
  151. {
  152. const unsigned char *buffer;
  153. ssize_t avail;
  154. int bits_checked;
  155. (void)self; /* UNUSED */
  156. buffer = __archive_read_filter_ahead(filter, 2, &avail);
  157. if (buffer == NULL)
  158. return (0);
  159. bits_checked = 0;
  160. if (buffer[0] != 037) /* Verify first ID byte. */
  161. return (0);
  162. bits_checked += 8;
  163. if (buffer[1] != 0235) /* Verify second ID byte. */
  164. return (0);
  165. bits_checked += 8;
  166. /*
  167. * TODO: Verify more.
  168. */
  169. return (bits_checked);
  170. }
  171. /*
  172. * Setup the callbacks.
  173. */
  174. static int
  175. compress_bidder_init(struct archive_read_filter *self)
  176. {
  177. struct private_data *state;
  178. static const size_t out_block_size = 64 * 1024;
  179. void *out_block;
  180. int code;
  181. self->code = ARCHIVE_COMPRESSION_COMPRESS;
  182. self->name = "compress (.Z)";
  183. state = (struct private_data *)calloc(sizeof(*state), 1);
  184. out_block = malloc(out_block_size);
  185. if (state == NULL || out_block == NULL) {
  186. free(out_block);
  187. free(state);
  188. archive_set_error(&self->archive->archive, ENOMEM,
  189. "Can't allocate data for %s decompression",
  190. self->name);
  191. return (ARCHIVE_FATAL);
  192. }
  193. self->data = state;
  194. state->out_block_size = out_block_size;
  195. state->out_block = out_block;
  196. self->read = compress_filter_read;
  197. self->skip = NULL; /* not supported */
  198. self->close = compress_filter_close;
  199. /* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */
  200. (void)getbits(self, 8); /* Skip first signature byte. */
  201. (void)getbits(self, 8); /* Skip second signature byte. */
  202. code = getbits(self, 8);
  203. state->maxcode_bits = code & 0x1f;
  204. state->maxcode = (1 << state->maxcode_bits);
  205. state->use_reset_code = code & 0x80;
  206. /* Initialize decompressor. */
  207. state->free_ent = 256;
  208. state->stackp = state->stack;
  209. if (state->use_reset_code)
  210. state->free_ent++;
  211. state->bits = 9;
  212. state->section_end_code = (1<<state->bits) - 1;
  213. state->oldcode = -1;
  214. for (code = 255; code >= 0; code--) {
  215. state->prefix[code] = 0;
  216. state->suffix[code] = code;
  217. }
  218. next_code(self);
  219. return (ARCHIVE_OK);
  220. }
  221. /*
  222. * Return a block of data from the decompression buffer. Decompress more
  223. * as necessary.
  224. */
  225. static ssize_t
  226. compress_filter_read(struct archive_read_filter *self, const void **pblock)
  227. {
  228. struct private_data *state;
  229. unsigned char *p, *start, *end;
  230. int ret;
  231. state = (struct private_data *)self->data;
  232. if (state->end_of_stream) {
  233. *pblock = NULL;
  234. return (0);
  235. }
  236. p = start = (unsigned char *)state->out_block;
  237. end = start + state->out_block_size;
  238. while (p < end && !state->end_of_stream) {
  239. if (state->stackp > state->stack) {
  240. *p++ = *--state->stackp;
  241. } else {
  242. ret = next_code(self);
  243. if (ret == -1)
  244. state->end_of_stream = ret;
  245. else if (ret != ARCHIVE_OK)
  246. return (ret);
  247. }
  248. }
  249. *pblock = start;
  250. return (p - start);
  251. }
  252. /*
  253. * Clean up the reader.
  254. */
  255. static int
  256. compress_bidder_free(struct archive_read_filter_bidder *self)
  257. {
  258. self->data = NULL;
  259. return (ARCHIVE_OK);
  260. }
  261. /*
  262. * Close and release the filter.
  263. */
  264. static int
  265. compress_filter_close(struct archive_read_filter *self)
  266. {
  267. struct private_data *state = (struct private_data *)self->data;
  268. free(state->out_block);
  269. free(state);
  270. return (ARCHIVE_OK);
  271. }
  272. /*
  273. * Process the next code and fill the stack with the expansion
  274. * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
  275. * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
  276. */
  277. static int
  278. next_code(struct archive_read_filter *self)
  279. {
  280. struct private_data *state = (struct private_data *)self->data;
  281. int code, newcode;
  282. static int debug_buff[1024];
  283. static unsigned debug_index;
  284. code = newcode = getbits(self, state->bits);
  285. if (code < 0)
  286. return (code);
  287. debug_buff[debug_index++] = code;
  288. if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
  289. debug_index = 0;
  290. /* If it's a reset code, reset the dictionary. */
  291. if ((code == 256) && state->use_reset_code) {
  292. /*
  293. * The original 'compress' implementation blocked its
  294. * I/O in a manner that resulted in junk bytes being
  295. * inserted after every reset. The next section skips
  296. * this junk. (Yes, the number of *bytes* to skip is
  297. * a function of the current *bit* length.)
  298. */
  299. int skip_bytes = state->bits -
  300. (state->bytes_in_section % state->bits);
  301. skip_bytes %= state->bits;
  302. state->bits_avail = 0; /* Discard rest of this byte. */
  303. while (skip_bytes-- > 0) {
  304. code = getbits(self, 8);
  305. if (code < 0)
  306. return (code);
  307. }
  308. /* Now, actually do the reset. */
  309. state->bytes_in_section = 0;
  310. state->bits = 9;
  311. state->section_end_code = (1 << state->bits) - 1;
  312. state->free_ent = 257;
  313. state->oldcode = -1;
  314. return (next_code(self));
  315. }
  316. if (code > state->free_ent) {
  317. /* An invalid code is a fatal error. */
  318. archive_set_error(&(self->archive->archive), -1,
  319. "Invalid compressed data");
  320. return (ARCHIVE_FATAL);
  321. }
  322. /* Special case for KwKwK string. */
  323. if (code >= state->free_ent) {
  324. *state->stackp++ = state->finbyte;
  325. code = state->oldcode;
  326. }
  327. /* Generate output characters in reverse order. */
  328. while (code >= 256) {
  329. *state->stackp++ = state->suffix[code];
  330. code = state->prefix[code];
  331. }
  332. *state->stackp++ = state->finbyte = code;
  333. /* Generate the new entry. */
  334. code = state->free_ent;
  335. if (code < state->maxcode && state->oldcode >= 0) {
  336. state->prefix[code] = state->oldcode;
  337. state->suffix[code] = state->finbyte;
  338. ++state->free_ent;
  339. }
  340. if (state->free_ent > state->section_end_code) {
  341. state->bits++;
  342. state->bytes_in_section = 0;
  343. if (state->bits == state->maxcode_bits)
  344. state->section_end_code = state->maxcode;
  345. else
  346. state->section_end_code = (1 << state->bits) - 1;
  347. }
  348. /* Remember previous code. */
  349. state->oldcode = newcode;
  350. return (ARCHIVE_OK);
  351. }
  352. /*
  353. * Return next 'n' bits from stream.
  354. *
  355. * -1 indicates end of available data.
  356. */
  357. static int
  358. getbits(struct archive_read_filter *self, int n)
  359. {
  360. struct private_data *state = (struct private_data *)self->data;
  361. int code;
  362. ssize_t ret;
  363. static const int mask[] = {
  364. 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
  365. 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
  366. };
  367. while (state->bits_avail < n) {
  368. if (state->avail_in <= 0) {
  369. state->next_in
  370. = __archive_read_filter_ahead(self->upstream,
  371. 1, &ret);
  372. if (ret == 0)
  373. return (-1);
  374. if (ret < 0 || state->next_in == NULL)
  375. return (ARCHIVE_FATAL);
  376. state->avail_in = ret;
  377. __archive_read_filter_consume(self->upstream, ret);
  378. }
  379. state->bit_buffer |= *state->next_in++ << state->bits_avail;
  380. state->avail_in--;
  381. state->bits_avail += 8;
  382. state->bytes_in_section++;
  383. }
  384. code = state->bit_buffer;
  385. state->bit_buffer >>= n;
  386. state->bits_avail -= n;
  387. return (code & mask[n]);
  388. }