123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337 |
- /*
- Copyright (c) 1990-2008 Info-ZIP. All rights reserved.
- See the accompanying file LICENSE, version 2000-Apr-09 or later
- (the contents of which are also included in unzip.h) for terms of use.
- If, for some reason, all these files are missing, the Info-ZIP license
- also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
- */
- /*---------------------------------------------------------------------------
- unshrink.c version 1.22 19 Mar 2008
- NOTE: This code may or may not infringe on the so-called "Welch
- patent" owned by Unisys. (From reading the patent, it appears
- that a pure LZW decompressor is *not* covered, but this claim has
- not been tested in court, and Unisys is reported to believe other-
- wise.) It is therefore the responsibility of the user to acquire
- whatever license(s) may be required for legal use of this code.
- THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
- IN VIOLATION OF APPLICABLE PATENT LAW.
- Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
- up to 13 bits; in addition, there is provision for partial clearing of
- leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a
- change in code size or a partial clear of the code tree: 256,1 for the
- former and 256,2 for the latter. [Note that partial clearing can "orphan"
- nodes: the parent-to-be can be cleared before its new child is added,
- but the child is added anyway (as an orphan, as though the parent still
- existed). When the tree fills up to the point where the parent node is
- reused, the orphan is effectively "adopted." Versions prior to 1.05 were
- affected more due to greater use of pointers (to children and siblings
- as well as parents).]
- This replacement version of unshrink.c was written from scratch. It is
- based only on the algorithms described in Mark Nelson's _The Data Compres-
- sion Book_ and in Terry Welch's original paper in the June 1984 issue of
- IEEE _Computer_; no existing source code, including any in Nelson's book,
- was used.
- Memory requirements have been reduced in this version and are now no more
- than the original Sam Smith code. This is still larger than any of the
- other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming
- 16-bit short ints, and this does not even include the output buffer (the
- other algorithms leave the uncompressed data in the work area, typically
- called slide[]). For machines with a 64KB data space this is a problem,
- particularly when text conversion is required and line endings have more
- than one character. UnZip's solution is to use two roughly equal halves
- of outbuf for the ASCII conversion in such a case; the "unshrink" argument
- to flush() signals that this is the case.
- For large-memory machines, a second outbuf is allocated for translations,
- but only if unshrinking and only if translations are required.
- | binary mode | text mode
- ---------------------------------------------------
- big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here
- small mem | small outbuf | half + half small outbuf
- Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING"
- in UnZip 5.20 (or later) source or binary distributions.
- ---------------------------------------------------------------------------*/
- #define __UNSHRINK_C /* identifies this source module */
- #define UNZIP_INTERNAL
- #include "unzip.h"
- #ifndef LZW_CLEAN
- static void partial_clear OF((__GPRO__ int lastcodeused));
- #ifdef DEBUG
- # define OUTDBG(c) \
- if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
- #else
- # define OUTDBG(c)
- #endif
- /* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */
- #define BOGUSCODE 256
- #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */
- #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */
- #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */
- #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */
- #define parent G.area.shrink.Parent
- #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
- #define stack G.area.shrink.Stack
- /***********************/
- /* Function unshrink() */
- /***********************/
- int unshrink(__G)
- __GDEF
- {
- uch *stacktop = stack + (HSIZE - 1);
- register uch *newstr;
- uch finalval;
- int codesize=9, len, error;
- shrint code, oldcode, curcode;
- shrint lastfreecode;
- unsigned int outbufsiz;
- #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
- /* Normally realbuf and outbuf will be the same. However, if the data
- * are redirected to a large memory buffer, realbuf will point to the
- * new location while outbuf will remain pointing to the malloc'd
- * memory buffer. */
- uch *realbuf = G.outbuf;
- #else
- # define realbuf G.outbuf
- #endif
- /*---------------------------------------------------------------------------
- Initialize various variables.
- ---------------------------------------------------------------------------*/
- lastfreecode = BOGUSCODE;
- #ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */
- #ifndef SMALL_MEM
- /* non-memory-limited machines: allocate second (large) buffer for
- * textmode conversion in flush(), but only if needed */
- if (G.pInfo->textmode && !G.outbuf2 &&
- (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
- return PK_MEM3;
- #endif
- #endif /* !VMS */
- for (code = 0; code < BOGUSCODE; ++code) {
- Value[code] = (uch)code;
- parent[code] = BOGUSCODE;
- }
- for (code = BOGUSCODE+1; code < HSIZE; ++code)
- parent[code] = FREE_CODE;
- #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
- if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
- realbuf = G.redirect_buffer;
- outbufsiz = (unsigned)G.redirect_size;
- } else
- #endif
- #ifdef DLL
- if (G.pInfo->textmode && !G.redirect_data)
- #else
- if (G.pInfo->textmode)
- #endif
- outbufsiz = RAWBUFSIZ;
- else
- outbufsiz = OUTBUFSIZ;
- G.outptr = realbuf;
- G.outcnt = 0L;
- /*---------------------------------------------------------------------------
- Get and output first code, then loop over remaining ones.
- ---------------------------------------------------------------------------*/
- READBITS(codesize, oldcode)
- if (G.zipeof)
- return PK_OK;
- finalval = (uch)oldcode;
- OUTDBG(finalval)
- *G.outptr++ = finalval;
- ++G.outcnt;
- while (TRUE) {
- READBITS(codesize, code)
- if (G.zipeof)
- break;
- if (code == BOGUSCODE) { /* possible to have consecutive escapes? */
- READBITS(codesize, code)
- if (G.zipeof)
- break;
- if (code == 1) {
- ++codesize;
- Trace((stderr, " (codesize now %d bits)\n", codesize));
- if (codesize > MAX_BITS) return PK_ERR;
- } else if (code == 2) {
- Trace((stderr, " (partial clear code)\n"));
- /* clear leafs (nodes with no children) */
- partial_clear(__G__ lastfreecode);
- Trace((stderr, " (done with partial clear)\n"));
- lastfreecode = BOGUSCODE; /* reset start of free-node search */
- }
- continue;
- }
- /*-----------------------------------------------------------------------
- Translate code: traverse tree from leaf back to root.
- -----------------------------------------------------------------------*/
- newstr = stacktop;
- curcode = code;
- if (parent[code] == FREE_CODE) {
- /* or (FLAG_BITS[code] & FREE_CODE)? */
- Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
- oldcode));
- *newstr-- = finalval;
- code = oldcode;
- }
- while (code != BOGUSCODE) {
- if (newstr < stack) {
- /* Bogus compression stream caused buffer underflow! */
- Trace((stderr, "unshrink stack overflow!\n"));
- return PK_ERR;
- }
- if (parent[code] == FREE_CODE) {
- /* or (FLAG_BITS[code] & FREE_CODE)? */
- Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n",
- code, oldcode));
- *newstr-- = finalval;
- code = oldcode;
- } else {
- *newstr-- = Value[code];
- code = (shrint)(parent[code] & CODE_MASK);
- }
- }
- len = (int)(stacktop - newstr++);
- finalval = *newstr;
- /*-----------------------------------------------------------------------
- Write expanded string in reverse order to output buffer.
- -----------------------------------------------------------------------*/
- Trace((stderr,
- "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode,
- oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr,
- len));
- {
- register uch *p;
- for (p = newstr; p < newstr+len; ++p) {
- *G.outptr++ = *p;
- OUTDBG(*p)
- if (++G.outcnt == outbufsiz) {
- Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
- if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
- Trace((stderr, "unshrink: flush() error (%d)\n",
- error));
- return error;
- }
- G.outptr = realbuf;
- G.outcnt = 0L;
- Trace((stderr, "done with flush()\n"));
- }
- }
- }
- /*-----------------------------------------------------------------------
- Add new leaf (first character of newstr) to tree as child of oldcode.
- -----------------------------------------------------------------------*/
- /* search for freecode */
- code = (shrint)(lastfreecode + 1);
- /* add if-test before loop for speed? */
- while ((code < HSIZE) && (parent[code] != FREE_CODE))
- ++code;
- lastfreecode = code;
- Trace((stderr, "]; newcode %d\n", code));
- if (code >= HSIZE)
- /* invalid compressed data caused max-code overflow! */
- return PK_ERR;
- Value[code] = finalval;
- parent[code] = oldcode;
- oldcode = curcode;
- }
- /*---------------------------------------------------------------------------
- Flush any remaining data and return to sender...
- ---------------------------------------------------------------------------*/
- if (G.outcnt > 0L) {
- Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
- if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
- Trace((stderr, "unshrink: flush() error (%d)\n", error));
- return error;
- }
- Trace((stderr, "done with flush()\n"));
- }
- return PK_OK;
- } /* end function unshrink() */
- /****************************/
- /* Function partial_clear() */ /* no longer recursive... */
- /****************************/
- static void partial_clear(__G__ lastcodeused)
- __GDEF
- int lastcodeused;
- {
- register shrint code;
- /* clear all nodes which have no children (i.e., leaf nodes only) */
- /* first loop: mark each parent as such */
- for (code = BOGUSCODE+1; code <= lastcodeused; ++code) {
- register shrint cparent = (shrint)(parent[code] & CODE_MASK);
- if (cparent > BOGUSCODE)
- FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */
- }
- /* second loop: clear all nodes *not* marked as parents; reset flag bits */
- for (code = BOGUSCODE+1; code <= lastcodeused; ++code) {
- if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */
- FLAG_BITS[code] &= ~HAS_CHILD;
- else { /* leaf: lose it */
- Trace((stderr, "%d\n", code));
- parent[code] = FREE_CODE;
- }
- }
- return;
- }
- #endif /* !LZW_CLEAN */
|