123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- /* dictziplib.c --
- * http://stardict.sourceforge.net
- * Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
- * This file is a modified version of dictd-1.9.7's data.c
- *
- * data.c --
- * Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
- * Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
- * Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Library General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
- //#define HAVE_MMAP // It will be defined in config.h; configure.in can do this with AC_FUNC_MMAP.
- #ifdef HAVE_CONFIG_H
- # include "config.h"
- #endif
- #include <QtGlobal>
- #include <cassert>
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
- #ifdef Q_OS_UNIX
- #include <unistd.h>
- #endif
- #ifdef Q_OS_WIN32
- #include <io.h>
- #endif
- #include <limits.h>
- #include <fcntl.h>
- #include <sys/stat.h>
- #include "dictziplib.hpp"
- #define USE_CACHE 1 /* Non-zero enables the chunk-cache lookup in dictData::read() */
- #define BUFFERSIZE 10240 /* Size of the scratch buffer used by dictData::read_header() */
- /*
- * Output buffer must be greater than or
- * equal to 110% of input buffer size, plus
- * 12 bytes.
- */
- #define OUT_BUFFER_SIZE 0xffffL
- #define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
- /* For gzip-compatible header, as defined in RFC 1952 */
- /* Magic for GZIP (rfc1952) */
- #define GZ_MAGIC1 0x1f /* First magic byte */
- #define GZ_MAGIC2 0x8b /* Second magic byte */
- /* FLaGs (bitmapped), from rfc1952 */
- #define GZ_FTEXT 0x01 /* Set for ASCII text */
- #define GZ_FHCRC 0x02 /* Header CRC16 */
- #define GZ_FEXTRA 0x04 /* Optional field (random access index) */
- #define GZ_FNAME 0x08 /* Original name */
- #define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
- #define GZ_MAX 2 /* Maximum compression */
- #define GZ_FAST 4 /* Fastest compression */
- /* These are from rfc1952 */
- #define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
- #define GZ_OS_AMIGA 1 /* Amiga */
- #define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
- #define GZ_OS_UNIX 3 /* Unix */
- #define GZ_OS_VMCMS 4 /* VM/CMS */
- #define GZ_OS_ATARI 5 /* Atari TOS */
- #define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
- #define GZ_OS_MAC 7 /* Macintosh */
- #define GZ_OS_Z 8 /* Z-System */
- #define GZ_OS_CPM 9 /* CP/M */
- #define GZ_OS_TOPS20 10 /* TOPS-20 */
- #define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
- #define GZ_OS_QDOS 12 /* QDOS */
- #define GZ_OS_ACORN 13 /* Acorn RISCOS */
- #define GZ_OS_UNKNOWN 255 /* unknown */
- #define GZ_RND_S1 'R' /* First magic for random access format */
- #define GZ_RND_S2 'A' /* Second magic for random access format */
- #define GZ_ID1 0 /* GZ_MAGIC1 */
- #define GZ_ID2 1 /* GZ_MAGIC2 */
- #define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
- #define GZ_FLG 3 /* FLaGs (see above) */
- #define GZ_MTIME 4 /* Modification TIME */
- #define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
- #define GZ_OS 9 /* Operating System */
- #define GZ_XLEN 10 /* eXtra LENgth (16bit) */
- #define GZ_FEXTRA_START 12 /* Start of extra fields */
- #define GZ_SI1 12 /* Subfield ID1 */
- #define GZ_SI2 13 /* Subfield ID2 */
- #define GZ_SUBLEN 14 /* Subfield length (16bit) */
- #define GZ_VERSION 16 /* Version for subfield format */
- #define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
- #define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
- #define GZ_RNDDATA 22 /* Random access data (16bit) */
- #define DICT_UNKNOWN 0 /* File type not determined yet */
- #define DICT_TEXT 1 /* No gzip magic: file is read as uncompressed data */
- #define DICT_GZIP 2 /* Gzip magic present, but no random-access subfield */
- #define DICT_DZIP 3 /* Gzip file carrying the 'R','A' random-access subfield */
- int dictData::read_header(const std::string &fname, int computeCRC)
- {
- FILE *str;
- int id1, id2, si1, si2;
- char buffer[BUFFERSIZE];
- int extraLength, subLength;
- int i;
- char *pt;
- int c;
- struct stat sb;
- unsigned long crc = crc32( 0L, Z_NULL, 0 );
- int count;
- unsigned long offset;
- if (!(str = fopen(fname.c_str(), "rb")))
- {
- //err_fatal_errno( __FUNCTION__,
- // "Cannot open data file \"%s\" for read\n", filename );
- }
- this->headerLength = GZ_XLEN - 1;
- this->type = DICT_UNKNOWN;
- id1 = getc( str );
- id2 = getc( str );
- if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2)
- {
- this->type = DICT_TEXT;
- fstat( fileno( str ), &sb );
- this->compressedLength = this->length = sb.st_size;
- this->origFilename = fname;
- this->mtime = sb.st_mtime;
- if (computeCRC)
- {
- rewind( str );
- while (!feof( str ))
- {
- if ((count = fread( buffer, 1, BUFFERSIZE, str )))
- {
- crc = crc32(crc, (Bytef *)buffer, count);
- }
- }
- }
- this->crc = crc;
- fclose( str );
- return 0;
- }
- this->type = DICT_GZIP;
- this->method = getc( str );
- this->flags = getc( str );
- this->mtime = getc( str ) << 0;
- this->mtime |= getc( str ) << 8;
- this->mtime |= getc( str ) << 16;
- this->mtime |= getc( str ) << 24;
- this->extraFlags = getc( str );
- this->os = getc( str );
- if (this->flags & GZ_FEXTRA)
- {
- extraLength = getc( str ) << 0;
- extraLength |= getc( str ) << 8;
- this->headerLength += extraLength + 2;
- si1 = getc( str );
- si2 = getc( str );
- if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2)
- {
- subLength = getc( str ) << 0;
- subLength |= getc( str ) << 8;
- this->version = getc( str ) << 0;
- this->version |= getc( str ) << 8;
- if (this->version != 1)
- {
- //err_internal( __FUNCTION__,
- // "dzip header version %d not supported\n",
- // this->version );
- }
- this->chunkLength = getc( str ) << 0;
- this->chunkLength |= getc( str ) << 8;
- this->chunkCount = getc( str ) << 0;
- this->chunkCount |= getc( str ) << 8;
- if (this->chunkCount <= 0)
- {
- fclose( str );
- return 5;
- }
- this->chunks = (int *)malloc(sizeof( this->chunks[0] )
- * this->chunkCount );
- for (i = 0; i < this->chunkCount; i++)
- {
- this->chunks[i] = getc( str ) << 0;
- this->chunks[i] |= getc( str ) << 8;
- }
- this->type = DICT_DZIP;
- }
- else
- {
- fseek( str, this->headerLength, SEEK_SET );
- }
- }
- if (this->flags & GZ_FNAME)
- { /* FIXME! Add checking against header len */
- pt = buffer;
- while ((c = getc( str )) && c != EOF)
- * pt++ = c;
- *pt = '\0';
- this->origFilename = buffer;
- this->headerLength += this->origFilename.length() + 1;
- }
- else
- {
- this->origFilename = "";
- }
- if (this->flags & GZ_COMMENT)
- { /* FIXME! Add checking for header len */
- pt = buffer;
- while ((c = getc( str )) && c != EOF)
- * pt++ = c;
- *pt = '\0';
- comment = buffer;
- headerLength += comment.length() + 1;
- }
- else
- {
- comment = "";
- }
- if (this->flags & GZ_FHCRC)
- {
- getc( str );
- getc( str );
- this->headerLength += 2;
- }
- if (ftell( str ) != this->headerLength + 1)
- {
- //err_internal( __FUNCTION__,
- // "File position (%lu) != header length + 1 (%d)\n",
- // ftell( str ), this->headerLength + 1 );
- }
- fseek( str, -8, SEEK_END );
- this->crc = getc( str ) << 0;
- this->crc |= getc( str ) << 8;
- this->crc |= getc( str ) << 16;
- this->crc |= getc( str ) << 24;
- this->length = getc( str ) << 0;
- this->length |= getc( str ) << 8;
- this->length |= getc( str ) << 16;
- this->length |= getc( str ) << 24;
- this->compressedLength = ftell( str );
- /* Compute offsets */
- this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
- * this->chunkCount );
- for (offset = this->headerLength + 1, i = 0;
- i < this->chunkCount;
- i++)
- {
- this->offsets[i] = offset;
- offset += this->chunks[i];
- }
- fclose( str );
- return 0;
- }
- bool dictData::open(const std::string& fname, int computeCRC)
- {
- struct stat sb;
- int j;
- int fd;
- this->initialized = 0;
- #ifdef Q_OS_UNIX
- if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode))
- #elif defined(Q_OS_WIN32)
- if (_stat(fname.c_str(), &sb) || !(sb.stMode & _S_IFREG))
- #endif
- {
- //err_warning( __FUNCTION__,
- // "%s is not a regular file -- ignoring\n", fname );
- return false;
- }
- if (read_header(fname, computeCRC))
- {
- //err_fatal( __FUNCTION__,
- // "\"%s\" not in text or dzip format\n", fname );
- return false;
- }
- if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0)
- {
- //err_fatal_errno( __FUNCTION__,
- // "Cannot open data file \"%s\"\n", fname );
- return false;
- }
- if (fstat(fd, &sb))
- {
- //err_fatal_errno( __FUNCTION__,
- // "Cannot stat data file \"%s\"\n", fname );
- return false;
- }
- this->size = sb.st_size;
- ::close(fd);
- if (!mapfile.open(fname.c_str(), size))
- return false;
- this->start = mapfile.begin();
- this->end = this->start + this->size;
- for (j = 0; j < DICT_CACHE_SIZE; j++)
- {
- cache[j].chunk = -1;
- cache[j].stamp = -1;
- cache[j].inBuffer = NULL;
- cache[j].count = 0;
- }
- return true;
- }
- void dictData::close()
- {
- int i;
- if (this->chunks)
- free(this->chunks);
- if (this->offsets)
- free(this->offsets);
- if (this->initialized)
- {
- if (inflateEnd( &this->zStream ))
- {
- //err_internal( __FUNCTION__,
- // "Cannot shut down inflation engine: %s\n",
- // this->zStream.msg );
- }
- }
- for (i = 0; i < DICT_CACHE_SIZE; ++i)
- {
- if (this -> cache [i].inBuffer)
- free (this -> cache [i].inBuffer);
- }
- }
- void dictData::read(char *buffer, unsigned long start, unsigned long size)
- {
- char *pt;
- unsigned long end;
- int count;
- char *inBuffer;
- char outBuffer[OUT_BUFFER_SIZE];
- int firstChunk, lastChunk;
- int firstOffset, lastOffset;
- int i, j;
- int found, target, lastStamp;
- static int stamp = 0;
- end = start + size;
- //buffer = malloc( size + 1 );
- //PRINTF(DBG_UNZIP,
- // ("dict_data_read( %p, %lu, %lu )\n",
- //h, start, size ));
- switch (this->type)
- {
- case DICT_GZIP:
- //err_fatal( __FUNCTION__,
- // "Cannot seek on pure gzip format files.\n"
- // "Use plain text (for performance)"
- // " or dzip format (for space savings).\n" );
- break;
- case DICT_TEXT:
- memcpy( buffer, this->start + start, size );
- //buffer[size] = '\0';
- break;
- case DICT_DZIP:
- if (!this->initialized)
- {
- ++this->initialized;
- this->zStream.zalloc = NULL;
- this->zStream.zfree = NULL;
- this->zStream.opaque = NULL;
- this->zStream.next_in = 0;
- this->zStream.avail_in = 0;
- this->zStream.next_out = NULL;
- this->zStream.avail_out = 0;
- if (inflateInit2( &this->zStream, -15 ) != Z_OK)
- {
- //err_internal( __FUNCTION__,
- // "Cannot initialize inflation engine: %s\n",
- //this->zStream.msg );
- }
- }
- firstChunk = start / this->chunkLength;
- firstOffset = start - firstChunk * this->chunkLength;
- lastChunk = end / this->chunkLength;
- lastOffset = end - lastChunk * this->chunkLength;
- //PRINTF(DBG_UNZIP,
- // (" start = %lu, end = %lu\n"
- //"firstChunk = %d, firstOffset = %d,"
- //" lastChunk = %d, lastOffset = %d\n",
- //start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
- for (pt = buffer, i = firstChunk; i <= lastChunk; i++)
- {
- /* Access cache */
- found = 0;
- target = 0;
- lastStamp = INT_MAX;
- for (j = 0; j < DICT_CACHE_SIZE; j++)
- {
- #if USE_CACHE
- if (this->cache[j].chunk == i)
- {
- found = 1;
- target = j;
- break;
- }
- #endif
- if (this->cache[j].stamp < lastStamp)
- {
- lastStamp = this->cache[j].stamp;
- target = j;
- }
- }
- this->cache[target].stamp = ++stamp;
- if (found)
- {
- count = this->cache[target].count;
- inBuffer = this->cache[target].inBuffer;
- }
- else
- {
- this->cache[target].chunk = i;
- if (!this->cache[target].inBuffer)
- this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
- inBuffer = this->cache[target].inBuffer;
- if (this->chunks[i] >= OUT_BUFFER_SIZE )
- {
- //err_internal( __FUNCTION__,
- // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
- // i, this->chunks[i], OUT_BUFFER_SIZE );
- }
- memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
- this->zStream.next_in = (Bytef *)outBuffer;
- this->zStream.avail_in = this->chunks[i];
- this->zStream.next_out = (Bytef *)inBuffer;
- this->zStream.avail_out = IN_BUFFER_SIZE;
- if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK)
- {
- //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
- }
- if (this->zStream.avail_in)
- {
- //err_internal( __FUNCTION__,
- // "inflate did not flush (%d pending, %d avail)\n",
- // this->zStream.avail_in, this->zStream.avail_out );
- }
- count = IN_BUFFER_SIZE - this->zStream.avail_out;
- this->cache[target].count = count;
- }
- if (i == firstChunk)
- {
- if (i == lastChunk)
- {
- memcpy( pt, inBuffer + firstOffset, lastOffset - firstOffset);
- pt += lastOffset - firstOffset;
- }
- else
- {
- if (count != this->chunkLength )
- {
- //err_internal( __FUNCTION__,
- // "Length = %d instead of %d\n",
- //count, this->chunkLength );
- }
- memcpy( pt, inBuffer + firstOffset,
- this->chunkLength - firstOffset );
- pt += this->chunkLength - firstOffset;
- }
- }
- else if (i == lastChunk)
- {
- memcpy( pt, inBuffer, lastOffset );
- pt += lastOffset;
- }
- else
- {
- assert( count == this->chunkLength );
- memcpy( pt, inBuffer, this->chunkLength );
- pt += this->chunkLength;
- }
- }
- //*pt = '\0';
- break;
- case DICT_UNKNOWN:
- //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
- break;
- }
- }
|