dictziplib.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. /* dictziplib.c --
  2. * http://stardict.sourceforge.net
  3. * Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
  4. * This file is a modified version of dictd-1.9.7's data.c
  5. *
  6. * data.c --
  7. * Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
  8. * Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
  9. * Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
  10. *
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2 of the License, or
  15. * (at your option) any later version.
  16. *
  17. * This program is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20. * GNU Library General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU General Public License
  23. * along with this program; if not, write to the Free Software
  24. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25. */
  26. //#define HAVE_MMAP //it will defined in config.h. this can be done by configure.in with a AC_FUNC_MMAP.
  27. #ifdef HAVE_CONFIG_H
  28. # include "config.h"
  29. #endif
  30. #include <QtGlobal>
  31. #include <cassert>
  32. #include <cstdio>
  33. #include <cstdlib>
  34. #include <cstring>
  35. #ifdef Q_OS_UNIX
  36. #include <unistd.h>
  37. #endif
  38. #ifdef Q_OS_WIN32
  39. #include <io.h>
  40. #endif
  41. #include <limits.h>
  42. #include <fcntl.h>
  43. #include <sys/stat.h>
  44. #include "dictziplib.hpp"
  /* Non-zero enables the chunk-cache lookup in dictData::read(). */
  #define USE_CACHE 1
  /* Size in bytes of the scratch buffer used by dictData::read_header(). */
  #define BUFFERSIZE 10240

  /*
   * Buffer sizes, named from the compressor's point of view: the "output"
   * buffer holds compressed data, the "input" buffer holds plain data.
   *
   * Output buffer must be greater than or
   * equal to 110% of input buffer size, plus
   * 12 bytes.
   */
  #define OUT_BUFFER_SIZE 0xffffL
  #define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))

  /* For gzip-compatible header, as defined in RFC 1952 */

  /* Magic for GZIP (rfc1952) */
  #define GZ_MAGIC1 0x1f /* First magic byte */
  #define GZ_MAGIC2 0x8b /* Second magic byte */

  /* FLaGs (bitmapped), from rfc1952 */
  #define GZ_FTEXT 0x01   /* Set for ASCII text */
  #define GZ_FHCRC 0x02   /* Header CRC16 */
  #define GZ_FEXTRA 0x04  /* Optional field (random access index) */
  #define GZ_FNAME 0x08   /* Original name */
  #define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */

  /* Values of the eXtra FLags byte */
  #define GZ_MAX 2  /* Maximum compression */
  #define GZ_FAST 4 /* Fastest compression */

  /* Operating system identifiers. These are from rfc1952 */
  #define GZ_OS_FAT 0       /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
  #define GZ_OS_AMIGA 1     /* Amiga */
  #define GZ_OS_VMS 2       /* VMS (or OpenVMS) */
  #define GZ_OS_UNIX 3      /* Unix */
  #define GZ_OS_VMCMS 4     /* VM/CMS */
  #define GZ_OS_ATARI 5     /* Atari TOS */
  #define GZ_OS_HPFS 6      /* HPFS filesystem (OS/2, NT) */
  #define GZ_OS_MAC 7       /* Macintosh */
  #define GZ_OS_Z 8         /* Z-System */
  #define GZ_OS_CPM 9       /* CP/M */
  #define GZ_OS_TOPS20 10   /* TOPS-20 */
  #define GZ_OS_NTFS 11     /* NTFS filesystem (NT) */
  #define GZ_OS_QDOS 12     /* QDOS */
  #define GZ_OS_ACORN 13    /* Acorn RISCOS */
  #define GZ_OS_UNKNOWN 255 /* unknown */

  /* Subfield identifier of the random access (dictzip) extra field */
  #define GZ_RND_S1 'R' /* First magic for random access format */
  #define GZ_RND_S2 'A' /* Second magic for random access format */

  /* Byte offsets of the header fields */
  #define GZ_ID1 0           /* GZ_MAGIC1 */
  #define GZ_ID2 1           /* GZ_MAGIC2 */
  #define GZ_CM 2            /* Compression Method (Z_DEFLATED) */
  #define GZ_FLG 3           /* FLaGs (see above) */
  #define GZ_MTIME 4         /* Modification TIME */
  #define GZ_XFL 8           /* eXtra FLags (GZ_MAX or GZ_FAST) */
  #define GZ_OS 9            /* Operating System */
  #define GZ_XLEN 10         /* eXtra LENgth (16bit) */
  #define GZ_FEXTRA_START 12 /* Start of extra fields */
  #define GZ_SI1 12          /* Subfield ID1 */
  #define GZ_SI2 13          /* Subfield ID2 */
  #define GZ_SUBLEN 14       /* Subfield length (16bit) */
  #define GZ_VERSION 16      /* Version for subfield format */
  #define GZ_CHUNKLEN 18     /* Chunk length (16bit) */
  #define GZ_CHUNKCNT 20     /* Number of chunks (16bit) */
  #define GZ_RNDDATA 22      /* Random access data (16bit) */

  /* File kinds stored in dictData::type by read_header() */
  #define DICT_UNKNOWN 0
  #define DICT_TEXT 1
  #define DICT_GZIP 2
  #define DICT_DZIP 3
  104. int dictData::read_header(const std::string &fname, int computeCRC)
  105. {
  106. FILE *str;
  107. int id1, id2, si1, si2;
  108. char buffer[BUFFERSIZE];
  109. int extraLength, subLength;
  110. int i;
  111. char *pt;
  112. int c;
  113. struct stat sb;
  114. unsigned long crc = crc32( 0L, Z_NULL, 0 );
  115. int count;
  116. unsigned long offset;
  117. if (!(str = fopen(fname.c_str(), "rb")))
  118. {
  119. //err_fatal_errno( __FUNCTION__,
  120. // "Cannot open data file \"%s\" for read\n", filename );
  121. }
  122. this->headerLength = GZ_XLEN - 1;
  123. this->type = DICT_UNKNOWN;
  124. id1 = getc( str );
  125. id2 = getc( str );
  126. if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2)
  127. {
  128. this->type = DICT_TEXT;
  129. fstat( fileno( str ), &sb );
  130. this->compressedLength = this->length = sb.st_size;
  131. this->origFilename = fname;
  132. this->mtime = sb.st_mtime;
  133. if (computeCRC)
  134. {
  135. rewind( str );
  136. while (!feof( str ))
  137. {
  138. if ((count = fread( buffer, 1, BUFFERSIZE, str )))
  139. {
  140. crc = crc32(crc, (Bytef *)buffer, count);
  141. }
  142. }
  143. }
  144. this->crc = crc;
  145. fclose( str );
  146. return 0;
  147. }
  148. this->type = DICT_GZIP;
  149. this->method = getc( str );
  150. this->flags = getc( str );
  151. this->mtime = getc( str ) << 0;
  152. this->mtime |= getc( str ) << 8;
  153. this->mtime |= getc( str ) << 16;
  154. this->mtime |= getc( str ) << 24;
  155. this->extraFlags = getc( str );
  156. this->os = getc( str );
  157. if (this->flags & GZ_FEXTRA)
  158. {
  159. extraLength = getc( str ) << 0;
  160. extraLength |= getc( str ) << 8;
  161. this->headerLength += extraLength + 2;
  162. si1 = getc( str );
  163. si2 = getc( str );
  164. if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2)
  165. {
  166. subLength = getc( str ) << 0;
  167. subLength |= getc( str ) << 8;
  168. this->version = getc( str ) << 0;
  169. this->version |= getc( str ) << 8;
  170. if (this->version != 1)
  171. {
  172. //err_internal( __FUNCTION__,
  173. // "dzip header version %d not supported\n",
  174. // this->version );
  175. }
  176. this->chunkLength = getc( str ) << 0;
  177. this->chunkLength |= getc( str ) << 8;
  178. this->chunkCount = getc( str ) << 0;
  179. this->chunkCount |= getc( str ) << 8;
  180. if (this->chunkCount <= 0)
  181. {
  182. fclose( str );
  183. return 5;
  184. }
  185. this->chunks = (int *)malloc(sizeof( this->chunks[0] )
  186. * this->chunkCount );
  187. for (i = 0; i < this->chunkCount; i++)
  188. {
  189. this->chunks[i] = getc( str ) << 0;
  190. this->chunks[i] |= getc( str ) << 8;
  191. }
  192. this->type = DICT_DZIP;
  193. }
  194. else
  195. {
  196. fseek( str, this->headerLength, SEEK_SET );
  197. }
  198. }
  199. if (this->flags & GZ_FNAME)
  200. { /* FIXME! Add checking against header len */
  201. pt = buffer;
  202. while ((c = getc( str )) && c != EOF)
  203. * pt++ = c;
  204. *pt = '\0';
  205. this->origFilename = buffer;
  206. this->headerLength += this->origFilename.length() + 1;
  207. }
  208. else
  209. {
  210. this->origFilename = "";
  211. }
  212. if (this->flags & GZ_COMMENT)
  213. { /* FIXME! Add checking for header len */
  214. pt = buffer;
  215. while ((c = getc( str )) && c != EOF)
  216. * pt++ = c;
  217. *pt = '\0';
  218. comment = buffer;
  219. headerLength += comment.length() + 1;
  220. }
  221. else
  222. {
  223. comment = "";
  224. }
  225. if (this->flags & GZ_FHCRC)
  226. {
  227. getc( str );
  228. getc( str );
  229. this->headerLength += 2;
  230. }
  231. if (ftell( str ) != this->headerLength + 1)
  232. {
  233. //err_internal( __FUNCTION__,
  234. // "File position (%lu) != header length + 1 (%d)\n",
  235. // ftell( str ), this->headerLength + 1 );
  236. }
  237. fseek( str, -8, SEEK_END );
  238. this->crc = getc( str ) << 0;
  239. this->crc |= getc( str ) << 8;
  240. this->crc |= getc( str ) << 16;
  241. this->crc |= getc( str ) << 24;
  242. this->length = getc( str ) << 0;
  243. this->length |= getc( str ) << 8;
  244. this->length |= getc( str ) << 16;
  245. this->length |= getc( str ) << 24;
  246. this->compressedLength = ftell( str );
  247. /* Compute offsets */
  248. this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
  249. * this->chunkCount );
  250. for (offset = this->headerLength + 1, i = 0;
  251. i < this->chunkCount;
  252. i++)
  253. {
  254. this->offsets[i] = offset;
  255. offset += this->chunks[i];
  256. }
  257. fclose( str );
  258. return 0;
  259. }
  260. bool dictData::open(const std::string& fname, int computeCRC)
  261. {
  262. struct stat sb;
  263. int j;
  264. int fd;
  265. this->initialized = 0;
  266. #ifdef Q_OS_UNIX
  267. if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode))
  268. #elif defined(Q_OS_WIN32)
  269. if (_stat(fname.c_str(), &sb) || !(sb.stMode & _S_IFREG))
  270. #endif
  271. {
  272. //err_warning( __FUNCTION__,
  273. // "%s is not a regular file -- ignoring\n", fname );
  274. return false;
  275. }
  276. if (read_header(fname, computeCRC))
  277. {
  278. //err_fatal( __FUNCTION__,
  279. // "\"%s\" not in text or dzip format\n", fname );
  280. return false;
  281. }
  282. if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0)
  283. {
  284. //err_fatal_errno( __FUNCTION__,
  285. // "Cannot open data file \"%s\"\n", fname );
  286. return false;
  287. }
  288. if (fstat(fd, &sb))
  289. {
  290. //err_fatal_errno( __FUNCTION__,
  291. // "Cannot stat data file \"%s\"\n", fname );
  292. return false;
  293. }
  294. this->size = sb.st_size;
  295. ::close(fd);
  296. if (!mapfile.open(fname.c_str(), size))
  297. return false;
  298. this->start = mapfile.begin();
  299. this->end = this->start + this->size;
  300. for (j = 0; j < DICT_CACHE_SIZE; j++)
  301. {
  302. cache[j].chunk = -1;
  303. cache[j].stamp = -1;
  304. cache[j].inBuffer = NULL;
  305. cache[j].count = 0;
  306. }
  307. return true;
  308. }
  309. void dictData::close()
  310. {
  311. int i;
  312. if (this->chunks)
  313. free(this->chunks);
  314. if (this->offsets)
  315. free(this->offsets);
  316. if (this->initialized)
  317. {
  318. if (inflateEnd( &this->zStream ))
  319. {
  320. //err_internal( __FUNCTION__,
  321. // "Cannot shut down inflation engine: %s\n",
  322. // this->zStream.msg );
  323. }
  324. }
  325. for (i = 0; i < DICT_CACHE_SIZE; ++i)
  326. {
  327. if (this -> cache [i].inBuffer)
  328. free (this -> cache [i].inBuffer);
  329. }
  330. }
  331. void dictData::read(char *buffer, unsigned long start, unsigned long size)
  332. {
  333. char *pt;
  334. unsigned long end;
  335. int count;
  336. char *inBuffer;
  337. char outBuffer[OUT_BUFFER_SIZE];
  338. int firstChunk, lastChunk;
  339. int firstOffset, lastOffset;
  340. int i, j;
  341. int found, target, lastStamp;
  342. static int stamp = 0;
  343. end = start + size;
  344. //buffer = malloc( size + 1 );
  345. //PRINTF(DBG_UNZIP,
  346. // ("dict_data_read( %p, %lu, %lu )\n",
  347. //h, start, size ));
  348. switch (this->type)
  349. {
  350. case DICT_GZIP:
  351. //err_fatal( __FUNCTION__,
  352. // "Cannot seek on pure gzip format files.\n"
  353. // "Use plain text (for performance)"
  354. // " or dzip format (for space savings).\n" );
  355. break;
  356. case DICT_TEXT:
  357. memcpy( buffer, this->start + start, size );
  358. //buffer[size] = '\0';
  359. break;
  360. case DICT_DZIP:
  361. if (!this->initialized)
  362. {
  363. ++this->initialized;
  364. this->zStream.zalloc = NULL;
  365. this->zStream.zfree = NULL;
  366. this->zStream.opaque = NULL;
  367. this->zStream.next_in = 0;
  368. this->zStream.avail_in = 0;
  369. this->zStream.next_out = NULL;
  370. this->zStream.avail_out = 0;
  371. if (inflateInit2( &this->zStream, -15 ) != Z_OK)
  372. {
  373. //err_internal( __FUNCTION__,
  374. // "Cannot initialize inflation engine: %s\n",
  375. //this->zStream.msg );
  376. }
  377. }
  378. firstChunk = start / this->chunkLength;
  379. firstOffset = start - firstChunk * this->chunkLength;
  380. lastChunk = end / this->chunkLength;
  381. lastOffset = end - lastChunk * this->chunkLength;
  382. //PRINTF(DBG_UNZIP,
  383. // (" start = %lu, end = %lu\n"
  384. //"firstChunk = %d, firstOffset = %d,"
  385. //" lastChunk = %d, lastOffset = %d\n",
  386. //start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
  387. for (pt = buffer, i = firstChunk; i <= lastChunk; i++)
  388. {
  389. /* Access cache */
  390. found = 0;
  391. target = 0;
  392. lastStamp = INT_MAX;
  393. for (j = 0; j < DICT_CACHE_SIZE; j++)
  394. {
  395. #if USE_CACHE
  396. if (this->cache[j].chunk == i)
  397. {
  398. found = 1;
  399. target = j;
  400. break;
  401. }
  402. #endif
  403. if (this->cache[j].stamp < lastStamp)
  404. {
  405. lastStamp = this->cache[j].stamp;
  406. target = j;
  407. }
  408. }
  409. this->cache[target].stamp = ++stamp;
  410. if (found)
  411. {
  412. count = this->cache[target].count;
  413. inBuffer = this->cache[target].inBuffer;
  414. }
  415. else
  416. {
  417. this->cache[target].chunk = i;
  418. if (!this->cache[target].inBuffer)
  419. this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
  420. inBuffer = this->cache[target].inBuffer;
  421. if (this->chunks[i] >= OUT_BUFFER_SIZE )
  422. {
  423. //err_internal( __FUNCTION__,
  424. // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
  425. // i, this->chunks[i], OUT_BUFFER_SIZE );
  426. }
  427. memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
  428. this->zStream.next_in = (Bytef *)outBuffer;
  429. this->zStream.avail_in = this->chunks[i];
  430. this->zStream.next_out = (Bytef *)inBuffer;
  431. this->zStream.avail_out = IN_BUFFER_SIZE;
  432. if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK)
  433. {
  434. //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
  435. }
  436. if (this->zStream.avail_in)
  437. {
  438. //err_internal( __FUNCTION__,
  439. // "inflate did not flush (%d pending, %d avail)\n",
  440. // this->zStream.avail_in, this->zStream.avail_out );
  441. }
  442. count = IN_BUFFER_SIZE - this->zStream.avail_out;
  443. this->cache[target].count = count;
  444. }
  445. if (i == firstChunk)
  446. {
  447. if (i == lastChunk)
  448. {
  449. memcpy( pt, inBuffer + firstOffset, lastOffset - firstOffset);
  450. pt += lastOffset - firstOffset;
  451. }
  452. else
  453. {
  454. if (count != this->chunkLength )
  455. {
  456. //err_internal( __FUNCTION__,
  457. // "Length = %d instead of %d\n",
  458. //count, this->chunkLength );
  459. }
  460. memcpy( pt, inBuffer + firstOffset,
  461. this->chunkLength - firstOffset );
  462. pt += this->chunkLength - firstOffset;
  463. }
  464. }
  465. else if (i == lastChunk)
  466. {
  467. memcpy( pt, inBuffer, lastOffset );
  468. pt += lastOffset;
  469. }
  470. else
  471. {
  472. assert( count == this->chunkLength );
  473. memcpy( pt, inBuffer, this->chunkLength );
  474. pt += this->chunkLength;
  475. }
  476. }
  477. //*pt = '\0';
  478. break;
  479. case DICT_UNKNOWN:
  480. //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
  481. break;
  482. }
  483. }