jdhuff.c 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573
  1. /*
  2. * jdhuff.c
  3. *
  4. * Copyright (C) 1991-1996, Thomas G. Lane.
  5. * This file is part of the Independent JPEG Group's software.
  6. * For conditions of distribution and use, see the accompanying README file.
  7. *
  8. * This file contains Huffman entropy decoding routines.
  9. *
  10. * Much of the complexity here has to do with supporting input suspension.
  11. * If the data source module demands suspension, we want to be able to back
  12. * up to the start of the current MCU. To do this, we copy state variables
  13. * into local working storage, and update them back to the permanent
  14. * storage only upon successful completion of an MCU.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "jinclude.h"
  18. #include "jpeglib.h"
  19. #include "jdhuff.h" /* Declarations shared with jdphuff.c */
  20. /*
  21. * Expanded entropy decoder object for Huffman decoding.
  22. *
  23. * The savable_state subrecord contains fields that change within an MCU,
  24. * but must not be updated permanently until we complete the MCU.
  25. */
  26. typedef struct {
  27. int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
  28. } savable_state;
  29. /* This macro is to work around compilers with missing or broken
  30. * structure assignment. You'll need to fix this code if you have
  31. * such a compiler and you change MAX_COMPS_IN_SCAN.
  32. */
  33. #ifndef NO_STRUCT_ASSIGN
  34. #define ASSIGN_STATE(dest,src) ((dest) = (src))
  35. #else
  36. #if MAX_COMPS_IN_SCAN == 4
  37. #define ASSIGN_STATE(dest,src) \
  38. ((dest).last_dc_val[0] = (src).last_dc_val[0], \
  39. (dest).last_dc_val[1] = (src).last_dc_val[1], \
  40. (dest).last_dc_val[2] = (src).last_dc_val[2], \
  41. (dest).last_dc_val[3] = (src).last_dc_val[3])
  42. #endif
  43. #endif
  44. typedef struct {
  45. struct jpeg_entropy_decoder pub; /* public fields */
  46. /* These fields are loaded into local variables at start of each MCU.
  47. * In case of suspension, we exit WITHOUT updating them.
  48. */
  49. bitread_perm_state bitstate; /* Bit buffer at start of MCU */
  50. savable_state saved; /* Other state at start of MCU */
  51. /* These fields are NOT loaded into local working state. */
  52. unsigned int restarts_to_go; /* MCUs left in this restart interval */
  53. /* Pointers to derived tables (these workspaces have image lifespan) */
  54. d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
  55. d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
  56. } huff_entropy_decoder;
  57. typedef huff_entropy_decoder * huff_entropy_ptr;
  58. /*
  59. * Initialize for a Huffman-compressed scan.
  60. */
  61. METHODDEF(void)
  62. start_pass_huff_decoder (j_decompress_ptr cinfo)
  63. {
  64. huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
  65. int ci, dctbl, actbl;
  66. jpeg_component_info * compptr;
  67. /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
  68. * This ought to be an error condition, but we make it a warning because
  69. * there are some baseline files out there with all zeroes in these bytes.
  70. */
  71. if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2-1 ||
  72. cinfo->Ah != 0 || cinfo->Al != 0)
  73. WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
  74. for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
  75. compptr = cinfo->cur_comp_info[ci];
  76. dctbl = compptr->dc_tbl_no;
  77. actbl = compptr->ac_tbl_no;
  78. /* Make sure requested tables are present */
  79. if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS ||
  80. cinfo->dc_huff_tbl_ptrs[dctbl] == NULL)
  81. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
  82. if (actbl < 0 || actbl >= NUM_HUFF_TBLS ||
  83. cinfo->ac_huff_tbl_ptrs[actbl] == NULL)
  84. ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
  85. /* Compute derived values for Huffman tables */
  86. /* We may do this more than once for a table, but it's not expensive */
  87. jpeg_make_d_derived_tbl(cinfo, cinfo->dc_huff_tbl_ptrs[dctbl],
  88. & entropy->dc_derived_tbls[dctbl]);
  89. jpeg_make_d_derived_tbl(cinfo, cinfo->ac_huff_tbl_ptrs[actbl],
  90. & entropy->ac_derived_tbls[actbl]);
  91. /* Initialize DC predictions to 0 */
  92. entropy->saved.last_dc_val[ci] = 0;
  93. }
  94. /* Initialize bitread state variables */
  95. entropy->bitstate.bits_left = 0;
  96. entropy->bitstate.get_buffer_64 = 0;
  97. entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
  98. entropy->bitstate.printed_eod = FALSE;
  99. /* Initialize restart counter */
  100. entropy->restarts_to_go = cinfo->restart_interval;
  101. }
  102. /*
  103. * Compute the derived values for a Huffman table.
  104. * Note this is also used by jdphuff.c.
  105. */
  106. GLOBAL(void)
  107. jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, JHUFF_TBL * htbl,
  108. d_derived_tbl ** pdtbl)
  109. {
  110. d_derived_tbl *dtbl;
  111. int p, i, l, si;
  112. int lookbits, ctr;
  113. char huffsize[257];
  114. unsigned int huffcode[257];
  115. unsigned int code;
  116. /* Allocate a workspace if we haven't already done so. */
  117. if (*pdtbl == NULL)
  118. *pdtbl = (d_derived_tbl *)
  119. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  120. SIZEOF(d_derived_tbl));
  121. dtbl = *pdtbl;
  122. dtbl->pub = htbl; /* fill in back link */
  123. /* Figure C.1: make table of Huffman code length for each symbol */
  124. /* Note that this is in code-length order. */
  125. p = 0;
  126. for (l = 1; l <= 16; l++) {
  127. for (i = 1; i <= (int) htbl->bits[l]; i++)
  128. huffsize[p++] = (char) l;
  129. }
  130. huffsize[p] = 0;
  131. /* Figure C.2: generate the codes themselves */
  132. /* Note that this is in code-length order. */
  133. code = 0;
  134. si = huffsize[0];
  135. p = 0;
  136. while (huffsize[p]) {
  137. while (((int) huffsize[p]) == si) {
  138. huffcode[p++] = code;
  139. code++;
  140. }
  141. code <<= 1;
  142. si++;
  143. }
  144. /* Figure F.15: generate decoding tables for bit-sequential decoding */
  145. p = 0;
  146. for (l = 1; l <= 16; l++) {
  147. if (htbl->bits[l]) {
  148. dtbl->valptr[l] = p; /* huffval[] index of 1st symbol of code length l */
  149. dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
  150. p += htbl->bits[l];
  151. dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
  152. } else {
  153. dtbl->maxcode[l] = -1; /* -1 if no codes of this length */
  154. }
  155. }
  156. dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
  157. /* Compute lookahead tables to speed up decoding.
  158. * First we set all the table entries to 0, indicating "too long";
  159. * then we iterate through the Huffman codes that are short enough and
  160. * fill in all the entries that correspond to bit sequences starting
  161. * with that code.
  162. */
  163. MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
  164. p = 0;
  165. for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
  166. for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
  167. /* l = current code's length, p = its index in huffcode[] & huffval[]. */
  168. /* Generate left-justified code followed by all possible bit sequences */
  169. lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
  170. for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
  171. dtbl->look_nbits[lookbits] = l;
  172. dtbl->look_sym[lookbits] = htbl->huffval[p];
  173. lookbits++;
  174. }
  175. }
  176. }
  177. }
  178. /*
  179. * Out-of-line code for bit fetching (shared with jdphuff.c).
  180. * See jdhuff.h for info about usage.
  181. * Note: current values of get_buffer and bits_left are passed as parameters,
  182. * but are returned in the corresponding fields of the state struct.
  183. *
  184. * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
  185. * of get_buffer to be used. (On machines with wider words, an even larger
  186. * buffer could be used.) However, on some machines 32-bit shifts are
  187. * quite slow and take time proportional to the number of places shifted.
  188. * (This is true with most PC compilers, for instance.) In this case it may
  189. * be a win to set MIN_GET_BITS to the minimum value of 15. This reduces the
  190. * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
  191. */
  192. #ifdef SLOW_SHIFT_32
  193. #define MIN_GET_BITS 15 /* minimum allowable value */
  194. #else
  195. #define MIN_GET_BITS (BIT_BUF_SIZE-7)
  196. #endif
  197. // not used in MMX version
  198. GLOBAL(boolean)
  199. jpeg_fill_bit_buffer (bitread_working_state * state,
  200. register bit_buf_type get_buffer, register int bits_left,
  201. int nbits)
  202. /* Load up the bit buffer to a depth of at least nbits */
  203. {
  204. /* Copy heavily used state fields into locals (hopefully registers) */
  205. register const JOCTET * next_input_byte = state->next_input_byte;
  206. register size_t bytes_in_buffer = state->bytes_in_buffer;
  207. register int c;
  208. /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
  209. /* (It is assumed that no request will be for more than that many bits.) */
  210. while (bits_left < MIN_GET_BITS) {
  211. /* Attempt to read a byte */
  212. if (state->unread_marker != 0)
  213. goto no_more_data; /* can't advance past a marker */
  214. if (bytes_in_buffer == 0) {
  215. if (! (*state->cinfo->src->fill_input_buffer) (state->cinfo))
  216. return FALSE;
  217. next_input_byte = state->cinfo->src->next_input_byte;
  218. bytes_in_buffer = state->cinfo->src->bytes_in_buffer;
  219. }
  220. bytes_in_buffer--;
  221. c = GETJOCTET(*next_input_byte++);
  222. /* If it's 0xFF, check and discard stuffed zero byte */
  223. if (c == 0xFF)
  224. {
  225. do
  226. {
  227. if (bytes_in_buffer == 0)
  228. {
  229. if (! (*state->cinfo->src->fill_input_buffer) (state->cinfo))
  230. return FALSE;
  231. next_input_byte = state->cinfo->src->next_input_byte;
  232. bytes_in_buffer = state->cinfo->src->bytes_in_buffer;
  233. }
  234. bytes_in_buffer--;
  235. c = GETJOCTET(*next_input_byte++);
  236. } while (c == 0xFF);
  237. if (c == 0)
  238. {
  239. // Found FF/00, which represents an FF data byte
  240. c = 0xFF;
  241. }
  242. else
  243. {
  244. // Oops, it's actually a marker indicating end of compressed data.
  245. // Better put it back for use later
  246. state->unread_marker = c;
  247. no_more_data:
  248. // There should be enough bits still left in the data segment;
  249. // if so, just break out of the outer while loop.
  250. if (bits_left >= nbits)
  251. break;
  252. /* Uh-oh. Report corrupted data to user and stuff zeroes into
  253. * the data stream, so that we can produce some kind of image.
  254. * Note that this code will be repeated for each byte demanded
  255. * for the rest of the segment. We use a nonvolatile flag to ensure
  256. * that only one warning message appears.
  257. */
  258. if (! *(state->printed_eod_ptr))
  259. {
  260. WARNMS(state->cinfo, JWRN_HIT_MARKER);
  261. *(state->printed_eod_ptr) = TRUE;
  262. }
  263. c = 0; // insert a zero byte into bit buffer
  264. }
  265. }
  266. /* OK, load c into get_buffer */
  267. get_buffer = (get_buffer << 8) | c;
  268. bits_left += 8;
  269. }
  270. /* Unload the local registers */
  271. state->next_input_byte = next_input_byte;
  272. state->bytes_in_buffer = bytes_in_buffer;
  273. state->get_buffer = get_buffer;
  274. state->bits_left = bits_left;
  275. return TRUE;
  276. }
  277. /*
  278. * Out-of-line code for Huffman code decoding.
  279. * See jdhuff.h for info about usage.
  280. */
  281. GLOBAL(int)
  282. jpeg_huff_decode (bitread_working_state * state,
  283. register bit_buf_type get_buffer, register int bits_left,
  284. d_derived_tbl * htbl, int min_bits)
  285. {
  286. register int l = min_bits;
  287. register INT32 code;
  288. /* HUFF_DECODE has determined that the code is at least min_bits */
  289. /* bits long, so fetch that many bits in one swoop. */
  290. CHECK_BIT_BUFFER(*state, l, return -1);
  291. code = GET_BITS(l);
  292. /* Collect the rest of the Huffman code one bit at a time. */
  293. /* This is per Figure F.16 in the JPEG spec. */
  294. while (code > htbl->maxcode[l]) {
  295. code <<= 1;
  296. CHECK_BIT_BUFFER(*state, 1, return -1);
  297. code |= GET_BITS(1);
  298. l++;
  299. }
  300. /* Unload the local registers */
  301. state->get_buffer = get_buffer;
  302. state->bits_left = bits_left;
  303. /* With garbage input we may reach the sentinel value l = 17. */
  304. if (l > 16) {
  305. WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
  306. return 0; /* fake a zero as the safest result */
  307. }
  308. return htbl->pub->huffval[ htbl->valptr[l] +
  309. ((int) (code - htbl->mincode[l])) ];
  310. }
  311. /*
  312. * Figure F.12: extend sign bit.
  313. * On some machines, a shift and add will be faster than a table lookup.
  314. */
  315. #ifdef AVOID_TABLES
  316. #define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
  317. #else
  318. #define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
  319. static const int extend_test[16] = /* entry n is 2**(n-1) */
  320. { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
  321. 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
  322. static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
  323. { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
  324. ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
  325. ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
  326. ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
  327. #endif /* AVOID_TABLES */
  328. /*
  329. * Check for a restart marker & resynchronize decoder.
  330. * Returns FALSE if must suspend.
  331. */
  332. LOCAL(boolean)
  333. process_restart (j_decompress_ptr cinfo)
  334. {
  335. huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
  336. int ci;
  337. /* Throw away any unused bits remaining in bit buffer; */
  338. /* include any full bytes in next_marker's count of discarded bytes */
  339. cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
  340. entropy->bitstate.bits_left = 0;
  341. /* Advance past the RSTn marker */
  342. if (! (*cinfo->marker->read_restart_marker) (cinfo))
  343. return FALSE;
  344. /* Re-initialize DC predictions to 0 */
  345. for (ci = 0; ci < cinfo->comps_in_scan; ci++)
  346. entropy->saved.last_dc_val[ci] = 0;
  347. /* Reset restart counter */
  348. entropy->restarts_to_go = cinfo->restart_interval;
  349. /* Next segment can get another out-of-data warning */
  350. entropy->bitstate.printed_eod = FALSE;
  351. return TRUE;
  352. }
  353. /*
  354. * Decode and return one MCU's worth of Huffman-compressed coefficients.
  355. * The coefficients are reordered from zigzag order into natural array order,
  356. * but are not dequantized.
  357. *
  358. * The i'th block of the MCU is stored into the block pointed to by
  359. * MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
  360. * (Wholesale zeroing is usually a little faster than retail...)
  361. *
  362. * Returns FALSE if data source requested suspension. In that case no
  363. * changes have been made to permanent state. (Exception: some output
  364. * coefficients may already have been assigned. This is harmless for
  365. * this module, since we'll just re-assign them on the next call.)
  366. */
  367. METHODDEF(boolean)
  368. __cdecl decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  369. {
  370. huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
  371. register int s, k, r;
  372. int blkn, ci;
  373. JBLOCKROW block;
  374. BITREAD_STATE_VARS;
  375. savable_state state;
  376. d_derived_tbl * dctbl;
  377. d_derived_tbl * actbl;
  378. jpeg_component_info * compptr;
  379. /* Process restart marker if needed; may have to suspend */
  380. if (cinfo->restart_interval) {
  381. if (entropy->restarts_to_go == 0)
  382. if (! process_restart(cinfo))
  383. return FALSE;
  384. }
  385. /* Load up working state */
  386. BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
  387. ASSIGN_STATE(state, entropy->saved);
  388. /* Outer loop handles each block in the MCU */
  389. for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
  390. block = MCU_data[blkn];
  391. ci = cinfo->MCU_membership[blkn];
  392. compptr = cinfo->cur_comp_info[ci];
  393. dctbl = entropy->dc_derived_tbls[compptr->dc_tbl_no];
  394. actbl = entropy->ac_derived_tbls[compptr->ac_tbl_no];
  395. /* Decode a single block's worth of coefficients */
  396. /* Section F.2.2.1: decode the DC coefficient difference */
  397. HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
  398. if (s) {
  399. CHECK_BIT_BUFFER(br_state, s, return FALSE);
  400. r = GET_BITS(s);
  401. s = HUFF_EXTEND(r, s);
  402. }
  403. /* Shortcut if component's values are not interesting */
  404. if (! compptr->component_needed)
  405. goto skip_ACs;
  406. /* Convert DC difference to actual value, update last_dc_val */
  407. s += state.last_dc_val[ci];
  408. state.last_dc_val[ci] = s;
  409. /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
  410. (*block)[0] = (JCOEF) s;
  411. /* Do we need to decode the AC coefficients for this component? */
  412. if (compptr->DCT_scaled_size > 1) {
  413. /* Section F.2.2.2: decode the AC coefficients */
  414. /* Since zeroes are skipped, output area must be cleared beforehand */
  415. for (k = 1; k < DCTSIZE2; k++) {
  416. HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
  417. r = s >> 4;
  418. s &= 15;
  419. if (s) {
  420. k += r;
  421. CHECK_BIT_BUFFER(br_state, s, return FALSE);
  422. r = GET_BITS(s);
  423. s = HUFF_EXTEND(r, s);
  424. /* Output coefficient in natural (dezigzagged) order.
  425. * Note: the extra entries in jpeg_natural_order[] will save us
  426. * if k >= DCTSIZE2, which could happen if the data is corrupted.
  427. */
  428. (*block)[jpeg_natural_order[k]] = (JCOEF) s;
  429. } else {
  430. if (r != 15)
  431. break;
  432. k += 15;
  433. }
  434. }
  435. } else {
  436. skip_ACs:
  437. /* Section F.2.2.2: decode the AC coefficients */
  438. /* In this path we just discard the values */
  439. for (k = 1; k < DCTSIZE2; k++) {
  440. HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
  441. r = s >> 4;
  442. s &= 15;
  443. if (s) {
  444. k += r;
  445. CHECK_BIT_BUFFER(br_state, s, return FALSE);
  446. DROP_BITS(s);
  447. } else {
  448. if (r != 15)
  449. break;
  450. k += 15;
  451. }
  452. }
  453. }
  454. }
  455. /* Completed MCU, so update state */
  456. BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
  457. ASSIGN_STATE(entropy->saved, state);
  458. /* Account for restart interval (no-op if not using restarts) */
  459. entropy->restarts_to_go--;
  460. return TRUE;
  461. }
  462. //MMX routines
  463. //new Typedefs necessary for the new decode_mcu_fast to work.
  464. typedef struct jpeg_source_mgr * j_csrc_ptr;
  465. //typedef struct jpeg_err_mgr * j_cerr_ptr;
  466. typedef struct jpeg_error_mgr * j_cerr_ptr;
  467. typedef d_derived_tbl * h_pub_ptr;
  468. /*
  469. * Decode and return one MCU's worth of Huffman-compressed coefficients.
  470. * The coefficients are reordered from zigzag order into natural array order,
  471. * but are not dequantized.
  472. *
  473. * The i'th block of the MCU is stored into the block pointed to by
  474. * MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
  475. * (Wholesale zeroing is usually a little faster than retail...)
  476. *
  477. * Returns FALSE if data source requested suspension. In that case no
  478. * changes have been made to permanent state. (Exception: some output
  479. * coefficients may already have been assigned. This is harmless for
  480. * this module, since we'll just re-assign them on the next call.)
  481. */
  482. const int twoexpnminusone[13] = { 0, 1, 2, 4, 8,16,32,64,128,256,512,1024,2048};
  483. const int oneminustwoexpn[13] = { 0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047};
  484. #ifdef _X86_
  485. METHODDEF(boolean)
  486. __cdecl decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
  487. {
  488. // return decode_mcu_inner(cinfo,MCU_data);
  489. //***************************************************************************/
  490. //*
  491. //* INTEL Corporation Proprietary Information
  492. //*
  493. //*
  494. //* Copyright (c) 1996 Intel Corporation.
  495. //* All rights reserved.
  496. //*
  497. //***************************************************************************/
  498. // AUTHOR: Mark Buxton
  499. /***************************************************************************/
  500. // MMX version of the "Huffman Decoder" within the IJG decompressor code.
  501. // // MMX Allocation:
  502. //-------------------------------------------------------------
  503. //// XXXX XXXX | XXXX XXXX
  504. //
  505. // MM0: ------------
  506. // MM1: bit_buffer
  507. // MM2: temp buffer
  508. // MM3: temp buffer
  509. // MM4: 0000 0000 0000 0040
  510. // MM5: ------------ dctbl
  511. // MM6: ------------ actbl
  512. // MM7: ------------ temp_buffer
  513. //
  514. //
  515. // edi - bits left in the Bit Buffer
  516. // //routines to modify: jpeg_huff_decode_fast
  517. // // fill_bit_buffer
  518. //
  519. //
  520. //
  521. // Other available storage locations:
  522. //
  523. // ebp - state
  524. //data declaration:
  525. unsigned char blkn;
  526. unsigned char nbits;
  527. JBLOCKROW block;
  528. huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
  529. jpeg_component_info * compptr;
  530. bitread_working_state br_state;
  531. savable_state state;
  532. d_derived_tbl * dctbl;
  533. d_derived_tbl * actbl;
  534. d_derived_tbl * htbl;
  535. int ci,temp1;
  536. int code;
  537. int min_bits;
  538. __asm {
  539. // // Process restart marker if needed// may have to suspend
  540. // if (cinfo->restart_interval) {
  541. mov eax,dword ptr [cinfo]
  542. cmp (j_decompress_ptr [eax]).restart_interval,1
  543. jne Skip_Restart
  544. //if (entropy->restarts_to_go == 0)
  545. mov eax,dword ptr [entropy]
  546. cmp (dword ptr [eax]).restarts_to_go,0
  547. jne Skip_Restart
  548. //if (! process_restart(cinfo))
  549. mov eax,dword ptr [cinfo]
  550. push eax
  551. call process_restart
  552. add esp,4
  553. test eax,eax
  554. jne Skip_Restart
  555. jmp Return_Fail
  556. Skip_Restart:
  557. // // Load up working state
  558. // br_state.cinfo = cinfop//
  559. // br_state.next_input_byte = cinfop->src->next_input_byte//
  560. // br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer//
  561. // br_state.unread_marker = cinfop->unread_marker//
  562. // get_buffer = entropy->bitstate.get_buffer//
  563. // bits_left = entropy->bitstate.bits_left//
  564. // br_state.printed_eod_ptr = & entropy->bitstate.printed_eod
  565. mov eax,dword ptr [cinfo]
  566. mov dword ptr [br_state.cinfo],eax
  567. mov ebx,(j_decompress_ptr [eax]).unread_marker
  568. mov dword ptr [br_state.unread_marker],ebx
  569. mov eax,(j_decompress_ptr [eax]).src
  570. mov ebx,(j_csrc_ptr [eax]).next_input_byte
  571. mov dword ptr [br_state.next_input_byte],ebx
  572. mov ebx,(j_csrc_ptr [eax]).bytes_in_buffer
  573. mov dword ptr [br_state.bytes_in_buffer],ebx
  574. //pxor mm0,mm0
  575. mov eax,dword ptr[entropy]
  576. movq mm1,(qword ptr [eax]).bitstate.get_buffer_64
  577. mov edi,(dword ptr [eax]).bitstate.bits_left
  578. lea eax,dword ptr[eax].bitstate.printed_eod
  579. mov dword ptr [br_state.printed_eod_ptr],eax
  580. mov ebx,dword ptr [entropy]
  581. xor eax,eax
  582. mov eax,(dword ptr [ebx]).saved.last_dc_val[0x00]
  583. mov dword ptr [state.last_dc_val+0x00],eax
  584. mov eax,(dword ptr [ebx]).saved.last_dc_val[0x04]
  585. mov dword ptr [state.last_dc_val+0x04],eax
  586. mov eax,(dword ptr [ebx]).saved.last_dc_val[0x08]
  587. mov dword ptr [state.last_dc_val+0x08],eax
  588. mov eax,(dword ptr [ebx]).saved.last_dc_val[0x0C]
  589. mov dword ptr [state.last_dc_val+0x0c],eax
  590. //make sure all variables are initalized.
  591. //see map in header for register usage
  592. // // Outer loop handles each block in the MCU
  593. //the address of each block is just MCU_data + blkn<<7 (this is MCU_data * 128, right?)
  594. //ci = cinfo->MCU_membership[blkn];
  595. //compptr = cinfo->cur_comp_info[ci];
  596. //dctbl = entropy->dc_derived_tbls[compptr->dc_tbl_no];
  597. //actbl = entropy->ac_derived_tbls[compptr->ac_tbl_no];
  598. mov byte ptr [blkn],0
  599. pxor mm5,mm5
  600. pxor mm6,mm6
  601. pxor mm2,mm2
  602. pxor mm3,mm3
  603. pxor mm4,mm4
  604. mov eax,0x40
  605. movd mm4,eax
  606. }
  607. One_Block_Loop:
  608. block = MCU_data[blkn];
  609. ci = cinfo->MCU_membership[blkn];
  610. compptr = cinfo->cur_comp_info[ci];
  611. actbl = entropy->ac_derived_tbls[compptr->ac_tbl_no];
  612. dctbl = entropy->dc_derived_tbls[compptr->dc_tbl_no];
  613. __asm
  614. {
  615. movd mm5,[dctbl]
  616. movd mm6,[actbl]
  617. //// Decode a single block's worth of coefficients
  618. //// Section F.2.2.1: decode the DC coefficient difference
  619. //---------------------------------------------------------------------------------
  620. //DC loop section: there are probably only ~6 to process.
  621. //---------------------------------------------------------------------------------
  622. //set up the MMX registers:
  623. //move the dctbl pointer into MM6
  624. //pxor mm6,mm6
  625. //movd mm6,dword ptr [dctbl]
  626. //movd eax,mm0
  627. cmp edi,8
  628. jl Get_n_bits_DC
  629. //normal path
  630. //take a peek at the data in get_buffer.
  631. Got_n_bits_DC:
  632. movq mm3,mm1 //copy the Bit-Buffer
  633. psrlq mm1,56 //Extract the MS 8 bits from the Bit Buffer
  634. movd eax,mm5 //load the DC table pointer
  635. movd ecx,mm1 //lsb holds the 8 input bits
  636. movq mm1,mm3
  637. mov ebx,(dword ptr[eax+4*ecx]).look_nbits
  638. /*get the number of bits required to represent
  639. this Huffman Code (n) . If the code is > 8 bits,
  640. the table entry is Zero*/
  641. test ebx,ebx
  642. je Nineplus_Decode_DC//branch taken 3% of the time. If code > 8 bits,
  643. //get it via a slower metho
  644. movd mm2,ebx
  645. sub edi,ebx //invalidate n bits from the Bit counter
  646. xor ebx,ebx
  647. psllq mm1,mm2 //invalidate n bits from the Bit Buffer
  648. mov bl,(byte ptr[eax+ecx]).look_sym //read in the Run Lenth Code (rrrr|ssss); though for the DC coefct's rrrr=0000
  649. Got_SymbolDC: //return point from the slow Huffman decoder routine (for code length > 8 bits)
  650. cmp edi,ebx //
  651. jl not_enough_bits_DC //If Not enough bits left in the Bit Buffer, Get More
  652. Got_enough_bits_DC:
  653. pxor mm2,mm2
  654. sub edi,ebx //invalidate ssss bits from the Bit counter
  655. movd mm2,ebx
  656. movq mm3,mm4 //copy #64 into mm3
  657. psubd mm3,mm2 //now mm3 has 64-ssss
  658. movq mm0,mm1 //save a copy of the Bit Buffer
  659. psrlq mm0,mm3 //shift result right
  660. nop
  661. psllq mm1,mm2 //Invalidate ssss bits from the Bit Buffer
  662. movd ecx,mm0
  663. mov eax,(dword ptr[twoexpnminusone+4*ebx]) //load 2^(ssss-1)
  664. cmp ecx,eax //
  665. jge positiv_symDC // If # < 2^(ssss-1), then # = #+(1-2^ssss)
  666. add ecx,(dword ptr [oneminustwoexpn+4*ebx]) //
  667. nop /****************************************/
  668. positiv_symDC:
  669. mov eax,dword ptr [compptr] //If !(compptr->compoent_needed), skip AC and DC coefts
  670. mov edx,1 //initalize loop counter for AC coef't loop
  671. cmp (dword ptr [eax]).component_needed,0
  672. je skip_ACs
  673. //don't skip the AC coefficients.
  674. mov eax,[ci]
  675. mov ebx,[block] //(*block)[0] = (JCOEF) s//
  676. add ecx,(dword ptr[state.last_dc_val+eax*4]) //s += state.last_dc_val[ci]//
  677. pxor mm7,mm7 //cleared for AC_coefficient calculations
  678. mov (dword ptr[state.last_dc_val+eax*4]),ecx //state.last_dc_val[ci] = s//
  679. mov word ptr[ebx],cx //store in (*block)
  680. mov eax,[compptr]
  681. cmp (dword ptr[eax]).DCT_scaled_size,1 //if (compptr->DCT_scaled_size > 1) {
  682. jle skip_ACs
  683. // Section F.2.2.2: decode the AC coefficients
  684. // Since zeroes are skipped, output area must be cleared beforehand
  685. //---------------------------------------------------------------------------------
  686. //AC loop section: Active case.
  687. //---------------------------------------------------------------------------------
  688. Get_AC_DCT_loop:
  689. cmp edi,8
  690. jl Get_8_bits_ac
  691. //take a peek at the data in get_buffer.
  692. Full_8_bits_AC:
  693. movq mm3,mm1 //copy Bit Buffer
  694. psrlq mm1,56 //load msb from the Bit Buffer
  695. movd ecx,mm6 //load AC Huffman Table Pointer
  696. movd eax,mm1 //copy into integer reg. for address calculation
  697. movq mm1,mm3
  698. mov ebx,(dword ptr[ecx+4*eax]).look_nbits //If Huffman symbol is contained within 8 bits fetched,
  699. //return the actual length of the sequence. If zero, len>8 bits
  700. test ebx,ebx
  701. je Nineplus_decode_AC
  702. sub edi,ebx //invalidate n bits from Bit Counter
  703. movd mm2,ebx
  704. psllq mm1,mm2 //invalidate n bits from Bit Buffer
  705. xor ebx,ebx
  706. mov bl,(byte ptr[eax+ecx]).look_sym //load the Huffman Run Length code (rrrr|ssss) for this symbol
  707. Got_SymbolAC: //return point from the slow Huffman routine
  708. mov eax,ebx
  709. shr eax,4 //highest nibble is run-length of zeroes (rrrr)
  710. add edx,eax //increment AC coefft counter by the # of zeroes. Assume array is zeroed originally
  711. and ebx,0x000F //isolate the lowest nibble, the bit-length of the actual coeff't (ssss)
  712. jz Special_SymbolAC //a zero for the symbol bit-length indicates it is a special symbol. Ex: 0xF0, 0x00
  713. //test to see if # available bits from bit_buffer are less than required to fill the Huffman symbol
  714. //if insufficient bits, load new bit_buffer through fill_bit_buffer
  715. cmp edi,ebx //ssss in ebx
  716. jl Get_n_bits_ac
  717. Got_n_bits_AC:
  718. sub edi,ebx //invalidate ssss bits from the Bit counter
  719. movd mm2,ebx
  720. movq mm3,mm4 //copy #64 into mm3
  721. psubd mm3,mm2 //now mm3 has 64-ssss
  722. movq mm0,mm1 //save a copy of the Bit Buffer
  723. psllq mm1,mm2 //Invalidate ssss bits from the Bit Buffer
  724. psrlq mm0,mm3 //shift result right
  725. mov eax,(dword ptr[twoexpnminusone+4*ebx]) //load 2^(ssss-1)
  726. movd ecx,mm0
  727. cmp ecx,eax //
  728. //
  729. jge positiv_symAC // If # < 2^(ssss-1), then # = #+(1-2^ssss)
  730. add ecx,(dword ptr [oneminustwoexpn+4*ebx]) //
  731. positiv_symAC:
  732. //don't modify mm3. It has the actual AC-DCT coefficient.
  733. // Output coefficient in natural (dezigzagged) order.
  734. // Note: the extra entries in jpeg_natural_order[] will save us
  735. // if the AC coefct index >= DCTSIZE2 (64), which could happen if the data is corrupted.
  736. mov eax, dword ptr(jpeg_natural_order[4*edx]) //(*block)[jpeg_natural_order[k]]=s;
  737. mov ebx, dword ptr [block]
  738. mov word ptr([ebx+2*eax]),cx
  739. ContinueAC:
  740. inc edx //Ac coefct index ++
  741. cmp edx,64 //While (index) < 64
  742. jl Get_AC_DCT_loop //imples we are doing the loop 63 times (DC was the first, for 64 total COEFF"s)
  743. Continue_Next_Block_AC:
  744. inc byte ptr[blkn] //process the next Coeff. block
  745. xor eax,eax
  746. mov al,byte ptr[blkn]
  747. mov edx,dword ptr[cinfo]
  748. cmp eax,(j_decompress_ptr [edx]).blocks_in_MCU //While [blkn]<= Max number of blocks in MCU:
  749. jge COMPLETED_MCU
  750. jmp One_Block_Loop
  751. /***************************************************************************************/
  752. /* DC helper Code */
  753. /***************************************************************************************/
  754. Get_n_bits_DC: xor ebx,ebx//pass nbits in the eax register
  755. call fill_bit_buffer
  756. //if zero, it was probably suspended. Therefore suspend the whole DECODE_MCU
  757. test eax,eax
  758. je Return_Fail
  759. cmp edi,8
  760. jge Got_n_bits_DC //probable and predicted path is up.
  761. mov ebx,1
  762. jmp Slow_Decode_DC
  763. not_enough_bits_DC:
  764. call fill_bit_buffer
  765. xor ebx,ebx
  766. mov bl,byte ptr[nbits]
  767. test eax,eax
  768. jne Got_enough_bits_DC
  769. jmp Return_Fail
  770. Nineplus_Decode_DC:
  771. mov ebx,9
  772. Slow_Decode_DC: //aka slow_label. This is the _slow_ huff_decode.
  773. mov eax,[dctbl]
  774. mov [htbl],eax
  775. call jpeg_huff_decode_fast //assume ebx holds nbits
  776. test eax,eax
  777. jl Return_Fail
  778. mov ebx,eax
  779. jmp Got_SymbolDC
  780. /***************************************************************************************/
  781. /* AC helper Code */
  782. /***************************************************************************************/
  783. Special_SymbolAC:
  784. cmp al,0x0F
  785. jne Continue_Next_Block_AC
  786. jmp ContinueAC
  787. Get_n_bits_ac:
  788. call fill_bit_buffer
  789. xor ebx,ebx
  790. mov bl,byte ptr[nbits]
  791. test eax,eax
  792. jne Got_n_bits_AC
  793. jmp Return_Fail
  794. Get_8_bits_ac:
  795. call fill_bit_buffer
  796. test eax,eax
  797. je Return_Fail
  798. cmp edi,8
  799. jge Full_8_bits_AC //probable and predicted path is up.
  800. mov ebx,1
  801. jmp Slow_decode_AC
  802. Nineplus_decode_AC:
  803. mov ebx,9
  804. Slow_decode_AC: //The slow Huffman Decode. Used when the code length is > 8 bits
  805. mov eax,[actbl]
  806. mov [htbl],eax
  807. call jpeg_huff_decode_fast //assume ebx holds nbits
  808. test eax,eax
  809. jl Return_Fail
  810. mov ebx,eax
  811. jmp Got_SymbolAC
  812. //Failure, return from the routine
  813. Return_Fail: //do not modify any permanent registers
  814. emms
  815. }
  816. return FALSE;
  817. __asm {
  818. //} else {
  819. //---------------------------------------------------------------------------------
  820. //AC loop section: Ignore case.
  821. //---------------------------------------------------------------------------------
  822. skip_ACs:
  823. // Section F.2.2.2: decode the AC coefficients
  824. // In this path we just discard the values
  825. Ignore_AC_DCT_loop:
  826. cmp edi,8
  827. jl Get_8_bits_acs
  828. //take a peek at the data in get_buffer.
  829. Full_8_bits_ACs:
  830. movq mm3,mm1 //copy Bit Buffer
  831. psrlq mm1,56 //load msb from the Bit Buffer
  832. movd ecx,mm6 //load AC Huffman Table Pointer
  833. movd eax,mm1 //copy into integer reg. for address calculation
  834. movq mm1,mm3
  835. mov ebx,(dword ptr[ecx+4*eax]).look_nbits //If Huffman symbol is contained within 8 bits fetched,
  836. //return the actual length of the sequence. If zero, len>8 bits
  837. test ebx,ebx
  838. je Nineplus_Decode_ACs //If symbol > 8 bits, fetch the slow way. Called 3% of the time
  839. sub edi,ebx //invalidate n bits from Bit Counter
  840. movd mm2,ebx
  841. psllq mm1,mm2 //invalidate n bits from Bit Buffer
  842. xor ebx,ebx
  843. mov bl,(byte ptr[eax+ecx]).look_sym //load the Huffman Run Length code (rrrr|ssss) for this symbol
  844. Got_SymbolACs: //return point from the slow Huffman routine
  845. mov eax,ebx
  846. shr eax,4 //highest nibble is run-length of zeroes (rrrr)
  847. add edx,eax //increment AC coefft counter by the # of zeroes. Assume array is zeroed originally
  848. and ebx,0x000F //isolate the lowest nibble, the bit-length of the actual coeff't (ssss)
  849. jz Special_SymbolACs //a zero for the symbol bit-length indicates it is a special symbol. Ex: 0xF0, 0x00
  850. //test to see if # available bits from bit_buffer are less than required to fill the Huffman symbol
  851. //if insufficient bits, load new bit_buffer through fill_bit_buffer
  852. cmp edi,ebx //ssss in ebx
  853. jl Get_n_bits_acs
  854. Got_n_bits_acs:
  855. sub edi,ebx //invalidate ssss bits from the Bit counter
  856. movd mm2,ebx
  857. psllq mm1,mm2 //Invalidate ssss bits from the Bit Buffer
  858. Continue_ACs:
  859. inc edx //Ac coefct index ++
  860. cmp edx,64 //While (index) < 64
  861. jl Ignore_AC_DCT_loop //imples we are doing the loop 63 times (DC was the first, for 64 total COEFF"s)
  862. jmp Continue_Next_Block_AC
  863. /***************************************************************************************/
  864. /* Skipped AC helper Code */
  865. /***************************************************************************************/
  866. Special_SymbolACs:
  867. cmp al,0x0F
  868. jne Continue_Next_Block_AC
  869. jmp Continue_ACs
  870. Get_8_bits_acs:
  871. call fill_bit_buffer
  872. test eax,eax
  873. je Return_Fail
  874. cmp edi,8
  875. jge Full_8_bits_ACs //probable and predicted path is up.
  876. mov ebx,1
  877. jmp Slow_Decode_ACs
  878. Get_n_bits_acs:
  879. call fill_bit_buffer
  880. xor ebx,ebx
  881. mov bl,byte ptr[nbits]
  882. test eax,eax
  883. jne Got_n_bits_acs
  884. jmp Return_Fail
  885. Nineplus_Decode_ACs:
  886. mov ebx,9
  887. Slow_Decode_ACs: //The slow Huffman Decode. Used when the code length is > 8 bits
  888. mov eax,[actbl]
  889. mov [htbl],eax
  890. call jpeg_huff_decode_fast //assume ebx holds nbits
  891. test eax,eax
  892. jl Return_Fail
  893. mov ebx,eax
  894. jmp Got_SymbolACs
  895. //} else {
  896. COMPLETED_MCU:
  897. // Completed MCU, so update state
  898. //BITREAD_SAVE_STATE(cinfo,entropy->bitstate)//
  899. //#define BITREAD_SAVE_STATE(cinfop,permstate)
  900. // cinfo->src->next_input_byte = br_state.next_input_byte
  901. // cinfo->src->bytes_in_buffer = br_state.bytes_in_buffer
  902. // cinfo->unread_marker = br_state.unread_marker
  903. // entropy->bitstate.get_buffer_64 = mm1
  904. // entropy->bitstate.bits_left = mm0
  905. mov eax,dword ptr [br_state.unread_marker]
  906. mov ebx,dword ptr [cinfo]
  907. mov (j_decompress_ptr [ebx]).unread_marker,eax
  908. mov eax,dword ptr [br_state.next_input_byte]
  909. mov ebx,(j_decompress_ptr [ebx]).src
  910. mov (j_csrc_ptr [ebx]).next_input_byte,eax
  911. mov eax,dword ptr [br_state.bytes_in_buffer]
  912. mov (j_csrc_ptr [ebx]).bytes_in_buffer,eax
  913. mov eax,dword ptr [entropy]
  914. movq (qword ptr [eax]).bitstate.get_buffer_64,mm1
  915. mov (dword ptr [eax]).bitstate.bits_left,edi
  916. mov ebx,dword ptr [entropy]
  917. mov eax,dword ptr [state.last_dc_val+0x00]
  918. mov (dword ptr [ebx]).saved[0x00],eax
  919. mov eax,dword ptr [state.last_dc_val+0x04]
  920. mov (dword ptr [ebx]).saved[0x04],eax
  921. mov eax,dword ptr [state.last_dc_val+0x08]
  922. mov (dword ptr [ebx]).saved[0x08],eax
  923. mov eax,dword ptr [state.last_dc_val+0x0C]
  924. mov (dword ptr [ebx]).saved[0x0C],eax
  925. // Account for restart interval (no-op if not using restarts)
  926. emms
  927. }
  928. entropy->restarts_to_go--;
  929. return TRUE;
  930. //----------------------------------------------------------------------
  931. /***************************************************************************
  932. fill_bit_buffer:
  Assembly procedure to refill the bit buffer (mm1) from the input stream,
  one byte at a time, until more than 56 bits are held (bit count in edi).
  Also called near the end of a data segment.
  Input Parameters
  bl: minimum number of bits to get (saved in nbits)
  937. various MMX registers and local variables must be defined; see
  938. _decode_one_mcu_inner above
  939. This code is called very frequently
  940. ****************************************************************************/
  941. __asm {
  942. fill_bit_buffer:
  943. //use ecx to store bytes_in_buffer
  944. //use ebx to store next_input_byte
  945. //edi to store Bit Buffer length
  946. //---------------------------------------------Main Looop----------
  947. mov dword ptr [temp1],edx
  948. mov byte ptr[nbits],bl //number of bits to get
  949. //format the bit buffer: shift to the right by
  950. //64-nbits
  951. movd mm0,edi
  952. movq mm7,mm4
  953. mov ecx,dword ptr[br_state.bytes_in_buffer]
  954. psubd mm7,mm0
  955. psrlq mm1,mm7
  956. mov ebx,dword ptr[br_state.next_input_byte]
  957. //mov eax,8
  958. //movd mm4,eax
  959. // Attempt to read a byte */
  960. cmp [br_state.unread_marker],0
  961. jne no_more_data
  962. test ecx,ecx
  963. je call_load_more_bytes
  964. //determine if there are enough bytes in the i/o buffer
  965. continue_reading:
  966. //decrement bytes_in_buffer//
  967. dec ecx
  968. js call_load_more_bytes
  969. //load new data
  970. xor eax,eax
  971. mov al,byte ptr[ebx]
  972. //update next_input_byte pointer
  973. inc ebx
  974. cmp eax,0xFF //compare ebx to FF
  975. je got_FF
  976. stuff_byte:
  977. psllq mm1,8
  978. movd mm7,eax
  979. add edi,8
  980. por mm1,mm7
  981. //determine if we've read enough bytes
  982. cmp edi,56
  983. jle continue_reading
  984. done_loading:
  985. //were done loading data.
  986. //stuff values for bytes_in_buffer, next_input_byte
  987. mov [br_state.next_input_byte],ebx
  988. mov [br_state.bytes_in_buffer],ecx
  989. //finish formatting the bit_register
  990. movd mm7,edi
  991. movq mm0,mm4
  992. psubd mm0,mm7
  993. mov eax,0xFF
  994. psllq mm1,mm0
  995. mov edx, dword ptr [temp1]
  996. ret
  997. call_load_more_bytes:
  998. call load_more_bytes
  999. jmp continue_reading
  1000. //---------------------------------------End Main Loop-----------
  1001. got_FF:
  1002. //test to see if there are enough bytes in input_buffer
  1003. test ecx,ecx
  1004. jne continue_reading_2
  1005. call load_more_bytes
  1006. continue_reading_2:
  1007. //decrement bytes_in_buffer//
  1008. dec ecx
  1009. //load new data
  1010. xor eax,eax
  1011. mov al,[ebx]
  1012. //update next_input_byte pointer
  1013. inc ebx //do this twice?
  1014. cmp eax,0xff
  1015. je got_FF
  1016. test eax,eax
  1017. jne eod_marker
  1018. mov eax,0xFF
  1019. jmp stuff_byte //stuff an 'FF'
  1020. eod_marker: //byte was an end-of-data marker
  1021. mov [br_state.unread_marker],eax
  1022. //if we have enough bits in the input buffer to cover the required bits, ok.
  1023. //otherwise, warn the sytem about corrupt data.
  1024. no_more_data:
  1025. xor eax, eax
  1026. //movd ebx,mm0 //dshade
  1027. //cmp bl,[nbits]
  1028. //jl corrupt_data
  1029. //ok, have enough data,
  1030. jmp stuff_byte_corrupt
  1031. //corrupt_data:
  1032. //this junk is the WARNMS macro
  1033. mov eax,dword ptr [br_state.printed_eod_ptr]
  1034. cmp dword ptr [eax],0x00
  1035. jne continue_corrupt
  1036. mov eax,dword ptr [cinfo]
  1037. mov eax,(j_decompress_ptr [eax]).err //the err struct is the first memer of state->cinfo
  1038. mov (j_cerr_ptr [eax]).msg_code,JWRN_HIT_MARKER
  1039. push 0xffffffff
  1040. mov eax,dword ptr [cinfo]
  1041. push eax
  1042. mov eax,dword ptr[cinfo] //the err struct is the first member of state->cinfo
  1043. mov eax,(j_decompress_ptr [eax]).err
  1044. call (j_cerr_ptr [eax]).emit_message
  1045. //call dword ptr[eax]
  1046. add esp,8
  1047. mov eax, dword ptr[br_state.printed_eod_ptr]
  1048. mov dword ptr [eax],1
  1049. continue_corrupt:
  1050. xor eax,eax
  1051. jmp stuff_byte_corrupt
  1052. stuff_byte_corrupt:
  1053. psllq mm1,8
  1054. movd mm7,eax
  1055. add edi,8
  1056. por mm1,mm7
  1057. //determine if we've read enough bytes
  1058. cmp edi,56
  1059. jle stuff_byte_corrupt
  1060. jmp done_loading
  1061. load_more_bytes:
  1062. movd mm0,edi
  1063. mov [br_state.next_input_byte],ebx
  1064. mov eax,[br_state.cinfo]
  1065. push eax
  1066. //mov eax,[br_state.cinfo]
  1067. mov eax,(j_decompress_ptr[eax]).src
  1068. //movd mm0,edi
  1069. call (j_csrc_ptr [eax]).fill_input_buffer
  1070. add esp,4
  1071. //eax has the return value. If zero, bomb out
  1072. test eax,eax
  1073. je return_4
  1074. //update next_input_byte and bytes_in_buffer.
  1075. mov eax,[br_state.cinfo]
  1076. mov eax,(j_decompress_ptr[eax]).src
  1077. mov ebx,(j_csrc_ptr [eax]).next_input_byte;
  1078. mov ecx,(j_csrc_ptr [eax]).bytes_in_buffer;
  1079. movd edi,mm0
  1080. mov edx,dword ptr[temp1]
  1081. ret
  1082. return_4:
  1083. mov eax,0x40
  1084. movd mm4,eax
  1085. mov eax,0
  1086. mov edx,dword ptr[temp1]
  1087. emms
  1088. ret
  1089. //End fill_bit_buffer--------------------------------------------------
  1090. //--------------------------------------------------------------------------
  1091. //--------------------------------------------------------------------------
  1092. /***************************************************************************
  1093. Jpeg_huff_decode_fast.
  1094. Assembly procedure to decode Huffman coefficients longer than 8 bits.
  1095. Also called near the end of a data segment.
  1096. Input Parameters
  ebx: minimum number of bits for the next huffman code.
  1098. various MMX registers and local variables must be defined; see
  1099. _decode_one_mcu_inner above
  1100. This code is infrequently called
  1101. ****************************************************************************/
  1102. jpeg_huff_decode_fast:
  1103. /* HUFF_DECODE has determined that the code is at least min_bits */
  1104. /* bits long, so fetch that many bits in one swoop. */
  1105. push edx
  1106. mov [min_bits],ebx
  1107. cmp edi,ebx
  1108. jl Fill_Input_Buffer
  1109. Filled_Up:
  1110. sub edi,ebx
  1111. movq mm3,mm4
  1112. movd mm7,ebx
  1113. movq mm2,mm1
  1114. psubd mm3,mm7
  1115. psllq mm1,mm7
  1116. psrlq mm2,mm3
  1117. movd ecx,mm2
  1118. Continue_Tedious_1:
  1119. //now mm7 holds the most recent code
  1120. /* Collect the rest of the Huffman code one bit at a time. */
  1121. /* This is per Figure F.16 in the JPEG spec. */
  1122. mov eax,dword ptr [min_bits]
  1123. mov edx,dword ptr [htbl]
  1124. //mov ecx,dword ptr [code]
  1125. mov ebx,dword ptr [edx+eax*4].maxcode
  1126. cmp ebx,ecx
  1127. jge Continue_Tedious_2b
  1128. //while (code > htbl->maxcode[min_bits]) {
  1129. //movd eax,mm0
  1130. cmp edi,1
  1131. jl Fill_Input_Buffer_2
  1132. Filled_Up_2:
  1133. dec edi
  1134. movq mm3,mm1
  1135. psrlq mm3,63
  1136. movd mm7,ecx
  1137. psllq mm1,1
  1138. psllq mm7,1
  1139. inc [min_bits]
  1140. por mm7,mm3
  1141. movd ecx,mm7
  1142. jmp Continue_Tedious_1
  1143. Fill_Input_Buffer:
  1144. //al should hold the number of valid bits;
  1145. //mov eax,ebx
  1146. call fill_bit_buffer
  1147. //if it returned a zero, exit with a -1.
  1148. test eax,eax
  1149. je Suspend_Label
  1150. //we were able to fill it with (some) data.
  1151. //jump back to the continuation of this loop:
  1152. xor ebx,ebx
  1153. mov ebx,[min_bits]
  1154. jmp Filled_Up
  1155. Fill_Input_Buffer_2:
  1156. mov ebx,1
  1157. mov [code],ecx
  1158. call fill_bit_buffer
  1159. //if it returned a zero, exit with a -1.
  1160. test eax,eax
  1161. je Suspend_Label
  1162. //we were able to fill it with (some) data.
  1163. //jump back to the continuation of this loop:
  1164. mov ecx,[code]
  1165. jmp Filled_Up_2
  1166. Continue_Tedious_2b:
  1167. push edi
  1168. /* With garbage input we may reach the sentinel value l = 17. */
  1169. }
  1170. if (min_bits > 16) {
  1171. WARNMS(br_state.cinfo, JWRN_HUFF_BAD_CODE);
  1172. __asm {
  1173. pop edi
  1174. xor eax,eax
  1175. pop edx
  1176. ret
  1177. }
  1178. }
  1179. /*code= htbl->pub->huffval[ htbl->valptr[min_bits] +
  1180. ((int) (code - htbl->mincode[min_bits])) ];*/
  1181. __asm{
  1182. pop edi
  1183. mov eax,dword ptr [min_bits]
  1184. mov ebx,dword ptr [htbl]
  1185. sub ecx,(dword ptr [ebx+eax*4]).mincode
  1186. add ecx,(dword ptr [ebx+eax*4]).valptr
  1187. mov ebx,(h_pub_ptr [ebx]).pub
  1188. xor eax,eax
  1189. mov al,(byte ptr [ecx+ebx]).huffval
  1190. pop edx
  1191. ret
  1192. Suspend_Label:
  1193. mov eax,1
  1194. pop edx
  1195. ret
  1196. }
  1197. }
  1198. #endif
  1199. //End jpeg_huff_decode_fast-------------------------------------------------
  1200. //--------------------------------------------------------------------------
  1201. //--------------------------------------------------------------------------
  1202. /*
  1203. * Module initialization routine for Huffman entropy decoding.
  1204. */
  1205. GLOBAL(void)
  1206. jinit_huff_decoder (j_decompress_ptr cinfo)
  1207. {
  1208. huff_entropy_ptr entropy;
  1209. int i;
  1210. entropy = (huff_entropy_ptr)
  1211. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  1212. SIZEOF(huff_entropy_decoder));
  1213. cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
  1214. entropy->pub.start_pass = start_pass_huff_decoder;
  1215. #if 0
  1216. //#ifdef _X86_
  1217. if (vfMMXMachine)
  1218. {
  1219. entropy->pub.decode_mcu = decode_mcu_fast;
  1220. }
  1221. else
  1222. {
  1223. entropy->pub.decode_mcu = decode_mcu;
  1224. }
  1225. #else
  1226. entropy->pub.decode_mcu = decode_mcu;
  1227. #endif
  1228. /* Mark tables unallocated */
  1229. for (i = 0; i < NUM_HUFF_TBLS; i++) {
  1230. entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
  1231. }
  1232. }