jdcolor.c 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410
  1. /*
  2. * jdcolor.c
  3. *
  4. * Copyright (C) 1991-1996, Thomas G. Lane.
  5. * This file is part of the Independent JPEG Group's software.
  6. * For conditions of distribution and use, see the accompanying README file.
  7. *
  8. * This file contains output colorspace conversion routines.
  9. */
  10. #define JPEG_INTERNALS
  11. #include "jinclude.h"
  12. #include "jpeglib.h"
  13. #ifdef NIFTY
  14. #include <math.h>
  15. #define SCALE_PREC 5
  16. #define SCALE_RND (1 << (SCALE_PREC - 1))
  17. #define SCALE (1 << SCALE_PREC)
  18. #define unscale(x) (((x) + SCALE_RND) >> SCALE_PREC)
  19. #define clip(x) (((long)(x) & ~0xff) ? (((long)(x) < 0) ? 0 : 255) : (long)(x))
  20. #endif
  21. /* Private subobject */
  22. typedef struct {
  23. struct jpeg_color_deconverter pub; /* public fields */
  24. /* Private state for YCC->RGB conversion */
  25. int * Cr_r_tab; /* => table for Cr to R conversion */
  26. int * Cb_b_tab; /* => table for Cb to B conversion */
  27. INT32 * Cr_g_tab; /* => table for Cr to G conversion */
  28. INT32 * Cb_g_tab; /* => table for Cb to G conversion */
  29. #ifdef NIFTY
  30. /* Private state for the PhotoYCC->RGB conversion tables */
  31. coef_c1 *C1;
  32. coef_c2 *C2;
  33. short *xy;
  34. #endif
  35. } my_color_deconverter;
  36. /* Added header info - CRK */
  37. extern void MYCbCr2RGB(
  38. int columns,
  39. unsigned char *inY,
  40. unsigned char *inU,
  41. unsigned char *inV,
  42. unsigned char *outRGB);
  43. extern void MYCbCrA2RGBA(
  44. int columns,
  45. unsigned char *inY,
  46. unsigned char *inU,
  47. unsigned char *inV,
  48. unsigned char *inA,
  49. unsigned char *outRGBA);
  50. extern void MYCbCrA2RGBALegacy(
  51. int columns,
  52. unsigned char *inY,
  53. unsigned char *inU,
  54. unsigned char *inV,
  55. unsigned char *inA,
  56. unsigned char *outRGBA);
  57. // These constants correspond to CCIR 601-1
  58. // R = [256*Y + 359*(Cr-128)] / 256
  59. // G = [256*Y - 88*(Cb-128) - 183*(Cr-128)] / 256
  60. // B = [256*Y + 454*(Cb-128)] / 256
  61. //Conventional floating point equations:
  62. // R = Y + 1.40200 * Cr
  63. // G = Y - 0.34414 * Cb - 0.71414 * Cr
  64. // B = Y + 1.77200 * Cb
  65. //Ry=0100 Ru=0000 Rv=0167
  66. //Gy=0100 Gu=FFA8 Gv=FF49
  67. //By=0100 Bu=01C6 Bv=0000
  68. // constants for YCbCr->RGB and YCbCrA->RGBA
  69. static __int64 const_0 = 0x0000000000000000;
  70. static __int64 const_sub128 = 0x0080008000800080;
  71. static __int64 const_VUmul = 0xFF49FFA8FF49FFA8;
  72. static __int64 const_YVmul = 0x0100016701000167;
  73. static __int64 const_YUmul = 0x010001C6010001C6;
  74. static __int64 mask_highd = 0xFFFFFFFF00000000;
  75. static __int64 const_invert = 0x00FFFFFF00FFFFFF;
  76. //These constants correspond to the original FPX SDK
  77. // R = [256*Y + 410*(Cr-128)] / 256
  78. // G = [256*Y - 85*(Cb-128) - 205*(Cr-128)] / 256
  79. // B = [256*Y + 512*(Cb-128)] / 256
  80. //Conventional floating point equations:
  81. // R = Y + 1.60000*(Cr)
  82. // G = Y - 0.33333*(Cb) - 0.80000*(Cr)
  83. // B = Y + 2.00000*(Cb)
  84. //Ry=0100 Ru=0000 Rv=019A
  85. //Gy=0100 Gu=FFAB Gv=FF33
  86. //By=0100 Bu=0200 Bv=0000
  87. // constants for YCbCr->RGB and YCbCrA->RGBA
  88. //const __int64 const_0 = 0x0000000000000000;
  89. //const __int64 const_sub128= 0x0080008000800080;
  90. //const __int64 const_VUmul = 0xFF33FFABFF33FFAB;
  91. //const __int64 const_YVmul = 0x0100019A0100019A;
  92. //const __int64 const_YUmul = 0x0001000200010002;
  93. //const __int64 mask_highd = 0xFFFFFFFF00000000;
  94. //const __int64 const_invert= 0x00FFFFFF00FFFFFF;
  95. /* End of added info - CRK */
  96. typedef my_color_deconverter * my_cconvert_ptr;
  97. #ifdef NIFTY
  98. /*
  99. * Initialize tables for PhotoYCC->RGB colorspace conversion.
  100. */
  101. LOCAL (void)
  102. build_pycc_rgb_table (j_decompress_ptr cinfo)
  103. {
  104. my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
  105. INT32 i;
  106. cconvert->C1 = (coef_c1 *)
  107. (*cinfo->mem->alloc_small)((j_common_ptr) cinfo, JPOOL_IMAGE,
  108. 256 * SIZEOF(coef_c1));
  109. cconvert->C2 = (coef_c2 *)
  110. (*cinfo->mem->alloc_small)((j_common_ptr) cinfo, JPOOL_IMAGE,
  111. 256 * SIZEOF(coef_c2));
  112. cconvert->xy = (short *)
  113. (*cinfo->mem->alloc_small)((j_common_ptr) cinfo, JPOOL_IMAGE,
  114. 256 * SIZEOF(short));
  115. for (i = 0; i < 256; i++) {
  116. cconvert->xy[i] = (short)((double)i * 1.3584 * SCALE);
  117. cconvert->C2[i].r = (short)(i * 1.8215 * SCALE);
  118. cconvert->C1[i].g = (short)(i * -0.4303 * SCALE);
  119. cconvert->C2[i].g = (short)(i * -0.9271 * SCALE);
  120. cconvert->C1[i].b = (short)(i * 2.2179 * SCALE);
  121. }
  122. }
  123. /*
  124. * PhotoYCC->RGB colorspace conversion.
  125. */
  126. METHODDEF (void)
  127. pycc_rgb_convert (j_decompress_ptr cinfo,
  128. JSAMPIMAGE input_buf, JDIMENSION input_row,
  129. JSAMPARRAY output_buf, int num_rows)
  130. {
  131. my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
  132. register JSAMPROW inptr0, inptr1, inptr2;
  133. register JSAMPROW outptr;
  134. register JDIMENSION col;
  135. JDIMENSION num_cols = cinfo->output_width;
  136. unsigned char y, c1, c2;
  137. short ri, gi, bi,
  138. offsetR, offsetG, offsetB;
  139. register short *xy = cconvert->xy;
  140. register coef_c1 *C1 = cconvert->C1;
  141. register coef_c2 *C2 = cconvert->C2;
  142. /*
  143. for (i = 0; i < 256; i++) {
  144. xy[i] = (short)((double)i * 1.3584 * SCALE);
  145. C2[i].r = (short)(i * 1.8215 * SCALE);
  146. C1[i].g = (short)(i * -0.4303 * SCALE);
  147. C2[i].g = (short)(i * -0.9271 * SCALE);
  148. C1[i].b = (short)(i * 2.2179 * SCALE);
  149. }
  150. */
  151. offsetR = (short)(-249.55 * SCALE);
  152. offsetG = (short)( 194.14 * SCALE);
  153. offsetB = (short)(-345.99 * SCALE);
  154. while (--num_rows >= 0) {
  155. inptr0 = input_buf[0][input_row];
  156. inptr1 = input_buf[1][input_row];
  157. inptr2 = input_buf[2][input_row];
  158. input_row++;
  159. outptr = *output_buf++;
  160. for (col = 0; col < num_cols; col++) {
  161. y = GETJSAMPLE(inptr0[col]);
  162. c1 = GETJSAMPLE(inptr1[col]);
  163. c2 = GETJSAMPLE(inptr2[col]);
  164. ri = xy[y] + C2[c2].r + offsetR;
  165. gi = xy[y] + C1[c1].g + C2[c2].g + offsetG;
  166. bi = xy[y] + C1[c1].b + offsetB;
  167. ri = unscale(ri);
  168. gi = unscale(gi);
  169. bi = unscale(bi);
  170. outptr[RGB_RED] = (JSAMPLE)clip(ri);
  171. outptr[RGB_GREEN] = (JSAMPLE)clip(gi);
  172. outptr[RGB_BLUE] = (JSAMPLE)clip(bi);
  173. outptr+=3;
  174. }
  175. }
  176. }
  177. /*
  178. * PhotoYCC->RGBA colorspace conversion.
  179. */
  180. METHODDEF (void)
  181. pycc_rgba_convert (j_decompress_ptr cinfo,
  182. JSAMPIMAGE input_buf, JDIMENSION input_row,
  183. JSAMPARRAY output_buf, int num_rows)
  184. {
  185. my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
  186. register JSAMPROW inptr0, inptr1, inptr2;
  187. register JSAMPROW outptr;
  188. register JDIMENSION col;
  189. JDIMENSION num_cols = cinfo->output_width;
  190. unsigned char y, c1, c2;
  191. short ri, gi, bi,
  192. offsetR, offsetG, offsetB;
  193. register short *xy = cconvert->xy;
  194. register coef_c1 *C1 = cconvert->C1;
  195. register coef_c2 *C2 = cconvert->C2;
  196. offsetR = (short)(-249.55 * SCALE);
  197. offsetG = (short)( 194.14 * SCALE);
  198. offsetB = (short)(-345.99 * SCALE);
  199. while (--num_rows >= 0) {
  200. inptr0 = input_buf[0][input_row];
  201. inptr1 = input_buf[1][input_row];
  202. inptr2 = input_buf[2][input_row];
  203. input_row++;
  204. outptr = *output_buf++;
  205. for (col = 0; col < num_cols; col++) {
  206. y = GETJSAMPLE(inptr0[col]);
  207. c1 = GETJSAMPLE(inptr1[col]);
  208. c2 = GETJSAMPLE(inptr2[col]);
  209. ri = xy[y] + C2[c2].r + offsetR;
  210. gi = xy[y] + C1[c1].g + C2[c2].g + offsetG;
  211. bi = xy[y] + C1[c1].b + offsetB;
  212. ri = unscale(ri);
  213. gi = unscale(gi);
  214. bi = unscale(bi);
  215. outptr[RGB_RED] = (JSAMPLE)clip(ri);
  216. outptr[RGB_GREEN] = (JSAMPLE)clip(gi);
  217. outptr[RGB_BLUE] = (JSAMPLE)clip(bi);
  218. outptr[3] = 255;
  219. outptr+=4;
  220. }
  221. }
  222. }
  223. #endif
  224. /**************** YCbCr -> RGB conversion: most common case **************/
  225. /*
  226. * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
  227. * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
  228. * The conversion equations to be implemented are therefore
  229. * R = Y + 1.40200 * Cr
  230. * G = Y - 0.34414 * Cb - 0.71414 * Cr
  231. * B = Y + 1.77200 * Cb
  232. * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
  233. * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  234. *
  235. * To avoid floating-point arithmetic, we represent the fractional constants
  236. * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
  237. * the products by 2^16, with appropriate rounding, to get the correct answer.
  238. * Notice that Y, being an integral input, does not contribute any fraction
  239. * so it need not participate in the rounding.
  240. *
  241. * For even more speed, we avoid doing any multiplications in the inner loop
  242. * by precalculating the constants times Cb and Cr for all possible values.
  243. * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
  244. * for 12-bit samples it is still acceptable. It's not very reasonable for
  245. * 16-bit samples, but if you want lossless storage you shouldn't be changing
  246. * colorspace anyway.
  247. * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
  248. * values for the G calculation are left scaled up, since we must add them
  249. * together before rounding.
  250. */
  251. #define SCALEBITS 16 /* speediest right-shift on some machines */
  252. #define ONE_HALF ((INT32) 1 << (SCALEBITS-1))
  253. #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
  254. /*
  255. * Initialize tables for YCC->RGB colorspace conversion.
  256. */
  257. LOCAL(void)
  258. build_ycc_rgb_table (j_decompress_ptr cinfo)
  259. {
  260. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  261. int i;
  262. INT32 x;
  263. SHIFT_TEMPS
  264. cconvert->Cr_r_tab = (int *)
  265. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  266. (MAXJSAMPLE+1) * SIZEOF(int));
  267. cconvert->Cb_b_tab = (int *)
  268. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  269. (MAXJSAMPLE+1) * SIZEOF(int));
  270. cconvert->Cr_g_tab = (INT32 *)
  271. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  272. (MAXJSAMPLE+1) * SIZEOF(INT32));
  273. cconvert->Cb_g_tab = (INT32 *)
  274. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  275. (MAXJSAMPLE+1) * SIZEOF(INT32));
  276. for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
  277. /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
  278. /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
  279. /* Cr=>R value is nearest int to 1.40200 * x */
  280. cconvert->Cr_r_tab[i] = (int)
  281. RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
  282. /* Cb=>B value is nearest int to 1.77200 * x */
  283. cconvert->Cb_b_tab[i] = (int)
  284. RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
  285. /* Cr=>G value is scaled-up -0.71414 * x */
  286. cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
  287. /* Cb=>G value is scaled-up -0.34414 * x */
  288. /* We also add in ONE_HALF so that need not do it in inner loop */
  289. cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
  290. }
  291. }
  292. /*
  293. * Convert some rows of samples to the output colorspace.
  294. *
  295. * Note that we change from noninterleaved, one-plane-per-component format
  296. * to interleaved-pixel format. The output buffer is therefore three times
  297. * as wide as the input buffer.
  298. * A starting row offset is provided only for the input buffer. The caller
  299. * can easily adjust the passed output_buf value to accommodate any row
  300. * offset required on that side.
  301. */
  302. METHODDEF(void)
  303. ycc_rgb_convert (j_decompress_ptr cinfo,
  304. JSAMPIMAGE input_buf, JDIMENSION input_row,
  305. JSAMPARRAY output_buf, int num_rows)
  306. {
  307. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  308. register int y, cb, cr;
  309. register JSAMPROW outptr;
  310. register JSAMPROW inptr0, inptr1, inptr2;
  311. register JDIMENSION col;
  312. JDIMENSION num_cols = cinfo->output_width;
  313. // Alignment variables - CRK
  314. JDIMENSION tail_cols = num_cols&7;
  315. JDIMENSION mmx_cols=num_cols&~7;
  316. /* copy these pointers into registers if possible */
  317. register JSAMPLE * range_limit = cinfo->sample_range_limit;
  318. register int * Crrtab = cconvert->Cr_r_tab;
  319. register int * Cbbtab = cconvert->Cb_b_tab;
  320. register INT32 * Crgtab = cconvert->Cr_g_tab;
  321. register INT32 * Cbgtab = cconvert->Cb_g_tab;
  322. SHIFT_TEMPS
  323. #ifdef _X86_
  324. if(vfMMXMachine) { //MMX Code - CRK
  325. while (--num_rows >= 0) {
  326. inptr0 = input_buf[0][input_row];
  327. inptr1 = input_buf[1][input_row];
  328. inptr2 = input_buf[2][input_row];
  329. input_row++;
  330. outptr = *output_buf++;
  331. MYCbCr2RGB(mmx_cols, inptr0, inptr1, inptr2, outptr);
  332. outptr += 3*mmx_cols;
  333. for (col = mmx_cols; col < num_cols; col++) {
  334. y = GETJSAMPLE(inptr0[col]);
  335. cb = GETJSAMPLE(inptr1[col]);
  336. cr = GETJSAMPLE(inptr2[col]);
  337. /* Range-limiting is essential due to noise introduced by DCT losses. */
  338. outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
  339. outptr[RGB_GREEN] = range_limit[y +
  340. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  341. SCALEBITS))];
  342. outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
  343. outptr += RGB_PIXELSIZE;
  344. }
  345. }
  346. __asm emms
  347. }
  348. #else
  349. if (0) { }
  350. #endif
  351. else {
  352. while (--num_rows >= 0) {
  353. inptr0 = input_buf[0][input_row];
  354. inptr1 = input_buf[1][input_row];
  355. inptr2 = input_buf[2][input_row];
  356. input_row++;
  357. outptr = *output_buf++;
  358. for (col = 0; col < num_cols; col++) {
  359. y = GETJSAMPLE(inptr0[col]);
  360. cb = GETJSAMPLE(inptr1[col]);
  361. cr = GETJSAMPLE(inptr2[col]);
  362. /* Range-limiting is essential due to noise introduced by DCT losses. */
  363. outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
  364. outptr[RGB_GREEN] = range_limit[y +
  365. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  366. SCALEBITS))];
  367. outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
  368. outptr += RGB_PIXELSIZE;
  369. }
  370. }
  371. }
  372. }
  373. /**************** Cases other than YCbCr -> RGB **************/
  374. /*
  375. * Color conversion for grayscale->RGB:
  376. * Single input value for Y gets copied into RGB.
  377. * Need to do this here so that the color quantizing will work.
  378. */
  379. METHODDEF(void)
  380. grayscale_RGB_convert (j_decompress_ptr cinfo,
  381. JSAMPIMAGE input_buf, JDIMENSION input_row,
  382. JSAMPARRAY output_buf, int num_rows)
  383. {
  384. register int y;
  385. register JSAMPROW outptr;
  386. register JSAMPROW inptr0;
  387. register JDIMENSION col;
  388. JDIMENSION num_cols = cinfo->output_width;
  389. // Alignment variables - CRK
  390. /* copy these pointers into registers if possible */
  391. while (--num_rows >= 0)
  392. {
  393. inptr0 = input_buf[0][input_row];
  394. input_row++;
  395. outptr = *output_buf++;
  396. for (col = num_cols; col--;)
  397. {
  398. y = *inptr0++;
  399. outptr[RGB_RED] = y;
  400. outptr[RGB_GREEN] = y;
  401. outptr[RGB_BLUE] = y;
  402. outptr += RGB_PIXELSIZE;
  403. }
  404. }
  405. }
  406. /*
  407. * Color conversion for no colorspace change: just copy the data,
  408. * converting from separate-planes to interleaved representation.
  409. */
  410. METHODDEF(void)
  411. null_convert (j_decompress_ptr cinfo,
  412. JSAMPIMAGE input_buf, JDIMENSION input_row,
  413. JSAMPARRAY output_buf, int num_rows)
  414. {
  415. register JSAMPROW inptr, outptr;
  416. register JDIMENSION count;
  417. register int num_components = cinfo->num_components;
  418. JDIMENSION num_cols = cinfo->output_width;
  419. int ci;
  420. while (--num_rows >= 0) {
  421. for (ci = 0; ci < num_components; ci++) {
  422. inptr = input_buf[ci][input_row];
  423. outptr = output_buf[0] + ci;
  424. for (count = num_cols; count > 0; count--) {
  425. *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */
  426. outptr += num_components;
  427. }
  428. }
  429. input_row++;
  430. output_buf++;
  431. }
  432. }
  433. /*
  434. * Color conversion for grayscale: just copy the data.
  435. * This also works for YCbCr -> grayscale conversion, in which
  436. * we just copy the Y (luminance) component and ignore chrominance.
  437. */
  438. METHODDEF(void)
  439. grayscale_convert (j_decompress_ptr cinfo,
  440. JSAMPIMAGE input_buf, JDIMENSION input_row,
  441. JSAMPARRAY output_buf, int num_rows)
  442. {
  443. jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
  444. num_rows, cinfo->output_width);
  445. }
  446. #ifdef NIFTY
  447. //Not really a colour conversion but special one for Picture It!
  448. //Copies 3 channel data and adds an alpha
  449. METHODDEF(void)
  450. rgb_rgba_convert (j_decompress_ptr cinfo,
  451. JSAMPIMAGE input_buf, JDIMENSION input_row,
  452. JSAMPARRAY output_buf, int num_rows)
  453. {
  454. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  455. register JSAMPROW outptr;
  456. register JSAMPROW inptr0, inptr1, inptr2;
  457. register JDIMENSION col;
  458. JDIMENSION num_cols = cinfo->output_width;
  459. /* copy these pointers into registers if possible */
  460. SHIFT_TEMPS
  461. while (--num_rows >= 0) {
  462. inptr0 = input_buf[0][input_row];
  463. inptr1 = input_buf[1][input_row];
  464. inptr2 = input_buf[2][input_row];
  465. input_row++;
  466. outptr = *output_buf++;
  467. for (col = 0; col < num_cols; col++) {
  468. outptr[0] = GETJSAMPLE(inptr0[col]);
  469. outptr[1] = GETJSAMPLE(inptr1[col]);
  470. outptr[2] = GETJSAMPLE(inptr2[col]);
  471. /* Alpha is added as fully opaque */
  472. outptr[3] = 255; /* don't need GETJSAMPLE here */
  473. outptr += 4;
  474. }
  475. }
  476. }
  477. METHODDEF (void)
  478. ycbcra_rgba_convert (j_decompress_ptr cinfo,
  479. JSAMPIMAGE input_buf, JDIMENSION input_row,
  480. JSAMPARRAY output_buf, int num_rows)
  481. {
  482. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  483. register int y, cb, cr;
  484. register JSAMPROW outptr;
  485. register JSAMPROW inptr0, inptr1, inptr2, inptr3;
  486. register JDIMENSION col;
  487. JDIMENSION num_cols = cinfo->output_width;
  488. // Alignment variables - CRK
  489. JDIMENSION tail_cols = num_cols&7;
  490. JDIMENSION mmx_cols=num_cols&~7;
  491. /* copy these pointers into registers if possible */
  492. register JSAMPLE * range_limit = cinfo->sample_range_limit;
  493. register int * Crrtab = cconvert->Cr_r_tab;
  494. register int * Cbbtab = cconvert->Cb_b_tab;
  495. register INT32 * Crgtab = cconvert->Cr_g_tab;
  496. register INT32 * Cbgtab = cconvert->Cb_g_tab;
  497. SHIFT_TEMPS
  498. #ifdef _X86_
  499. if(vfMMXMachine) { //MMX Code - CRK
  500. while (--num_rows >= 0) {
  501. inptr0 = input_buf[0][input_row];
  502. inptr1 = input_buf[1][input_row];
  503. inptr2 = input_buf[2][input_row];
  504. inptr3 = input_buf[3][input_row];
  505. input_row++;
  506. outptr = *output_buf++;
  507. MYCbCrA2RGBA(mmx_cols, inptr0, inptr1, inptr2, inptr3, outptr);
  508. outptr += 4*mmx_cols;
  509. for (col = mmx_cols; col < num_cols; col++) {
  510. y = GETJSAMPLE(inptr0[col]);
  511. cb = GETJSAMPLE(inptr1[col]);
  512. cr = GETJSAMPLE(inptr2[col]);
  513. /* Range-limiting is essential due to noise introduced by DCT losses. */
  514. outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
  515. outptr[RGB_GREEN] = range_limit[y +
  516. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  517. SCALEBITS))];
  518. outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
  519. outptr[3] = inptr3[col];
  520. outptr += 4;
  521. }
  522. }
  523. __asm emms
  524. }
  525. #else
  526. if (0) { }
  527. #endif
  528. else {
  529. while (--num_rows >= 0) {
  530. inptr0 = input_buf[0][input_row];
  531. inptr1 = input_buf[1][input_row];
  532. inptr2 = input_buf[2][input_row];
  533. inptr3 = input_buf[3][input_row];
  534. input_row++;
  535. outptr = *output_buf++;
  536. for (col = 0; col < num_cols; col++) {
  537. y = GETJSAMPLE(inptr0[col]);
  538. cb = GETJSAMPLE(inptr1[col]);
  539. cr = GETJSAMPLE(inptr2[col]);
  540. /* Range-limiting is essential due to noise introduced by DCT losses. */
  541. outptr[RGB_RED] = range_limit[(y + Crrtab[cr])]; /* red */
  542. outptr[RGB_GREEN] = range_limit[(y + /* green */
  543. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  544. SCALEBITS)))];
  545. outptr[RGB_BLUE] = range_limit[(y + Cbbtab[cb])]; /* blue */
  546. /* Alpha passes through unchanged */
  547. outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
  548. outptr += 4;
  549. }
  550. }
  551. }
  552. }
  553. METHODDEF (void)
  554. ycbcralegacy_rgba_convert (j_decompress_ptr cinfo,
  555. JSAMPIMAGE input_buf, JDIMENSION input_row,
  556. JSAMPARRAY output_buf, int num_rows)
  557. {
  558. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  559. register int y, cb, cr;
  560. register JSAMPROW outptr;
  561. register JSAMPROW inptr0, inptr1, inptr2, inptr3;
  562. register JDIMENSION col;
  563. JDIMENSION num_cols = cinfo->output_width;
  564. // Alignment variables - CRK
  565. JDIMENSION tail_cols = num_cols&7;
  566. JDIMENSION mmx_cols=num_cols&~7;
  567. /* copy these pointers into registers if possible */
  568. register JSAMPLE * range_limit = cinfo->sample_range_limit;
  569. register int * Crrtab = cconvert->Cr_r_tab;
  570. register int * Cbbtab = cconvert->Cb_b_tab;
  571. register INT32 * Crgtab = cconvert->Cr_g_tab;
  572. register INT32 * Cbgtab = cconvert->Cb_g_tab;
  573. SHIFT_TEMPS
  574. #ifdef _X86_
  575. if(vfMMXMachine) { //MMX Code - CRK
  576. while (--num_rows >= 0) {
  577. inptr0 = input_buf[0][input_row];
  578. inptr1 = input_buf[1][input_row];
  579. inptr2 = input_buf[2][input_row];
  580. inptr3 = input_buf[3][input_row];
  581. input_row++;
  582. outptr = *output_buf++;
  583. MYCbCrA2RGBALegacy(mmx_cols, inptr0, inptr1, inptr2, inptr3, outptr);
  584. outptr += 4*mmx_cols;
  585. for (col = mmx_cols; col < num_cols; col++) {
  586. y = GETJSAMPLE(inptr0[col]);
  587. cb = GETJSAMPLE(inptr1[col]);
  588. cr = GETJSAMPLE(inptr2[col]);
  589. /* Range-limiting is essential due to noise introduced by DCT losses. */
  590. outptr[RGB_RED] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];
  591. outptr[RGB_GREEN] = range_limit[MAXJSAMPLE - (y +
  592. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  593. SCALEBITS)))];
  594. outptr[RGB_BLUE] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];
  595. outptr[3] = inptr3[col];
  596. outptr += 4;
  597. }
  598. }
  599. __asm emms
  600. }
  601. #else
  602. if (0) { }
  603. #endif
  604. else {
  605. while (--num_rows >= 0) {
  606. inptr0 = input_buf[0][input_row];
  607. inptr1 = input_buf[1][input_row];
  608. inptr2 = input_buf[2][input_row];
  609. inptr3 = input_buf[3][input_row];
  610. input_row++;
  611. outptr = *output_buf++;
  612. for (col = 0; col < num_cols; col++) {
  613. y = GETJSAMPLE(inptr0[col]);
  614. cb = GETJSAMPLE(inptr1[col]);
  615. cr = GETJSAMPLE(inptr2[col]);
  616. /* Range-limiting is essential due to noise introduced by DCT losses. */
  617. outptr[RGB_RED] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
  618. outptr[RGB_GREEN] = range_limit[MAXJSAMPLE - (y + /* green */
  619. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  620. SCALEBITS)))];
  621. outptr[RGB_BLUE] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
  622. /* Alpha passes through unchanged */
  623. outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
  624. outptr += 4;
  625. }
  626. }
  627. }
  628. }
  629. METHODDEF (void)
  630. ycbcr_rgba_convert (j_decompress_ptr cinfo,
  631. JSAMPIMAGE input_buf, JDIMENSION input_row,
  632. JSAMPARRAY output_buf, int num_rows)
  633. {
  634. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  635. register int y, cb, cr;
  636. register JSAMPROW outptr;
  637. register JSAMPROW inptr0, inptr1, inptr2;
  638. register JDIMENSION col;
  639. JDIMENSION num_cols = cinfo->output_width;
  640. /* copy these pointers into registers if possible */
  641. register JSAMPLE * range_limit = cinfo->sample_range_limit;
  642. register int * Crrtab = cconvert->Cr_r_tab;
  643. register int * Cbbtab = cconvert->Cb_b_tab;
  644. register INT32 * Crgtab = cconvert->Cr_g_tab;
  645. register INT32 * Cbgtab = cconvert->Cb_g_tab;
  646. SHIFT_TEMPS
  647. while (--num_rows >= 0) {
  648. inptr0 = input_buf[0][input_row];
  649. inptr1 = input_buf[1][input_row];
  650. inptr2 = input_buf[2][input_row];
  651. input_row++;
  652. outptr = *output_buf++;
  653. for (col = 0; col < num_cols; col++) {
  654. y = GETJSAMPLE(inptr0[col]);
  655. cb = GETJSAMPLE(inptr1[col]);
  656. cr = GETJSAMPLE(inptr2[col]);
  657. /* Range-limiting is essential due to noise introduced by DCT losses. */
  658. outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
  659. outptr[RGB_GREEN] = range_limit[y +
  660. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  661. SCALEBITS))];
  662. outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
  663. outptr[3] = 255;
  664. outptr += 4;
  665. }
  666. }
  667. }
  668. #endif
  669. /*
  670. * Adobe-style YCCK->CMYK conversion.
  671. * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
  672. * conversion as above, while passing K (black) unchanged.
  673. * We assume build_ycc_rgb_table has been called.
  674. */
  675. METHODDEF(void)
  676. ycck_cmyk_convert (j_decompress_ptr cinfo,
  677. JSAMPIMAGE input_buf, JDIMENSION input_row,
  678. JSAMPARRAY output_buf, int num_rows)
  679. {
  680. my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
  681. register int y, cb, cr;
  682. register JSAMPROW outptr;
  683. register JSAMPROW inptr0, inptr1, inptr2, inptr3;
  684. register JDIMENSION col;
  685. JDIMENSION num_cols = cinfo->output_width;
  686. /* copy these pointers into registers if possible */
  687. register JSAMPLE * range_limit = cinfo->sample_range_limit;
  688. register int * Crrtab = cconvert->Cr_r_tab;
  689. register int * Cbbtab = cconvert->Cb_b_tab;
  690. register INT32 * Crgtab = cconvert->Cr_g_tab;
  691. register INT32 * Cbgtab = cconvert->Cb_g_tab;
  692. SHIFT_TEMPS
  693. while (--num_rows >= 0) {
  694. inptr0 = input_buf[0][input_row];
  695. inptr1 = input_buf[1][input_row];
  696. inptr2 = input_buf[2][input_row];
  697. inptr3 = input_buf[3][input_row];
  698. input_row++;
  699. outptr = *output_buf++;
  700. for (col = 0; col < num_cols; col++) {
  701. y = GETJSAMPLE(inptr0[col]);
  702. cb = GETJSAMPLE(inptr1[col]);
  703. cr = GETJSAMPLE(inptr2[col]);
  704. /* Range-limiting is essential due to noise introduced by DCT losses. */
  705. outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
  706. outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
  707. ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
  708. SCALEBITS)))];
  709. outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
  710. /* K passes through unchanged */
  711. outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
  712. outptr += 4;
  713. }
  714. }
  715. }
  716. /*
  717. * Empty method for start_pass.
  718. */
  719. METHODDEF(void)
  720. start_pass_dcolor (j_decompress_ptr cinfo)
  721. {
  722. /* no work needed */
  723. }
  724. /*
  725. * Module initialization routine for output colorspace conversion.
  726. */
  727. GLOBAL(void)
  728. jinit_color_deconverter (j_decompress_ptr cinfo)
  729. {
  730. my_cconvert_ptr cconvert;
  731. int ci;
  732. cconvert = (my_cconvert_ptr)
  733. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  734. SIZEOF(my_color_deconverter));
  735. cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
  736. cconvert->pub.start_pass = start_pass_dcolor;
  737. /* Make sure num_components agrees with jpeg_color_space */
  738. switch (cinfo->jpeg_color_space) {
  739. case JCS_GRAYSCALE:
  740. if (cinfo->num_components != 1)
  741. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  742. break;
  743. #ifdef NIFTY
  744. case JCS_YCC:
  745. if (cinfo->num_components != 3)
  746. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  747. break;
  748. case JCS_YCCA:
  749. if (cinfo->num_components != 4)
  750. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  751. break;
  752. case JCS_RGBA:
  753. if (cinfo->num_components != 4)
  754. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  755. break;
  756. case JCS_YCbCrA:
  757. if (cinfo->num_components != 4)
  758. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  759. break;
  760. case JCS_YCbCrALegacy:
  761. if (cinfo->num_components != 4)
  762. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  763. break;
  764. #endif
  765. case JCS_RGB:
  766. case JCS_YCbCr:
  767. if (cinfo->num_components != 3)
  768. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  769. break;
  770. case JCS_CMYK:
  771. case JCS_YCCK:
  772. if (cinfo->num_components != 4)
  773. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  774. break;
  775. default: /* JCS_UNKNOWN can be anything */
  776. if (cinfo->num_components < 1)
  777. ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
  778. break;
  779. }
  780. /* Set out_color_components and conversion method based on requested space.
  781. * Also clear the component_needed flags for any unused components,
  782. * so that earlier pipeline stages can avoid useless computation.
  783. */
  784. switch (cinfo->out_color_space) {
  785. case JCS_GRAYSCALE:
  786. cinfo->out_color_components = 1;
  787. if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
  788. cinfo->jpeg_color_space == JCS_YCbCr) {
  789. cconvert->pub.color_convert = grayscale_convert;
  790. /* For color->grayscale conversion, only the Y (0) component is needed */
  791. for (ci = 1; ci < cinfo->num_components; ci++)
  792. cinfo->comp_info[ci].component_needed = FALSE;
  793. } else
  794. ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
  795. break;
  796. case JCS_RGB:
  797. cinfo->out_color_components = RGB_PIXELSIZE;
  798. if (cinfo->jpeg_color_space == JCS_YCbCr) {
  799. cconvert->pub.color_convert = ycc_rgb_convert;
  800. build_ycc_rgb_table(cinfo);
  801. } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
  802. cconvert->pub.color_convert = null_convert;
  803. } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
  804. cconvert->pub.color_convert = grayscale_RGB_convert;
  805. #ifdef NIFTY
  806. } else if (cinfo->jpeg_color_space == JCS_YCC) {
  807. cconvert->pub.color_convert = pycc_rgb_convert;
  808. build_pycc_rgb_table(cinfo);
  809. #endif
  810. } else
  811. ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
  812. break;
  813. #ifdef NIFTY
  814. case JCS_RGBA:
  815. cinfo->out_color_components = 4;
  816. if (cinfo->jpeg_color_space == JCS_YCbCrA) {
  817. cconvert->pub.color_convert = ycbcra_rgba_convert;
  818. build_ycc_rgb_table(cinfo);
  819. }else if (cinfo->jpeg_color_space == JCS_YCbCrALegacy) {
  820. cconvert->pub.color_convert = ycbcralegacy_rgba_convert;
  821. build_ycc_rgb_table(cinfo);
  822. }else if (cinfo->jpeg_color_space == JCS_YCbCr) {
  823. cconvert->pub.color_convert = ycbcr_rgba_convert;
  824. build_ycc_rgb_table(cinfo);
  825. }else if (cinfo->jpeg_color_space == JCS_RGBA) {
  826. cconvert->pub.color_convert = null_convert;
  827. }else if (cinfo->jpeg_color_space == JCS_RGB) {
  828. cconvert->pub.color_convert = rgb_rgba_convert;
  829. }else if (cinfo->jpeg_color_space == JCS_YCC) {
  830. cconvert->pub.color_convert = pycc_rgba_convert;
  831. build_pycc_rgb_table(cinfo);
  832. } else {
  833. ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
  834. }
  835. break;
  836. #endif
  837. case JCS_CMYK:
  838. cinfo->out_color_components = 4;
  839. if (cinfo->jpeg_color_space == JCS_YCCK) {
  840. cconvert->pub.color_convert = ycck_cmyk_convert;
  841. build_ycc_rgb_table(cinfo);
  842. } else if (cinfo->jpeg_color_space == JCS_CMYK) {
  843. cconvert->pub.color_convert = null_convert;
  844. } else
  845. ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
  846. break;
  847. default:
  848. /* Permit null conversion to same output space */
  849. if (cinfo->out_color_space == cinfo->jpeg_color_space) {
  850. cinfo->out_color_components = cinfo->num_components;
  851. cconvert->pub.color_convert = null_convert;
  852. } else /* unsupported non-null conversion */
  853. ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
  854. break;
  855. }
  856. if (cinfo->quantize_colors)
  857. cinfo->output_components = 1; /* single colormapped output component */
  858. else
  859. cinfo->output_components = cinfo->out_color_components;
  860. }
  861. #ifdef _X86_
  862. // MMX assembly code editions begin here - CRK
  863. // Turn off "No EMMS instruction" warning
  864. #pragma warning(disable : 4799)
  865. void MYCbCr2RGB(
  866. int columns,
  867. unsigned char *inY,
  868. unsigned char *inU,
  869. unsigned char *inV,
  870. unsigned char *outRGB)
  871. {
  872. _asm {
  873. // Inits
  874. mov eax, inY
  875. mov ecx, inV
  876. mov edi, columns
  877. mov ebx, inU
  878. shr edi, 2 ; number of loops = cols/4
  879. mov edx, outRGB
  880. YUVtoRGB:
  881. movd mm0, [eax] ; 0/0/0/0/Y3/Y2/Y1/Y0
  882. pxor mm7, mm7 ; use mm7 as const_0 to achieve better pairing at start
  883. movd mm2, [ebx] ; 0/0/0/0/U3/U2/U1/U0
  884. punpcklbw mm0, mm7 ; Y3/Y2/Y1/Y0
  885. movd mm3, [ecx] ; 0/0/0/0/V3/V2/V1/V0
  886. punpcklbw mm2, mm7 ; U3/U2/U1/U0
  887. psubsw mm2, const_sub128 ; U3'/U2'/U1'/U0'
  888. punpcklbw mm3, mm7 ; V3/V2/V1/V0
  889. psubsw mm3, const_sub128 ; V3'/V2'/V1'/V0'
  890. movq mm4, mm2
  891. punpcklwd mm2, mm3 ; V1'/U1'/V0'/U0'
  892. movq mm1, mm0
  893. pmaddwd mm2, const_VUmul ; gvV1'+guU1'/gvV0'+guU0'
  894. psllw mm1, 8 ; Y3*256/Y2*256/Y1*256/Y0*256
  895. movq mm6, mm1
  896. punpcklwd mm1, mm7 ; Y1*256/Y0*256
  897. punpckhwd mm6, mm7 ; Y3*256/Y2*256
  898. movq mm5, mm4
  899. punpckhwd mm5, mm3 ; V3'/U3'/V2'/U2'
  900. paddd mm2, mm1 ; G1*256/G0*256 (mm1 free)
  901. pmaddwd mm5, const_VUmul ; gvV3'+guU3'/gvV2'+guU2'
  902. movq mm1, mm3 ; (using mm1)
  903. punpcklwd mm3, mm0 ; Y1/V1'/Y0/V0'
  904. movq mm7, mm4 ; This wipes out the zero constant
  905. pmaddwd mm3, const_YVmul ; ryY1+rvV1'/ryY0+rvV0'
  906. psrad mm2, 8 ; G1/G0
  907. paddd mm5, mm6 ; G3*256/G2*256 (mm6 free)
  908. punpcklwd mm4, mm0 ; Y1/U1'/Y0/U0'
  909. pmaddwd mm4, const_YUmul ; byY1+buU1'/byY0'+buU0'
  910. psrad mm5, 8 ; G3/G2
  911. psrad mm3, 8 ; R1/R0
  912. punpckhwd mm7 , mm0 ; Y3/U3'/Y2/U2'
  913. psrad mm4, 8 ; B1/B0
  914. movq mm6, mm3
  915. pmaddwd mm7, const_YUmul ; byY3+buU3'/byY2'+buU2'
  916. punpckhwd mm1, mm0 ; Y3/V3'/Y2/V2'
  917. pmaddwd mm1, const_YVmul ; ryY3+rvV3'/ryY2+rvV2'
  918. punpckldq mm3, mm2 ; G0/R0
  919. punpckhdq mm6, mm2 ; G1/R1 (mm2 free)
  920. movq mm0, mm4
  921. psrad mm7, 8 ; B3/B2
  922. punpckldq mm4, const_0 ; 0/B0
  923. punpckhdq mm0, const_0 ; 0/B1
  924. psrad mm1, 8 ; R3/R2
  925. packssdw mm3, mm4 ; 0/B0/G0/R0 (mm4 free)
  926. movq mm2, mm1
  927. packssdw mm6, mm0 ; 0/B1/G1/R1 (mm0 free)
  928. packuswb mm3, mm6 ; 0/B1/G1/R1/0/B0/G0/R0 (mm6 free)
  929. punpckldq mm2, mm5 ; G2/R2
  930. movq mm4, mm7
  931. punpckhdq mm1, mm5 ; G3/R3 (mm5 done)
  932. punpckldq mm7, const_0 ; 0/B2 (change this line for alpha code)
  933. punpckhdq mm4, const_0 ; 0/B3 (change this line for alpha code)
  934. movq mm0, mm3
  935. packssdw mm2, mm7 ; 0/B2/G2/R2
  936. pand mm3, mask_highd ; 0/B1/G1/R1/0/0/0/0
  937. packssdw mm1, mm4 ; 0/B3/G3/R3
  938. psrlq mm3, 8 ; 0/0/B1/G1/R1/0/0/0
  939. add edx, 12
  940. por mm0, mm3 ; 0/0/?/?/R1/B0/G0/R0
  941. packuswb mm2, mm1 ; 0/B3/G3/R3/0/B2/G2/R2
  942. psrlq mm3, 32 ; 0/0/0/0/0/0/B1/G1
  943. add eax, 4
  944. movd [edx][-12], mm0 ; correct for add
  945. punpcklwd mm3, mm2 ; 0/B2/0/0/G2/R2/B1/G1
  946. psrlq mm2, 24 ; 0/0/0/0/B3/G3/R3/0
  947. add ecx, 4
  948. movd [edx][-8], mm3 ; correct for previous add
  949. psrlq mm3, 48 ; 0/0/0/0/0/0/0/B2
  950. por mm2, mm3 ; 0/0/0/0/B3/G3/R3/0
  951. add ebx, 4
  952. movd [edx][-4], mm2 ; correct for previous add
  953. dec edi
  954. jnz YUVtoRGB ; Do 12 more bytes if not zero
  955. //emms // commented out since it is done after the IDCT
  956. } // end of _asm
  957. }
  958. void MYCbCrA2RGBA(
  959. int columns,
  960. unsigned char *inY,
  961. unsigned char *inU,
  962. unsigned char *inV,
  963. unsigned char *inA,
  964. unsigned char *outRGBA)
  965. {
  966. __int64 tempA;
  967. _asm {
  968. // Inits
  969. mov eax, inY
  970. mov ecx, inV
  971. mov edi, columns
  972. mov ebx, inU
  973. shr edi, 2 ; number of loops = cols/4
  974. mov edx, outRGBA
  975. mov esi, inA
  976. YUVAtoRGBA:
  977. movd mm0, [eax] ; 0/0/0/0/Y3/Y2/Y1/Y0
  978. pxor mm7, mm7 ; added this in to achieve better pairing at start
  979. movd mm2, [ebx] ; 0/0/0/0/U3/U2/U1/U0
  980. punpcklbw mm0, mm7 ; Y3/Y2/Y1/Y0
  981. movd mm3, [ecx] ; 0/0/0/0/V3/V2/V1/V0
  982. punpcklbw mm2, mm7 ; U3/U2/U1/U0
  983. psubsw mm2, const_sub128 ; U3'/U2'/U1'/U0'
  984. punpcklbw mm3, mm7 ; V3/V2/V1/V0
  985. psubsw mm3, const_sub128 ; V3'/V2'/V1'/V0'
  986. movq mm4, mm2
  987. punpcklwd mm2, mm3 ; V1'/U1'/V0'/U0'
  988. movq mm1, mm0
  989. pmaddwd mm2, const_VUmul ; guU1'+gvV1'/guU0'+gvV0'
  990. psllw mm1, 8 ; Y3*256/Y2*256/Y1*256/Y0*256
  991. movq mm6, mm1
  992. punpcklwd mm1, mm7 ; Y1*256/Y0*256
  993. punpckhwd mm6, mm7 ; Y3*256/Y2*256
  994. movq mm5, mm4
  995. punpckhwd mm5, mm3 ; V3'/U3'/V2'/U2'
  996. paddd mm2, mm1 ; G1*256/G0*256 (mm1 free)
  997. pmaddwd mm5, const_VUmul ; gvV3'+guU3'/gvV2'+guU2'
  998. movq mm1, mm3 ; (using mm1)
  999. punpcklwd mm3, mm0 ; Y1/V1'/Y0/V0'
  1000. movq mm7, mm4 ; This wipes out the zero constant
  1001. pmaddwd mm3, const_YVmul ; ryY1+rvV1'/ryY0+rvV0'
  1002. psrad mm2, 8 ; G1/G0
  1003. paddd mm5, mm6 ; G3*256/G2*256 (mm6 free)
  1004. punpcklwd mm4, mm0 ; Y1/U1'/Y0/U0'
  1005. pmaddwd mm4, const_YUmul ; byY1+buU1'/byY0'+buU0'
  1006. psrad mm5, 8 ; G3/G2
  1007. psrad mm3, 8 ; R1/R0
  1008. punpckhwd mm7 , mm0 ; Y3/U3'/Y2/U2'
  1009. movq mm6, mm3
  1010. pmaddwd mm7, const_YUmul ; byY3+buU3'/byY2'+buU2'
  1011. punpckhwd mm1, mm0 ; Y3/V3'/Y2/V2'
  1012. pmaddwd mm1, const_YVmul ; ryY3+rvV3'/ryY2+rvV2'
  1013. punpckldq mm3, mm2 ; G0/R0
  1014. punpckhdq mm6, mm2 ; G1/R1 (mm2 free)
  1015. movd mm2, [esi] ; 0/0/0/0/A3/A2/A1/A0
  1016. psrad mm4, 8 ; B1/B0
  1017. punpcklbw mm2, const_0 ; A3/A2/A1/A0
  1018. psrad mm1, 8 ; R3/R2
  1019. movq mm0, mm4 ; B1/B0
  1020. movq tempA, mm2
  1021. psrad mm7, 8 ; B3/B2
  1022. punpcklwd mm2, const_0 ; A1/A0
  1023. punpckldq mm4, mm2 ; A0/B0
  1024. punpckhdq mm0, mm2 ; A1/B1
  1025. movq mm2, mm1
  1026. packssdw mm3, mm4 ; A0/B0/G0/R0 (mm4 free)
  1027. packssdw mm6, mm0 ; A1/B1/G1/R1 (mm0 free)
  1028. movq mm4, mm7
  1029. packuswb mm3, mm6 ; A1/B1/G1/R1/A0/B0/G0/R0 (mm6 free)
  1030. movq mm6, tempA ; A3/A2/A1/A0
  1031. punpckldq mm2, mm5 ; G2/R2
  1032. movq [edx], mm3
  1033. punpckhdq mm1, mm5 ; G3/R3 (mm5 done)
  1034. punpckhwd mm6, const_0 ; A3/A2
  1035. punpckldq mm7, mm6 ; A2/B2
  1036. add eax, 4
  1037. punpckhdq mm4, mm6 ; A3/B3
  1038. add ebx, 4
  1039. packssdw mm2, mm7 ; A2/B2/G2/R2
  1040. add ecx, 4
  1041. packssdw mm1, mm4 ; A3/B3/G3/R3
  1042. add edx, 16
  1043. packuswb mm2, mm1 ; A3/B3/G3/R3/A2/B2/G2/R2
  1044. add esi, 4
  1045. movq [edx][-8], mm2 ; Post-add correction on address
  1046. dec edi
  1047. jnz YUVAtoRGBA ; Do 12 more bytes if not zero
  1048. //emms // commented out since it is done after the IDCT
  1049. } // end of _asm
  1050. }
  1051. void MYCbCrA2RGBALegacy(
  1052. int columns,
  1053. unsigned char *inY,
  1054. unsigned char *inU,
  1055. unsigned char *inV,
  1056. unsigned char *inA,
  1057. unsigned char *outRGBA)
  1058. {
  1059. __int64 tempA;
  1060. _asm {
  1061. // Inits
  1062. mov eax, inY
  1063. mov ecx, inV
  1064. mov edi, columns
  1065. mov ebx, inU
  1066. shr edi, 2 ; number of loops = cols/4
  1067. mov edx, outRGBA
  1068. mov esi, inA
  1069. YUVAtoRGBA:
  1070. movd mm0, [eax] ; 0/0/0/0/Y3/Y2/Y1/Y0
  1071. pxor mm7, mm7 ; added this in to achieve better pairing at start
  1072. movd mm2, [ebx] ; 0/0/0/0/U3/U2/U1/U0
  1073. punpcklbw mm0, mm7 ; Y3/Y2/Y1/Y0
  1074. movd mm3, [ecx] ; 0/0/0/0/V3/V2/V1/V0
  1075. punpcklbw mm2, mm7 ; U3/U2/U1/U0
  1076. psubsw mm2, const_sub128 ; U3'/U2'/U1'/U0'
  1077. punpcklbw mm3, mm7 ; V3/V2/V1/V0
  1078. psubsw mm3, const_sub128 ; V3'/V2'/V1'/V0'
  1079. movq mm4, mm2
  1080. punpcklwd mm2, mm3 ; V1'/U1'/V0'/U0'
  1081. movq mm1, mm0
  1082. pmaddwd mm2, const_VUmul ; guU1'+gvV1'/guU0'+gvV0'
  1083. psllw mm1, 8 ; Y3*256/Y2*256/Y1*256/Y0*256
  1084. movq mm6, mm1
  1085. punpcklwd mm1, mm7 ; Y1*256/Y0*256
  1086. punpckhwd mm6, mm7 ; Y3*256/Y2*256
  1087. movq mm5, mm4
  1088. punpckhwd mm5, mm3 ; V3'/U3'/V2'/U2'
  1089. paddd mm2, mm1 ; G1*256/G0*256 (mm1 free)
  1090. pmaddwd mm5, const_VUmul ; gvV3'+guU3'/gvV2'+guU2'
  1091. movq mm1, mm3 ; (using mm1)
  1092. punpcklwd mm3, mm0 ; Y1/V1'/Y0/V0'
  1093. movq mm7, mm4 ; This wipes out the zero constant
  1094. pmaddwd mm3, const_YVmul ; ryY1+rvV1'/ryY0+rvV0'
  1095. psrad mm2, 8 ; G1/G0
  1096. paddd mm5, mm6 ; G3*256/G2*256 (mm6 free)
  1097. punpcklwd mm4, mm0 ; Y1/U1'/Y0/U0'
  1098. pmaddwd mm4, const_YUmul ; byY1+buU1'/byY0'+buU0'
  1099. punpckhwd mm1, mm0 ; Y3/V3'/Y2/V2'
  1100. psrad mm3, 8 ; R1/R0
  1101. punpckhwd mm7, mm0 ; Y3/U3'/Y2/U2'
  1102. movq mm6, mm3
  1103. pmaddwd mm7, const_YUmul ; byY3+buU3'/byY2'+buU2'
  1104. psrad mm4, 8 ; B1/B0
  1105. pmaddwd mm1, const_YVmul ; ryY3+rvV3'/ryY2+rvV2'
  1106. punpckldq mm3, mm2 ; G0/R0
  1107. punpckhdq mm6, mm2 ; G1/R1 (mm2 free)
  1108. movd mm2, [esi] ; 0/0/0/0/A3/A2/A1/A0
  1109. psrad mm7, 8 ; B3/B2
  1110. punpcklbw mm2, const_0 ; A3/A2/A1/A0
  1111. psrad mm1, 8 ; R3/R2
  1112. movq mm0, mm4 ; B1/B0
  1113. movq tempA, mm2
  1114. psrad mm5, 8 ; G3/G2
  1115. punpcklwd mm2, const_0 ; A1/A0
  1116. punpckldq mm4, mm2 ; A0/B0
  1117. punpckhdq mm0, mm2 ; A1/B1
  1118. movq mm2, mm1
  1119. packssdw mm3, mm4 ; A0/B0/G0/R0 (mm4 free)
  1120. packssdw mm6, mm0 ; A1/B1/G1/R1 (mm0 free)
  1121. movq mm4, mm7
  1122. packuswb mm3, mm6 ; A1/B1/G1/R1/A0/B0/G0/R0 (mm6 free)
  1123. add esi, 4
  1124. movq mm6, tempA ; A3/A2/A1/A0
  1125. punpckldq mm2, mm5 ; G2/R2
  1126. pxor mm3, const_invert ; Invert all RGB values
  1127. punpckhdq mm1, mm5 ; G3/R3 (mm5 done)
  1128. punpckhwd mm6, const_0 ; A3/A2
  1129. movq [edx], mm3
  1130. punpckldq mm7, mm6 ; A2/B2
  1131. punpckhdq mm4, mm6 ; A3/B3
  1132. add eax, 4
  1133. packssdw mm2, mm7 ; A2/B2/G2/R2
  1134. add ebx, 4
  1135. packssdw mm1, mm4 ; A3/B3/G3/R3
  1136. add ecx, 4
  1137. packuswb mm2, mm1 ; A3/B3/G3/R3/A2/B2/G2/R2
  1138. add edx, 16
  1139. pxor mm2, const_invert ; invert all RGB values
  1140. movq [edx][-8], mm2 ; Post-add correction on address
  1141. dec edi
  1142. jnz YUVAtoRGBA ; Do 12 more bytes if not zero
  1143. //emms // commented out since it is done after the IDCT
  1144. } // end of _asm
  1145. }
  1146. // enable "No EMMS instruction" warning
  1147. #pragma warning(default : 4799)
  1148. #endif