lossless.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. // Copyright 2012 Google Inc. All Rights Reserved.
  2. //
  3. // Use of this source code is governed by a BSD-style license
  4. // that can be found in the COPYING file in the root of the source
  5. // tree. An additional intellectual property rights grant can be found
  6. // in the file PATENTS. All contributing project authors may
  7. // be found in the AUTHORS file in the root of the source tree.
  8. // -----------------------------------------------------------------------------
  9. //
  10. // Image transforms and color space conversion methods for lossless decoder.
  11. //
  12. // Authors: Vikas Arora (vikaas.arora@gmail.com)
  13. // Jyrki Alakuijala (jyrki@google.com)
  14. // Urvang Joshi (urvang@google.com)
  15. #include "./dsp.h"
  16. #include <math.h>
  17. #include <stdlib.h>
  18. #include "../dec/vp8li_dec.h"
  19. #include "../utils/endian_inl_utils.h"
  20. #include "./lossless.h"
  21. #include "./lossless_common.h"
  22. #define MAX_DIFF_COST (1e30f)
  23. //------------------------------------------------------------------------------
  24. // Image transforms.
  25. static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  26. return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
  27. }
  28. static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
  29. return Average2(Average2(a0, a2), a1);
  30. }
  31. static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
  32. uint32_t a2, uint32_t a3) {
  33. return Average2(Average2(a0, a1), Average2(a2, a3));
  34. }
  35. static WEBP_INLINE uint32_t Clip255(uint32_t a) {
  36. if (a < 256) {
  37. return a;
  38. }
  39. // return 0, when a is a negative integer.
  40. // return 255, when a is positive.
  41. return ~a >> 24;
  42. }
  43. static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
  44. return Clip255(a + b - c);
  45. }
  46. static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
  47. uint32_t c2) {
  48. const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
  49. const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
  50. (c1 >> 16) & 0xff,
  51. (c2 >> 16) & 0xff);
  52. const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
  53. (c1 >> 8) & 0xff,
  54. (c2 >> 8) & 0xff);
  55. const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
  56. return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
  57. }
  58. static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
  59. return Clip255(a + (a - b) / 2);
  60. }
  61. static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
  62. uint32_t c2) {
  63. const uint32_t ave = Average2(c0, c1);
  64. const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
  65. const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
  66. const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
  67. const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
  68. return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
  69. }
  70. // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
  71. #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
  72. # define LOCAL_INLINE __attribute__ ((noinline))
  73. #else
  74. # define LOCAL_INLINE WEBP_INLINE
  75. #endif
  76. static LOCAL_INLINE int Sub3(int a, int b, int c) {
  77. const int pb = b - c;
  78. const int pa = a - c;
  79. return abs(pb) - abs(pa);
  80. }
  81. #undef LOCAL_INLINE
  82. static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  83. const int pa_minus_pb =
  84. Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
  85. Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
  86. Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
  87. Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
  88. return (pa_minus_pb <= 0) ? a : b;
  89. }
  90. //------------------------------------------------------------------------------
  91. // Predictors
  92. static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
  93. (void)top;
  94. (void)left;
  95. return ARGB_BLACK;
  96. }
  // Mode 1: predicts the pixel as 'left'. 'top' is not read.
  static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
    const uint32_t pred = left;
    (void)top;  // unused
    return pred;
  }
  // Mode 2: predicts the pixel as top[0]. 'left' is not used.
  static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
    const uint32_t pred = *top;
    (void)left;  // unused
    return pred;
  }
  // Mode 3: predicts the pixel as top[1], the entry just after top[0].
  // 'left' is not used.
  static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
    const uint32_t pred = *(top + 1);
    (void)left;  // unused
    return pred;
  }
  // Mode 4: predicts the pixel as top[-1], the entry just before top[0].
  // 'left' is not used.
  static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
    const uint32_t pred = *(top - 1);
    (void)left;  // unused
    return pred;
  }
  // Mode 5: Average3() of 'left', top[0] and top[1].
  static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
    return Average3(left, top[0], top[1]);
  }
  // Mode 6: per-channel average of 'left' and top[-1].
  static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
    return Average2(left, top[-1]);
  }
  // Mode 7: per-channel average of 'left' and top[0].
  static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
    return Average2(left, top[0]);
  }
  // Mode 8: per-channel average of top[-1] and top[0]. 'left' is not used.
  static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
    (void)left;  // unused
    return Average2(top[-1], top[0]);
  }
  // Mode 9: per-channel average of top[0] and top[1]. 'left' is not used.
  static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
    (void)left;  // unused
    return Average2(top[0], top[1]);
  }
  // Mode 10: Average4() of 'left', top[-1], top[0] and top[1].
  static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
    return Average4(left, top[-1], top[0], top[1]);
  }
  // Mode 11: Select() between top[0] and 'left', using top[-1] as the
  // reference pixel.
  static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
    return Select(top[0], left, top[-1]);
  }
  // Mode 12: per-channel clamped 'left + top[0] - top[-1]'.
  static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
    return ClampedAddSubtractFull(left, top[0], top[-1]);
  }
  // Mode 13: ClampedAddSubtractHalf() applied to 'left', top[0] and top[-1]
  // (the average of the first two, pushed away from the third by half their
  // difference, clamped per channel).
  static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
    return ClampedAddSubtractHalf(left, top[0], top[-1]);
  }
  151. GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
  152. static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
  153. int num_pixels, uint32_t* out) {
  154. int i;
  155. uint32_t left = out[-1];
  156. for (i = 0; i < num_pixels; ++i) {
  157. out[i] = left = VP8LAddPixels(in[i], left);
  158. }
  159. (void)upper;
  160. }
  161. GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
  162. GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
  163. GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
  164. GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
  165. GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
  166. GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
  167. GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
  168. GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
  169. GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
  170. GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
  171. GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
  172. GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
  173. //------------------------------------------------------------------------------
  174. // Inverse prediction.
  175. static void PredictorInverseTransform(const VP8LTransform* const transform,
  176. int y_start, int y_end,
  177. const uint32_t* in, uint32_t* out) {
  178. const int width = transform->xsize_;
  179. if (y_start == 0) { // First Row follows the L (mode=1) mode.
  180. PredictorAdd0(in, NULL, 1, out);
  181. PredictorAdd1(in + 1, NULL, width - 1, out + 1);
  182. in += width;
  183. out += width;
  184. ++y_start;
  185. }
  186. {
  187. int y = y_start;
  188. const int tile_width = 1 << transform->bits_;
  189. const int mask = tile_width - 1;
  190. const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
  191. const uint32_t* pred_mode_base =
  192. transform->data_ + (y >> transform->bits_) * tiles_per_row;
  193. while (y < y_end) {
  194. const uint32_t* pred_mode_src = pred_mode_base;
  195. int x = 1;
  196. // First pixel follows the T (mode=2) mode.
  197. PredictorAdd2(in, out - width, 1, out);
  198. // .. the rest:
  199. while (x < width) {
  200. const VP8LPredictorAddSubFunc pred_func =
  201. VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
  202. int x_end = (x & ~mask) + tile_width;
  203. if (x_end > width) x_end = width;
  204. pred_func(in + x, out + x - width, x_end - x, out + x);
  205. x = x_end;
  206. }
  207. in += width;
  208. out += width;
  209. ++y;
  210. if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
  211. pred_mode_base += tiles_per_row;
  212. }
  213. }
  214. }
  215. }
  // Adds the green channel to the red and blue channels of every pixel (i.e.
  // performs the inverse of the 'subtract green' transform). The additions
  // wrap modulo 256; alpha and green are copied through unchanged.
  // 'src' and 'dst' each hold 'num_pixels' packed 32-bit pixels.
  void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
                                  uint32_t* dst) {
    int k;
    for (k = 0; k < num_pixels; ++k) {
      const uint32_t pixel = src[k];
      const uint32_t g = (pixel >> 8) & 0xff;
      const uint32_t alpha_green = pixel & 0xff00ff00u;
      // Red and blue are updated together in one 32-bit add; the final mask
      // throws away the carry out of each 8-bit lane.
      const uint32_t red_blue =
          ((pixel & 0x00ff00ffu) + ((g << 16) | g)) & 0x00ff00ffu;
      dst[k] = alpha_green | red_blue;
    }
  }
  230. static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
  231. int8_t color) {
  232. return ((int)color_pred * color) >> 5;
  233. }
  234. static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
  235. VP8LMultipliers* const m) {
  236. m->green_to_red_ = (color_code >> 0) & 0xff;
  237. m->green_to_blue_ = (color_code >> 8) & 0xff;
  238. m->red_to_blue_ = (color_code >> 16) & 0xff;
  239. }
  240. void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
  241. const uint32_t* src, int num_pixels,
  242. uint32_t* dst) {
  243. int i;
  244. for (i = 0; i < num_pixels; ++i) {
  245. const uint32_t argb = src[i];
  246. const uint32_t green = argb >> 8;
  247. const uint32_t red = argb >> 16;
  248. int new_red = red;
  249. int new_blue = argb;
  250. new_red += ColorTransformDelta(m->green_to_red_, green);
  251. new_red &= 0xff;
  252. new_blue += ColorTransformDelta(m->green_to_blue_, green);
  253. new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
  254. new_blue &= 0xff;
  255. dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
  256. }
  257. }
  258. // Color space inverse transform.
  259. static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
  260. int y_start, int y_end,
  261. const uint32_t* src, uint32_t* dst) {
  262. const int width = transform->xsize_;
  263. const int tile_width = 1 << transform->bits_;
  264. const int mask = tile_width - 1;
  265. const int safe_width = width & ~mask;
  266. const int remaining_width = width - safe_width;
  267. const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
  268. int y = y_start;
  269. const uint32_t* pred_row =
  270. transform->data_ + (y >> transform->bits_) * tiles_per_row;
  271. while (y < y_end) {
  272. const uint32_t* pred = pred_row;
  273. VP8LMultipliers m = { 0, 0, 0 };
  274. const uint32_t* const src_safe_end = src + safe_width;
  275. const uint32_t* const src_end = src + width;
  276. while (src < src_safe_end) {
  277. ColorCodeToMultipliers(*pred++, &m);
  278. VP8LTransformColorInverse(&m, src, tile_width, dst);
  279. src += tile_width;
  280. dst += tile_width;
  281. }
  282. if (src < src_end) { // Left-overs using C-version.
  283. ColorCodeToMultipliers(*pred++, &m);
  284. VP8LTransformColorInverse(&m, src, remaining_width, dst);
  285. src += remaining_width;
  286. dst += remaining_width;
  287. }
  288. ++y;
  289. if ((y & mask) == 0) pred_row += tiles_per_row;
  290. }
  291. }
  // Separate out pixels packed together using pixel-bundling.
  // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
  //
  // Each instantiation of COLOR_INDEX_INVERSE emits two functions:
  //  - F_NAME: a static plain mapper that, for every pixel of rows
  //    [y_start, y_end), writes GET_VALUE(color_map[GET_INDEX(pixel)]) to 'dst'.
  //  - FUNC_NAME (declared with STATIC_DECL): the inverse color-indexing
  //    transform. With bits_per_pixel = 8 >> transform->bits_:
  //      * bits_per_pixel < 8: every source element packs
  //        (1 << transform->bits_) indices, lowest bits first; they are
  //        extracted one at a time and looked up in transform->data_. A fresh
  //        source element is loaded at the start of every row.
  //      * otherwise: the work is delegated to VP8LMapColor##BIT_SUFFIX.
  // TYPE is the element type of 'src' and 'dst'. GET_INDEX and GET_VALUE are
  // supplied by the instantiation; presumably they extract the palette index
  // from a source element and convert a palette entry to TYPE -- confirm
  // against their definitions.
  #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
                              GET_INDEX, GET_VALUE)                              \
  static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
                     TYPE* dst, int y_start, int y_end, int width) {             \
    int y;                                                                       \
    for (y = y_start; y < y_end; ++y) {                                          \
      int x;                                                                     \
      for (x = 0; x < width; ++x) {                                              \
        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
      }                                                                          \
    }                                                                            \
  }                                                                              \
  STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
                             int y_start, int y_end, const TYPE* src,            \
                             TYPE* dst) {                                        \
    int y;                                                                       \
    const int bits_per_pixel = 8 >> transform->bits_;                            \
    const int width = transform->xsize_;                                         \
    const uint32_t* const color_map = transform->data_;                          \
    if (bits_per_pixel < 8) {                                                    \
      const int pixels_per_byte = 1 << transform->bits_;                         \
      const int count_mask = pixels_per_byte - 1;                                \
      const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
      for (y = y_start; y < y_end; ++y) {                                        \
        uint32_t packed_pixels = 0;                                              \
        int x;                                                                   \
        for (x = 0; x < width; ++x) {                                            \
          /* We need to load fresh 'packed_pixels' once every                */  \
          /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
          /* is a power of 2, so can just use a mask for that, instead of    */  \
          /* decrementing a counter.                                         */  \
          if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
          *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
          packed_pixels >>= bits_per_pixel;                                      \
        }                                                                        \
      }                                                                          \
    } else {                                                                     \
      VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
    }                                                                            \
  }
  // ARGB variant: static ColorIndexInverseTransform() plus the MapARGB mapper.
  COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
                      VP8GetARGBIndex, VP8GetARGBValue)
  // Alpha-only variant: externally visible
  // VP8LColorIndexInverseTransformAlpha() plus the MapAlpha mapper.
  COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
                      8b, VP8GetAlphaIndex, VP8GetAlphaValue)
  #undef COLOR_INDEX_INVERSE
  339. void VP8LInverseTransform(const VP8LTransform* const transform,
  340. int row_start, int row_end,
  341. const uint32_t* const in, uint32_t* const out) {
  342. const int width = transform->xsize_;
  343. assert(row_start < row_end);
  344. assert(row_end <= transform->ysize_);
  345. switch (transform->type_) {
  346. case SUBTRACT_GREEN:
  347. VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
  348. break;
  349. case PREDICTOR_TRANSFORM:
  350. PredictorInverseTransform(transform, row_start, row_end, in, out);
  351. if (row_end != transform->ysize_) {
  352. // The last predicted row in this iteration will be the top-pred row
  353. // for the first row in next iteration.
  354. memcpy(out - width, out + (row_end - row_start - 1) * width,
  355. width * sizeof(*out));
  356. }
  357. break;
  358. case CROSS_COLOR_TRANSFORM:
  359. ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
  360. break;
  361. case COLOR_INDEXING_TRANSFORM:
  362. if (in == out && transform->bits_ > 0) {
  363. // Move packed pixels to the end of unpacked region, so that unpacking
  364. // can occur seamlessly.
  365. // Also, note that this is the only transform that applies on
  366. // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
  367. // transforms work on effective width of xsize_.
  368. const int out_stride = (row_end - row_start) * width;
  369. const int in_stride = (row_end - row_start) *
  370. VP8LSubSampleSize(transform->xsize_, transform->bits_);
  371. uint32_t* const src = out + out_stride - in_stride;
  372. memmove(src, out, in_stride * sizeof(*src));
  373. ColorIndexInverseTransform(transform, row_start, row_end, src, out);
  374. } else {
  375. ColorIndexInverseTransform(transform, row_start, row_end, in, out);
  376. }
  377. break;
  378. }
  379. }
  380. //------------------------------------------------------------------------------
  381. // Color space conversion.
  // Run-time byte-order probe: returns 1 when the first byte in memory of the
  // 16-bit value 1 is not 1 (the low-order byte is not stored first), and 0
  // otherwise.
  static int is_big_endian(void) {
    static const union {
      uint16_t w;
      uint8_t b[2];
    } probe = { 1 };
    return (probe.b[0] == 1) ? 0 : 1;
  }
  // Converts 'num_pixels' packed 32-bit pixels into 3-byte triplets: for each
  // pixel, writes bits 16..23, then bits 8..15, then bits 0..7 (R, G, B).
  // The top byte (alpha) is dropped. 'dst' must hold 3 * num_pixels bytes.
  void VP8LConvertBGRAToRGB_C(const uint32_t* src,
                              int num_pixels, uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      dst[0] = (uint8_t)(argb >> 16);
      dst[1] = (uint8_t)(argb >> 8);
      dst[2] = (uint8_t)(argb >> 0);
      dst += 3;
    }
  }
  // Converts 'num_pixels' packed 32-bit pixels into 4-byte groups: for each
  // pixel, writes bits 16..23, bits 8..15, bits 0..7 and finally bits 24..31
  // (R, G, B, A). 'dst' must hold 4 * num_pixels bytes.
  void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
                               int num_pixels, uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      dst[0] = (uint8_t)(argb >> 16);
      dst[1] = (uint8_t)(argb >> 8);
      dst[2] = (uint8_t)(argb >> 0);
      dst[3] = (uint8_t)(argb >> 24);
      dst += 4;
    }
  }
  // Converts 'num_pixels' packed 32-bit pixels to two bytes each, keeping the
  // top four bits of every channel: one byte is (R high nibble | G high
  // nibble), the other is (B high nibble | A high nibble). The 'rg' byte is
  // written first unless WEBP_SWAP_16BIT_CSP is defined, in which case the two
  // bytes are swapped. 'dst' must hold 2 * num_pixels bytes.
  void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
                                   int num_pixels, uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      const uint32_t a = (argb >> 24) & 0xff;
      const uint32_t r = (argb >> 16) & 0xff;
      const uint32_t g = (argb >> 8) & 0xff;
      const uint32_t b = (argb >> 0) & 0xff;
      const uint8_t rg = (uint8_t)((r & 0xf0) | (g >> 4));
      const uint8_t ba = (uint8_t)((b & 0xf0) | (a >> 4));
  #ifdef WEBP_SWAP_16BIT_CSP
      dst[0] = ba;
      dst[1] = rg;
  #else
      dst[0] = rg;
      dst[1] = ba;
  #endif
      dst += 2;
    }
  }
  // Converts 'num_pixels' packed 32-bit pixels to two bytes each in 5-6-5
  // layout: 'rg' holds the top 5 bits of R followed by the top 3 bits of G,
  // 'gb' holds the next 3 bits of G followed by the top 5 bits of B. Alpha is
  // dropped. The 'rg' byte is written first unless WEBP_SWAP_16BIT_CSP is
  // defined, in which case the two bytes are swapped.
  // 'dst' must hold 2 * num_pixels bytes.
  void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                                 int num_pixels, uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      const uint32_t r = (argb >> 16) & 0xff;
      const uint32_t g = (argb >> 8) & 0xff;
      const uint32_t b = (argb >> 0) & 0xff;
      const uint8_t rg = (uint8_t)((r & 0xf8) | (g >> 5));
      const uint8_t gb = (uint8_t)(((g << 3) & 0xe0) | (b >> 3));
  #ifdef WEBP_SWAP_16BIT_CSP
      dst[0] = gb;
      dst[1] = rg;
  #else
      dst[0] = rg;
      dst[1] = gb;
  #endif
      dst += 2;
    }
  }
  // Converts 'num_pixels' packed 32-bit pixels into 3-byte triplets: for each
  // pixel, writes bits 0..7, then bits 8..15, then bits 16..23 (B, G, R).
  // The top byte (alpha) is dropped. 'dst' must hold 3 * num_pixels bytes.
  void VP8LConvertBGRAToBGR_C(const uint32_t* src,
                              int num_pixels, uint8_t* dst) {
    int i;
    for (i = 0; i < num_pixels; ++i) {
      const uint32_t argb = src[i];
      dst[0] = (uint8_t)(argb >> 0);
      dst[1] = (uint8_t)(argb >> 8);
      dst[2] = (uint8_t)(argb >> 16);
      dst += 3;
    }
  }
  // Writes 'num_pixels' 32-bit pixels from 'src' to the byte buffer 'dst'.
  // When is_big_endian() differs from 'swap_on_big_endian' the pixels are
  // copied verbatim with memcpy(); otherwise every pixel is stored byte by
  // byte, in the order chosen by the compile-time configuration below.
  // 'dst' must hold 4 * num_pixels bytes.
  static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                         int swap_on_big_endian) {
    const uint32_t* src_end;
    if (is_big_endian() != swap_on_big_endian) {
      memcpy(dst, src, num_pixels * sizeof(*src));
      return;
    }
    src_end = src + num_pixels;
    for (; src < src_end; ++src, dst += sizeof(*src)) {
      const uint32_t argb = *src;
  #if defined(WORDS_BIGENDIAN)
      // Least-significant byte first.
      dst[0] = (argb >> 0) & 0xff;
      dst[1] = (argb >> 8) & 0xff;
      dst[2] = (argb >> 16) & 0xff;
      dst[3] = (argb >> 24) & 0xff;
  #elif !defined(WEBP_REFERENCE_IMPLEMENTATION)
      WebPUint32ToMem(dst, BSwap32(argb));
  #else  // WEBP_REFERENCE_IMPLEMENTATION
      // Most-significant byte first.
      dst[0] = (argb >> 24) & 0xff;
      dst[1] = (argb >> 16) & 0xff;
      dst[2] = (argb >> 8) & 0xff;
      dst[3] = (argb >> 0) & 0xff;
  #endif
    }
  }
  479. void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
  480. WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
  481. switch (out_colorspace) {
  482. case MODE_RGB:
  483. VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
  484. break;
  485. case MODE_RGBA:
  486. VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
  487. break;
  488. case MODE_rgbA:
  489. VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
  490. WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
  491. break;
  492. case MODE_BGR:
  493. VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
  494. break;
  495. case MODE_BGRA:
  496. CopyOrSwap(in_data, num_pixels, rgba, 1);
  497. break;
  498. case MODE_bgrA:
  499. CopyOrSwap(in_data, num_pixels, rgba, 1);
  500. WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
  501. break;
  502. case MODE_ARGB:
  503. CopyOrSwap(in_data, num_pixels, rgba, 0);
  504. break;
  505. case MODE_Argb:
  506. CopyOrSwap(in_data, num_pixels, rgba, 0);
  507. WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
  508. break;
  509. case MODE_RGBA_4444:
  510. VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
  511. break;
  512. case MODE_rgbA_4444:
  513. VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
  514. WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
  515. break;
  516. case MODE_RGB_565:
  517. VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
  518. break;
  519. default:
  520. assert(0); // Code flow should not reach here.
  521. }
  522. }
  523. //------------------------------------------------------------------------------
  // Dispatch pointers and tables. They are left unset here and are filled in
  // by VP8LDspInit(), first with the plain-C implementations of this file and
  // then, when available, by the platform-specific init functions.
  VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
  // Predictor tables, indexed by predictor mode. VP8LDspInit() fills all 16
  // entries (14 real modes plus two padding entries).
  VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
  VP8LPredictorFunc VP8LPredictors[16];
  // exposed plain-C implementations
  VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
  VP8LPredictorFunc VP8LPredictors_C[16];
  VP8LTransformColorInverseFunc VP8LTransformColorInverse;
  VP8LConvertFunc VP8LConvertBGRAToRGB;
  VP8LConvertFunc VP8LConvertBGRAToRGBA;
  VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
  VP8LConvertFunc VP8LConvertBGRAToRGB565;
  VP8LConvertFunc VP8LConvertBGRAToBGR;
  VP8LMapARGBFunc VP8LMapColor32b;
  VP8LMapAlphaFunc VP8LMapColor8b;
  // Platform-specific initializers, defined in other translation units.
  // VP8LDspInit() calls each one only when the matching WEBP_USE_* macro is
  // defined and VP8GetCPUInfo() reports the feature.
  extern void VP8LDspInitSSE2(void);
  extern void VP8LDspInitNEON(void);
  extern void VP8LDspInitMIPSdspR2(void);
  extern void VP8LDspInitMSA(void);
  // Value of VP8GetCPUInfo seen by the last completed VP8LDspInit() call; used
  // there to skip repeated initialization. It starts out as the address of this
  // very variable, presumably so that it cannot compare equal to any real
  // VP8GetCPUInfo value (including NULL) before the first call.
  static volatile VP8CPUInfo lossless_last_cpuinfo_used =
      (VP8CPUInfo)&lossless_last_cpuinfo_used;
  544. #define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
  545. (OUT)[0] = IN##0; \
  546. (OUT)[1] = IN##1; \
  547. (OUT)[2] = IN##2; \
  548. (OUT)[3] = IN##3; \
  549. (OUT)[4] = IN##4; \
  550. (OUT)[5] = IN##5; \
  551. (OUT)[6] = IN##6; \
  552. (OUT)[7] = IN##7; \
  553. (OUT)[8] = IN##8; \
  554. (OUT)[9] = IN##9; \
  555. (OUT)[10] = IN##10; \
  556. (OUT)[11] = IN##11; \
  557. (OUT)[12] = IN##12; \
  558. (OUT)[13] = IN##13; \
  559. (OUT)[14] = IN##0; /* <- padding security sentinels*/ \
  560. (OUT)[15] = IN##0; \
  561. } while (0);
  562. WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
  563. if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
  564. COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
  565. COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
  566. COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
  567. COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
  568. VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
  569. VP8LTransformColorInverse = VP8LTransformColorInverse_C;
  570. VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
  571. VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
  572. VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
  573. VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
  574. VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
  575. VP8LMapColor32b = MapARGB;
  576. VP8LMapColor8b = MapAlpha;
  577. // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  578. if (VP8GetCPUInfo != NULL) {
  579. #if defined(WEBP_USE_SSE2)
  580. if (VP8GetCPUInfo(kSSE2)) {
  581. VP8LDspInitSSE2();
  582. }
  583. #endif
  584. #if defined(WEBP_USE_NEON)
  585. if (VP8GetCPUInfo(kNEON)) {
  586. VP8LDspInitNEON();
  587. }
  588. #endif
  589. #if defined(WEBP_USE_MIPS_DSP_R2)
  590. if (VP8GetCPUInfo(kMIPSdspR2)) {
  591. VP8LDspInitMIPSdspR2();
  592. }
  593. #endif
  594. #if defined(WEBP_USE_MSA)
  595. if (VP8GetCPUInfo(kMSA)) {
  596. VP8LDspInitMSA();
  597. }
  598. #endif
  599. }
  600. lossless_last_cpuinfo_used = VP8GetCPUInfo;
  601. }
  602. #undef COPY_PREDICTOR_ARRAY
  603. //------------------------------------------------------------------------------