lossless_enc_mips_dsp_r2.c 13 KB

// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transform methods for lossless encoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
  14. #include "./dsp.h"
  15. #if defined(WEBP_USE_MIPS_DSP_R2)
  16. #include "./lossless.h"
  17. static void SubtractGreenFromBlueAndRed(uint32_t* argb_data,
  18. int num_pixels) {
  19. uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  20. uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3);
  21. uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3);
  22. __asm__ volatile (
  23. ".set push \n\t"
  24. ".set noreorder \n\t"
  25. "beq %[argb_data], %[p_loop1_end], 3f \n\t"
  26. " nop \n\t"
  27. "0: \n\t"
  28. "lw %[temp0], 0(%[argb_data]) \n\t"
  29. "lw %[temp1], 4(%[argb_data]) \n\t"
  30. "lw %[temp2], 8(%[argb_data]) \n\t"
  31. "lw %[temp3], 12(%[argb_data]) \n\t"
  32. "ext %[temp4], %[temp0], 8, 8 \n\t"
  33. "ext %[temp5], %[temp1], 8, 8 \n\t"
  34. "ext %[temp6], %[temp2], 8, 8 \n\t"
  35. "ext %[temp7], %[temp3], 8, 8 \n\t"
  36. "addiu %[argb_data], %[argb_data], 16 \n\t"
  37. "replv.ph %[temp4], %[temp4] \n\t"
  38. "replv.ph %[temp5], %[temp5] \n\t"
  39. "replv.ph %[temp6], %[temp6] \n\t"
  40. "replv.ph %[temp7], %[temp7] \n\t"
  41. "subu.qb %[temp0], %[temp0], %[temp4] \n\t"
  42. "subu.qb %[temp1], %[temp1], %[temp5] \n\t"
  43. "subu.qb %[temp2], %[temp2], %[temp6] \n\t"
  44. "subu.qb %[temp3], %[temp3], %[temp7] \n\t"
  45. "sw %[temp0], -16(%[argb_data]) \n\t"
  46. "sw %[temp1], -12(%[argb_data]) \n\t"
  47. "sw %[temp2], -8(%[argb_data]) \n\t"
  48. "bne %[argb_data], %[p_loop1_end], 0b \n\t"
  49. " sw %[temp3], -4(%[argb_data]) \n\t"
  50. "3: \n\t"
  51. "beq %[argb_data], %[p_loop2_end], 2f \n\t"
  52. " nop \n\t"
  53. "1: \n\t"
  54. "lw %[temp0], 0(%[argb_data]) \n\t"
  55. "addiu %[argb_data], %[argb_data], 4 \n\t"
  56. "ext %[temp4], %[temp0], 8, 8 \n\t"
  57. "replv.ph %[temp4], %[temp4] \n\t"
  58. "subu.qb %[temp0], %[temp0], %[temp4] \n\t"
  59. "bne %[argb_data], %[p_loop2_end], 1b \n\t"
  60. " sw %[temp0], -4(%[argb_data]) \n\t"
  61. "2: \n\t"
  62. ".set pop \n\t"
  63. : [argb_data]"+&r"(argb_data), [temp0]"=&r"(temp0),
  64. [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
  65. [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
  66. [temp7]"=&r"(temp7)
  67. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  68. : "memory"
  69. );
  70. }
  71. static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
  72. int8_t color) {
  73. return (uint32_t)((int)(color_pred) * color) >> 5;
  74. }
  75. static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
  76. int num_pixels) {
  77. int temp0, temp1, temp2, temp3, temp4, temp5;
  78. uint32_t argb, argb1, new_red, new_red1;
  79. const uint32_t G_to_R = m->green_to_red_;
  80. const uint32_t G_to_B = m->green_to_blue_;
  81. const uint32_t R_to_B = m->red_to_blue_;
  82. uint32_t* const p_loop_end = data + (num_pixels & ~1);
  83. __asm__ volatile (
  84. ".set push \n\t"
  85. ".set noreorder \n\t"
  86. "beq %[data], %[p_loop_end], 1f \n\t"
  87. " nop \n\t"
  88. "replv.ph %[temp0], %[G_to_R] \n\t"
  89. "replv.ph %[temp1], %[G_to_B] \n\t"
  90. "replv.ph %[temp2], %[R_to_B] \n\t"
  91. "shll.ph %[temp0], %[temp0], 8 \n\t"
  92. "shll.ph %[temp1], %[temp1], 8 \n\t"
  93. "shll.ph %[temp2], %[temp2], 8 \n\t"
  94. "shra.ph %[temp0], %[temp0], 8 \n\t"
  95. "shra.ph %[temp1], %[temp1], 8 \n\t"
  96. "shra.ph %[temp2], %[temp2], 8 \n\t"
  97. "0: \n\t"
  98. "lw %[argb], 0(%[data]) \n\t"
  99. "lw %[argb1], 4(%[data]) \n\t"
  100. "lhu %[new_red], 2(%[data]) \n\t"
  101. "lhu %[new_red1], 6(%[data]) \n\t"
  102. "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
  103. "precr.qb.ph %[temp4], %[argb], %[argb1] \n\t"
  104. "preceu.ph.qbra %[temp3], %[temp3] \n\t"
  105. "preceu.ph.qbla %[temp4], %[temp4] \n\t"
  106. "shll.ph %[temp3], %[temp3], 8 \n\t"
  107. "shll.ph %[temp4], %[temp4], 8 \n\t"
  108. "shra.ph %[temp3], %[temp3], 8 \n\t"
  109. "shra.ph %[temp4], %[temp4], 8 \n\t"
  110. "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
  111. "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
  112. "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
  113. "addiu %[data], %[data], 8 \n\t"
  114. "ins %[new_red1], %[new_red], 16, 16 \n\t"
  115. "ins %[argb1], %[argb], 16, 16 \n\t"
  116. "shra.ph %[temp5], %[temp5], 5 \n\t"
  117. "shra.ph %[temp3], %[temp3], 5 \n\t"
  118. "shra.ph %[temp4], %[temp4], 5 \n\t"
  119. "subu.ph %[new_red1], %[new_red1], %[temp5] \n\t"
  120. "subu.ph %[argb1], %[argb1], %[temp3] \n\t"
  121. "preceu.ph.qbra %[temp5], %[new_red1] \n\t"
  122. "subu.ph %[argb1], %[argb1], %[temp4] \n\t"
  123. "preceu.ph.qbra %[temp3], %[argb1] \n\t"
  124. "sb %[temp5], -2(%[data]) \n\t"
  125. "sb %[temp3], -4(%[data]) \n\t"
  126. "sra %[temp5], %[temp5], 16 \n\t"
  127. "sra %[temp3], %[temp3], 16 \n\t"
  128. "sb %[temp5], -6(%[data]) \n\t"
  129. "bne %[data], %[p_loop_end], 0b \n\t"
  130. " sb %[temp3], -8(%[data]) \n\t"
  131. "1: \n\t"
  132. ".set pop \n\t"
  133. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  134. [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
  135. [new_red1]"=&r"(new_red1), [new_red]"=&r"(new_red),
  136. [argb]"=&r"(argb), [argb1]"=&r"(argb1), [data]"+&r"(data)
  137. : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
  138. [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
  139. : "memory", "hi", "lo"
  140. );
  141. if (num_pixels & 1) {
  142. const uint32_t argb_ = data[0];
  143. const uint32_t green = argb_ >> 8;
  144. const uint32_t red = argb_ >> 16;
  145. uint32_t new_blue = argb_;
  146. new_red = red;
  147. new_red -= ColorTransformDelta(m->green_to_red_, green);
  148. new_red &= 0xff;
  149. new_blue -= ColorTransformDelta(m->green_to_blue_, green);
  150. new_blue -= ColorTransformDelta(m->red_to_blue_, red);
  151. new_blue &= 0xff;
  152. data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);
  153. }
  154. }
  155. static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
  156. uint8_t red_to_blue,
  157. uint32_t argb) {
  158. const uint32_t green = argb >> 8;
  159. const uint32_t red = argb >> 16;
  160. uint8_t new_blue = argb;
  161. new_blue -= ColorTransformDelta(green_to_blue, green);
  162. new_blue -= ColorTransformDelta(red_to_blue, red);
  163. return (new_blue & 0xff);
  164. }
  165. static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
  166. int tile_width, int tile_height,
  167. int green_to_blue, int red_to_blue,
  168. int histo[]) {
  169. const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
  170. const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
  171. const uint32_t mask = 0xff00ffu;
  172. while (tile_height-- > 0) {
  173. int x;
  174. const uint32_t* p_argb = argb;
  175. argb += stride;
  176. for (x = 0; x < (tile_width >> 1); ++x) {
  177. int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
  178. __asm__ volatile (
  179. "lw %[temp0], 0(%[p_argb]) \n\t"
  180. "lw %[temp1], 4(%[p_argb]) \n\t"
  181. "precr.qb.ph %[temp2], %[temp0], %[temp1] \n\t"
  182. "ins %[temp1], %[temp0], 16, 16 \n\t"
  183. "shra.ph %[temp2], %[temp2], 8 \n\t"
  184. "shra.ph %[temp3], %[temp1], 8 \n\t"
  185. "mul.ph %[temp5], %[temp2], %[rtb] \n\t"
  186. "mul.ph %[temp6], %[temp3], %[gtb] \n\t"
  187. "and %[temp4], %[temp1], %[mask] \n\t"
  188. "addiu %[p_argb], %[p_argb], 8 \n\t"
  189. "shra.ph %[temp5], %[temp5], 5 \n\t"
  190. "shra.ph %[temp6], %[temp6], 5 \n\t"
  191. "subu.qb %[temp2], %[temp4], %[temp5] \n\t"
  192. "subu.qb %[temp2], %[temp2], %[temp6] \n\t"
  193. : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
  194. [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
  195. [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
  196. : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
  197. : "memory", "hi", "lo"
  198. );
  199. ++histo[(uint8_t)(temp2 >> 16)];
  200. ++histo[(uint8_t)temp2];
  201. }
  202. if (tile_width & 1) {
  203. ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
  204. }
  205. }
  206. }
  207. static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
  208. uint32_t argb) {
  209. const uint32_t green = argb >> 8;
  210. uint32_t new_red = argb >> 16;
  211. new_red -= ColorTransformDelta(green_to_red, green);
  212. return (new_red & 0xff);
  213. }
  214. static void CollectColorRedTransforms(const uint32_t* argb, int stride,
  215. int tile_width, int tile_height,
  216. int green_to_red, int histo[]) {
  217. const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
  218. while (tile_height-- > 0) {
  219. int x;
  220. const uint32_t* p_argb = argb;
  221. argb += stride;
  222. for (x = 0; x < (tile_width >> 1); ++x) {
  223. int temp0, temp1, temp2, temp3, temp4;
  224. __asm__ volatile (
  225. "lw %[temp0], 0(%[p_argb]) \n\t"
  226. "lw %[temp1], 4(%[p_argb]) \n\t"
  227. "precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t"
  228. "ins %[temp1], %[temp0], 16, 16 \n\t"
  229. "shra.ph %[temp3], %[temp1], 8 \n\t"
  230. "mul.ph %[temp2], %[temp3], %[gtr] \n\t"
  231. "addiu %[p_argb], %[p_argb], 8 \n\t"
  232. "shra.ph %[temp2], %[temp2], 5 \n\t"
  233. "subu.qb %[temp2], %[temp4], %[temp2] \n\t"
  234. : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
  235. [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
  236. : [gtr]"r"(gtr)
  237. : "memory", "hi", "lo"
  238. );
  239. ++histo[(uint8_t)(temp2 >> 16)];
  240. ++histo[(uint8_t)temp2];
  241. }
  242. if (tile_width & 1) {
  243. ++histo[TransformColorRed(green_to_red, *p_argb)];
  244. }
  245. }
  246. }
  247. //------------------------------------------------------------------------------
  248. // Entry point
  249. extern void VP8LEncDspInitMIPSdspR2(void);
  250. WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) {
  251. VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
  252. VP8LTransformColor = TransformColor;
  253. VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
  254. VP8LCollectColorRedTransforms = CollectColorRedTransforms;
  255. }
  256. #else // !WEBP_USE_MIPS_DSP_R2
  257. WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2)
  258. #endif // WEBP_USE_MIPS_DSP_R2