/*
 *  Copyright 2020 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyUV
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Macros to enable specialized scalers.
#ifndef HAS_SCALEUVDOWN2
#define HAS_SCALEUVDOWN2 1
#endif
#ifndef HAS_SCALEUVDOWN4BOX
#define HAS_SCALEUVDOWN4BOX 1
#endif
#ifndef HAS_SCALEUVDOWNEVEN
#define HAS_SCALEUVDOWNEVEN 1
#endif
#ifndef HAS_SCALEUVBILINEARDOWN
#define HAS_SCALEUVBILINEARDOWN 1
#endif
#ifndef HAS_SCALEUVBILINEARUP
#define HAS_SCALEUVBILINEARUP 1
#endif
#ifndef HAS_UVCOPY
#define HAS_UVCOPY 1
#endif
#ifndef HAS_SCALEPLANEVERTICAL
#define HAS_SCALEPLANEVERTICAL 1
#endif
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}
// ScaleUV, 1/2
// This is an optimized version for scaling down a UV plane to 1/2 of
// its original size.
#if HAS_SCALEUVDOWN2
static void ScaleUVDown2(int src_width,
                         int src_height,
                         int dst_width,
                         int dst_height,
                         int src_stride,
                         int dst_stride,
                         const uint8_t* src_uv,
                         uint8_t* dst_uv,
                         int x,
                         int dx,
                         int y,
                         int dy,
                         enum FilterMode filtering) {
  int j;
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      filtering == kFilterNone
          ? ScaleUVRowDown2_C
          : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_C
                                        : ScaleUVRowDown2Box_C);
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 2);      // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
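  // In 16.16 fixed point, 65536 * 2 (0x20000) encodes a step of exactly 2
  // source columns per destination column, i.e. a 1/2 horizontal scale.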
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  } else {
    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
  }

#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif
  // This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_SSSE3
                                          : ScaleUVRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_SSSE3
                                            : ScaleUVRowDown2Box_SSSE3);
    }
  }
#endif
  // This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
                                          : ScaleUVRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_NEON
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
                                            : ScaleUVRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MMI
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MMI
                                          : ScaleUVRowDown2Box_Any_MMI);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MMI
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MMI
                                            : ScaleUVRowDown2Box_MMI);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MSA
                                          : ScaleUVRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MSA
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MSA
                                            : ScaleUVRowDown2Box_MSA);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDown2(src_uv, src_stride, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif  // HAS_SCALEUVDOWN2

// ScaleUV, 1/4
// This is an optimized version for scaling down a UV plane to 1/4 of
// its original size.
#if HAS_SCALEUVDOWN4BOX
static void ScaleUVDown4Box(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy) {
  int j;
  // Allocate 2 rows of UV.
  const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
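  // Each intermediate row holds dst_width * 2 UV pixels of 2 bytes each,
  // rounded up to a multiple of 16 bytes so rows stay SIMD-aligned.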
  align_buffer_64(row, kRowSize * 2);
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      ScaleUVRowDown2Box_C;
  // Advance to odd row, even column.
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 4);      // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif
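  // A 4x4 box average is built from three 2x2 box passes: two passes reduce
  // source rows 0-1 and 2-3 to two half-width rows, and a third pass
  // averages those two rows into the final quarter-width output row.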
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
    ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
                    dst_width * 2);
    ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
  free_aligned_buffer_64(row);
}
#endif  // HAS_SCALEUVDOWN4BOX

// ScaleUV Even
// This is an optimized version for scaling down a UV plane to an even
// multiple of its original size.
#if HAS_SCALEUVDOWNEVEN
static void ScaleUVDownEven(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy,
                            enum FilterMode filtering) {
  int j;
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
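  // dx and dy are whole multiples of 65536 here, so the upper 16 bits give
  // integer column/row steps, e.g. dx = 0x40000 reads every 4th UV pixel.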
  void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                             int src_step, uint8_t* dst_uv, int dst_width) =
      filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
  (void)src_width;
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
                                   : ScaleUVRowDownEven_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_SSE2 : ScaleUVRowDownEven_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && !filtering) {
    ScaleUVRowDownEven = ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven = ScaleUVRowDownEven_NEON;
    }
  }
#endif  // TODO(fbarchard): Enable Box filter
#if defined(HAS_SCALEUVROWDOWNEVENBOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_NEON
                                   : ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_NEON : ScaleUVRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MMI : ScaleUVRowDownEven_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MMI : ScaleUVRowDownEven_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MSA : ScaleUVRowDownEven_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MSA : ScaleUVRowDownEven_MSA;
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDownEven(src_uv, src_stride, col_step, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif  // HAS_SCALEUVDOWNEVEN

// Scale UV down with bilinear interpolation.
#if HAS_SCALEUVBILINEARDOWN
static void ScaleUVBilinearDown(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint8_t* src_uv,
                                uint8_t* dst_uv,
                                int x,
                                int dx,
                                int y,
                                int dy,
                                enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
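  // Compute the horizontal span of source pixels this scale actually
  // touches, so only that span needs to be interpolated per row.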
  int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
  int64_t xl = (dx >= 0) ? x : xlast;
  int64_t xr = (dx >= 0) ? xlast : x;
  int clip_src_width;
  xl = (xl >> 16) & ~3;    // Left edge aligned.
  xr = (xr >> 16) + 1;     // Right most pixel used. Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
  clip_src_width = (int)(xr - xl) * 2;  // Width aligned to 2.
  src_uv += xl * 2;
  x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of UV.
  {
    align_buffer_64(row, clip_src_width * 2);
    const int max_y = (src_height - 1) << 16;
    if (y > max_y) {
      y = max_y;
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8_t* src = src_uv + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
      } else {
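        // Blend rows yi and yi + 1: yf is the top 8 bits of the 16-bit
        // vertical fraction, scaled to the 0..255 range InterpolateRow uses.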
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleUVFilterCols(dst_uv, row, dst_width, x, dx);
      }
      dst_uv += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
    }
    free_aligned_buffer_64(row);
  }
}
#endif  // HAS_SCALEUVBILINEARDOWN

// Scale UV up with bilinear interpolation.
#if HAS_SCALEUVBILINEARUP
static void ScaleUVBilinearUp(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint8_t* src_uv,
                              uint8_t* dst_uv,
                              int x,
                              int dx,
                              int y,
                              int dy,
                              enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
  const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  if (src_width >= 32768) {
    ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
  }
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (!filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (!filtering && TestCpuFlag(kCpuHasMMI)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVFilterCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVCols_MSA;
    }
  }
#endif
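  // Special case: an exact 2x horizontal upscale with no filtering simply
  // duplicates each source UV pixel, so the Up2 column copier is used.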
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVFilterCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVColsUp2_MMI;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8_t* src = src_uv + yi * src_stride;

    // Allocate 2 rows of UV.
    const int kRowSize = (dst_width * 2 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;
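    // The two rows form a ring buffer of horizontally scaled source rows.
    // Negating rowstride below ping-pongs rowptr between them, so advancing
    // one source row only requires rescaling the one new row.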
    ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleUVFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_uv + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_uv, rowptr, 0, dst_width * 2, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_uv, rowptr, rowstride, dst_width * 2, yf);
      }
      dst_uv += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
#endif  // HAS_SCALEUVBILINEARUP

// Scale UV to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
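// For example, a 1/2 scale uses dx = 0x20000 (2.0 in 16.16): destination
// column j samples source column (x + j * dx) >> 16, i.e. every other UV
// pixel. A 2x upscale uses dx = 0x8000 (0.5).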
static void ScaleUVSimple(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_uv,
                          uint8_t* dst_uv,
                          int x,
                          int dx,
                          int y,
                          int dy) {
  int j;
  void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
                      int x, int dx) =
      (src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
  (void)src_height;
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVCols_MSA;
    }
  }
#endif
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVColsUp2_MMI;
    }
#endif
  }

  for (j = 0; j < dst_height; ++j) {
    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
    dst_uv += dst_stride;
    y += dy;
  }
}

// Copy UV with optional flipping.
#if HAS_UVCOPY
static int UVCopy(const uint8_t* src_UV,
                  int src_stride_UV,
                  uint8_t* dst_UV,
                  int dst_stride_UV,
                  int width,
                  int height) {
  if (!src_UV || !dst_UV || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_UV = src_UV + (height - 1) * src_stride_UV;
    src_stride_UV = -src_stride_UV;
  }
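  // Each UV pixel pair is 2 bytes, so the plane copies width * 2 bytes/row.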
  CopyPlane(src_UV, src_stride_UV, dst_UV, dst_stride_UV, width * 2, height);
  return 0;
}
#endif  // HAS_UVCOPY

// Scale a UV plane (from NV12).
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleUV(const uint8_t* src,
                    int src_stride,
                    int src_width,
                    int src_height,
                    uint8_t* dst,
                    int dst_stride,
                    int dst_width,
                    int dst_height,
                    int clip_x,
                    int clip_y,
                    int clip_width,
                    int clip_height,
                    enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // UV does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  if (clip_x) {
    int64_t clipf = (int64_t)(clip_x)*dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 2;
    dst += clip_x * 2;
  }
  if (clip_y) {
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }
  // Special case for integer step values.
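  // e.g. dx == dy == 0x20000 is an exact 1/2 in both axes (even path below),
  // while dx == dy == 0x30000 is an exact 1/3 (odd path).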
  if (((dx | dy) & 0xffff) == 0) {
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      // Optimized even scale down, i.e. 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
#if HAS_SCALEUVDOWN2
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleUVDown2(src_width, src_height, clip_width, clip_height,
                       src_stride, dst_stride, src, dst, x, dx, y, dy,
                       filtering);
          return;
        }
#endif
#if HAS_SCALEUVDOWN4BOX
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy);
          return;
        }
#endif
#if HAS_SCALEUVDOWNEVEN
        ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
        return;
#endif
      }
      // Optimized odd scale down, i.e. 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
#ifdef HAS_UVCOPY
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst,
                 dst_stride, clip_width, clip_height);
          return;
        }
#endif
      }
    }
  }
  // HAS_SCALEPLANEVERTICAL
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                       dst_stride, src, dst, x, y, dy, 4, filtering);
    return;
  }
#if HAS_SCALEUVBILINEARUP
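  // dy < 65536 means a vertical step under 1.0 in 16.16 fixed point,
  // i.e. the image is being enlarged vertically.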
  if (filtering && dy < 65536) {
    ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
                      src_stride, dst_stride, src, dst, x, dx, y, dy,
                      filtering);
    return;
  }
#endif
#if HAS_SCALEUVBILINEARDOWN
  if (filtering) {
    ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
    return;
  }
#endif
  ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
                dst_stride, src, dst, x, dx, y, dy);
}

// Scale a UV image.
LIBYUV_API
int UVScale(const uint8_t* src_uv,
            int src_stride_uv,
            int src_width,
            int src_height,
            uint8_t* dst_uv,
            int dst_stride_uv,
            int dst_width,
            int dst_height,
            enum FilterMode filtering) {
  if (!src_uv || src_width == 0 || src_height == 0 || src_width > 32768 ||
      src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
          dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
  return 0;
}
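
// Illustrative usage (not part of this file): scale the interleaved UV
// plane of a 1280x720 NV12 frame down to 640x360. The UV plane is half the
// luma dimensions, so 640x360 UV pairs become 320x180; strides are in
// bytes (2 bytes per UV pair), and src_uv/dst_uv are caller-owned buffers.
//
//   UVScale(src_uv, /*src_stride_uv=*/1280, /*src_width=*/640,
//           /*src_height=*/360, dst_uv, /*dst_stride_uv=*/640,
//           /*dst_width=*/320, /*dst_height=*/180, kFilterBilinear);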

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif