scale_any.cc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. /*
  2. * Copyright 2015 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <string.h> // For memset/memcpy
  11. #include "libyuv/scale.h"
  12. #include "libyuv/scale_row.h"
  13. #include "libyuv/basic_types.h"
  14. #ifdef __cplusplus
  15. namespace libyuv {
  16. extern "C" {
  17. #endif
  // SDANY: generates an "any width" wrapper around a fixed-ratio row scaler.
  //   NAMEANY            name of the generated function.
  //   SCALEROWDOWN_SIMD  kernel invoked on the leading span of the row.
  //   SCALEROWDOWN_C     kernel invoked on whatever output pixels are left.
  //   FACTOR             multiplier from output pixels to source pixels. It is
  //                      pasted unparenthesized on purpose, so "4 / 3" expands
  //                      to body * 4 / 3.
  //   BPP                multiplier applied to pixel offsets when advancing the
  //                      byte pointers.
  //   MASK               SIMD step minus one. The step may be a non-power of 2,
  //                      so the split uses a modulo instead of a bit mask.
  #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)   \
    void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride,                   \
                 uint8_t* dst_ptr, int dst_width) {                              \
      const int tail = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
      const int body = dst_width - tail;                                         \
      /* Where the leftover pixels start in the source and destination rows. */  \
      const uint8_t* tail_src = src_ptr + (body * FACTOR) * BPP;                 \
      uint8_t* tail_dst = dst_ptr + body * BPP;                                  \
      if (body > 0) {                                                            \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, body);                   \
      }                                                                          \
      SCALEROWDOWN_C(tail_src, src_stride, tail_dst, tail);                      \
    }
  // SDODD: "any width" wrapper for an odd source width (used by I420Blend
  // subsampling). dst_width is (width + 1) / 2, so only dst_width - 1 outputs
  // are split between the SIMD kernel and the C kernel; the C kernel is then
  // asked for one extra output so that it also produces the last pixel.
  // MASK is the SIMD step minus one and may be a non-power of 2 (modulo split).
  // FACTOR is pasted unparenthesized into body * FACTOR.
  #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)  \
    void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride,                  \
                 uint8_t* dst_ptr, int dst_width) {                             \
      const int full = dst_width - 1; /* outputs scaled from whole pairs */     \
      const int tail = (int)((unsigned int)full % (MASK + 1)); /* NOLINT */     \
      const int body = full - tail;                                             \
      const uint8_t* tail_src = src_ptr + (body * FACTOR) * BPP;                \
      uint8_t* tail_dst = dst_ptr + body * BPP;                                 \
      if (body > 0) {                                                           \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, body);                  \
      }                                                                         \
      SCALEROWDOWN_C(tail_src, src_stride, tail_dst, tail + 1);                 \
    }
  #if defined(HAS_SCALEROWDOWN2_SSSE3)
  // ScaleRowDown2* SSSE3 wrappers: FACTOR 2, BPP 1, SIMD span rounded down to
  // a multiple of 16 (MASK 15).
  SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C,
        2, 1, 15)
  SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
        ScaleRowDown2Linear_C,
        2, 1, 15)
  SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
        2, 1, 15)
  // Odd-width variant: the leftover pixels go to ScaleRowDown2Box_Odd_C.
  SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
        ScaleRowDown2Box_Odd_C,
        2, 1, 15)
  #endif  // HAS_SCALEROWDOWN2_SSSE3
  #ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3
  // MASK is "SIMD step - 1": the wrapper splits the row on
  // dst_width % (MASK + 1). The previous value 4 split on multiples of 5, so
  // the SIMD kernel could be handed a width that is not a whole number of its
  // steps. 3 assumes the SSSE3 kernel produces 4 UV pixels per iteration --
  // TODO confirm against the kernel.
  SDANY(ScaleUVRowDown2Box_Any_SSSE3,
        ScaleUVRowDown2Box_SSSE3,
        ScaleUVRowDown2Box_C,
        2,
        2,
        3)
  #endif
  #ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
  // MASK is "SIMD step - 1": the wrapper splits the row on
  // dst_width % (MASK + 1). The previous value 8 split on multiples of 9, so
  // the SIMD kernel could be handed a width that is not a whole number of its
  // steps. 7 assumes the AVX2 kernel produces 8 UV pixels per iteration --
  // TODO confirm against the kernel.
  SDANY(ScaleUVRowDown2Box_Any_AVX2,
        ScaleUVRowDown2Box_AVX2,
        ScaleUVRowDown2Box_C,
        2,
        2,
        7)
  #endif
  #if defined(HAS_SCALEROWDOWN2_AVX2)
  // ScaleRowDown2* AVX2 wrappers: FACTOR 2, BPP 1, SIMD span rounded down to
  // a multiple of 32 (MASK 31).
  SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C,
        2, 1, 31)
  SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
        ScaleRowDown2Linear_C,
        2, 1, 31)
  SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
        2, 1, 31)
  // Odd-width variant: the leftover pixels go to ScaleRowDown2Box_Odd_C.
  SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2,
        ScaleRowDown2Box_Odd_C,
        2, 1, 31)
  #endif  // HAS_SCALEROWDOWN2_AVX2
  #if defined(HAS_SCALEROWDOWN2_NEON)
  // ScaleRowDown2* NEON wrappers: FACTOR 2, BPP 1, SIMD span rounded down to
  // a multiple of 16 (MASK 15).
  SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C,
        2, 1, 15)
  SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
        ScaleRowDown2Linear_C,
        2, 1, 15)
  SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, ScaleRowDown2Box_C,
        2, 1, 15)
  // Odd-width variant: the leftover pixels go to ScaleRowDown2Box_Odd_C.
  SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
        ScaleRowDown2Box_Odd_C,
        2, 1, 15)
  #endif  // HAS_SCALEROWDOWN2_NEON
  #ifdef HAS_SCALEUVROWDOWN2BOX_NEON
  // MASK is "SIMD step - 1": the wrapper splits the row on
  // dst_width % (MASK + 1). The previous value 8 split on multiples of 9, so
  // the SIMD kernel could be handed a width that is not a whole number of its
  // steps. 7 assumes the NEON kernel produces 8 UV pixels per iteration --
  // TODO confirm against the kernel.
  SDANY(ScaleUVRowDown2Box_Any_NEON,
        ScaleUVRowDown2Box_NEON,
        ScaleUVRowDown2Box_C,
        2,
        2,
        7)
  #endif
  #if defined(HAS_SCALEROWDOWN2_MSA)
  // ScaleRowDown2* MSA wrappers: FACTOR 2, BPP 1, SIMD span rounded down to
  // a multiple of 32 (MASK 31).
  SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C,
        2, 1, 31)
  SDANY(ScaleRowDown2Linear_Any_MSA, ScaleRowDown2Linear_MSA,
        ScaleRowDown2Linear_C,
        2, 1, 31)
  SDANY(ScaleRowDown2Box_Any_MSA, ScaleRowDown2Box_MSA, ScaleRowDown2Box_C,
        2, 1, 31)
  #endif  // HAS_SCALEROWDOWN2_MSA
  #if defined(HAS_SCALEROWDOWN2_MMI)
  // ScaleRowDown2* MMI wrappers: FACTOR 2, BPP 1, SIMD span rounded down to
  // a multiple of 8 (MASK 7).
  SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C,
        2, 1, 7)
  SDANY(ScaleRowDown2Linear_Any_MMI, ScaleRowDown2Linear_MMI,
        ScaleRowDown2Linear_C,
        2, 1, 7)
  SDANY(ScaleRowDown2Box_Any_MMI, ScaleRowDown2Box_MMI, ScaleRowDown2Box_C,
        2, 1, 7)
  // Odd-width variant: the leftover pixels go to ScaleRowDown2Box_Odd_C.
  SDODD(ScaleRowDown2Box_Odd_MMI, ScaleRowDown2Box_MMI,
        ScaleRowDown2Box_Odd_C,
        2, 1, 7)
  #endif  // HAS_SCALEROWDOWN2_MMI
  // ScaleRowDown4* wrappers: FACTOR 4, BPP 1. MASK (SIMD step - 1) differs per
  // instruction set.
  #if defined(HAS_SCALEROWDOWN4_SSSE3)
  SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C,
        4, 1, 7)
  SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
        4, 1, 7)
  #endif  // HAS_SCALEROWDOWN4_SSSE3
  #if defined(HAS_SCALEROWDOWN4_AVX2)
  SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C,
        4, 1, 15)
  SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
        4, 1, 15)
  #endif  // HAS_SCALEROWDOWN4_AVX2
  #if defined(HAS_SCALEROWDOWN4_NEON)
  SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C,
        4, 1, 7)
  SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
        4, 1, 7)
  #endif  // HAS_SCALEROWDOWN4_NEON
  #if defined(HAS_SCALEROWDOWN4_MSA)
  SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C,
        4, 1, 15)
  SDANY(ScaleRowDown4Box_Any_MSA, ScaleRowDown4Box_MSA, ScaleRowDown4Box_C,
        4, 1, 15)
  #endif  // HAS_SCALEROWDOWN4_MSA
  #if defined(HAS_SCALEROWDOWN4_MMI)
  SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C,
        4, 1, 7)
  SDANY(ScaleRowDown4Box_Any_MMI, ScaleRowDown4Box_MMI, ScaleRowDown4Box_C,
        4, 1, 7)
  #endif  // HAS_SCALEROWDOWN4_MMI
  // ScaleRowDown34* wrappers: FACTOR is the token sequence "4 / 3" (expanded
  // inside the wrapper as n * 4 / 3), BPP 1. MASK 23 gives SIMD spans that are
  // multiples of 24; MASK 47 gives multiples of 48.
  #if defined(HAS_SCALEROWDOWN34_SSSE3)
  SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3, ScaleRowDown34_C,
        4 / 3, 1, 23)
  SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
        ScaleRowDown34_0_Box_C,
        4 / 3, 1, 23)
  SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
        ScaleRowDown34_1_Box_C,
        4 / 3, 1, 23)
  #endif  // HAS_SCALEROWDOWN34_SSSE3
  #if defined(HAS_SCALEROWDOWN34_NEON)
  SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON, ScaleRowDown34_C,
        4 / 3, 1, 23)
  SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
        ScaleRowDown34_0_Box_C,
        4 / 3, 1, 23)
  SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
        ScaleRowDown34_1_Box_C,
        4 / 3, 1, 23)
  #endif  // HAS_SCALEROWDOWN34_NEON
  #if defined(HAS_SCALEROWDOWN34_MSA)
  SDANY(ScaleRowDown34_Any_MSA, ScaleRowDown34_MSA, ScaleRowDown34_C,
        4 / 3, 1, 47)
  SDANY(ScaleRowDown34_0_Box_Any_MSA, ScaleRowDown34_0_Box_MSA,
        ScaleRowDown34_0_Box_C,
        4 / 3, 1, 47)
  SDANY(ScaleRowDown34_1_Box_Any_MSA, ScaleRowDown34_1_Box_MSA,
        ScaleRowDown34_1_Box_C,
        4 / 3, 1, 47)
  #endif  // HAS_SCALEROWDOWN34_MSA
  #if defined(HAS_SCALEROWDOWN34_MMI)
  SDANY(ScaleRowDown34_Any_MMI, ScaleRowDown34_MMI, ScaleRowDown34_C,
        4 / 3, 1, 23)
  #endif  // HAS_SCALEROWDOWN34_MMI
  // ScaleRowDown38* wrappers: FACTOR is the token sequence "8 / 3" (expanded
  // inside the wrapper as n * 8 / 3), BPP 1. MASK 11 gives SIMD spans that are
  // multiples of 12; MASK 5 gives multiples of 6.
  #if defined(HAS_SCALEROWDOWN38_SSSE3)
  SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3, ScaleRowDown38_C,
        8 / 3, 1, 11)
  SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
        ScaleRowDown38_3_Box_C,
        8 / 3, 1, 5)
  SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
        ScaleRowDown38_2_Box_C,
        8 / 3, 1, 5)
  #endif  // HAS_SCALEROWDOWN38_SSSE3
  #if defined(HAS_SCALEROWDOWN38_NEON)
  SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON, ScaleRowDown38_C,
        8 / 3, 1, 11)
  SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
        ScaleRowDown38_3_Box_C,
        8 / 3, 1, 11)
  SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
        ScaleRowDown38_2_Box_C,
        8 / 3, 1, 11)
  #endif  // HAS_SCALEROWDOWN38_NEON
  #if defined(HAS_SCALEROWDOWN38_MSA)
  SDANY(ScaleRowDown38_Any_MSA, ScaleRowDown38_MSA, ScaleRowDown38_C,
        8 / 3, 1, 11)
  SDANY(ScaleRowDown38_3_Box_Any_MSA, ScaleRowDown38_3_Box_MSA,
        ScaleRowDown38_3_Box_C,
        8 / 3, 1, 11)
  SDANY(ScaleRowDown38_2_Box_Any_MSA, ScaleRowDown38_2_Box_MSA,
        ScaleRowDown38_2_Box_C,
        8 / 3, 1, 11)
  #endif  // HAS_SCALEROWDOWN38_MSA
  // ScaleARGBRowDown2* wrappers: FACTOR 2, BPP 4. MASK (SIMD step - 1) differs
  // per instruction set.
  #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
        ScaleARGBRowDown2_C,
        2, 4, 3)
  SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
        ScaleARGBRowDown2Linear_C,
        2, 4, 3)
  SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
        ScaleARGBRowDown2Box_C,
        2, 4, 3)
  #endif  // HAS_SCALEARGBROWDOWN2_SSE2
  #if defined(HAS_SCALEARGBROWDOWN2_NEON)
  SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
        ScaleARGBRowDown2_C,
        2, 4, 7)
  SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
        ScaleARGBRowDown2Linear_C,
        2, 4, 7)
  SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
        ScaleARGBRowDown2Box_C,
        2, 4, 7)
  #endif  // HAS_SCALEARGBROWDOWN2_NEON
  #if defined(HAS_SCALEARGBROWDOWN2_MSA)
  SDANY(ScaleARGBRowDown2_Any_MSA, ScaleARGBRowDown2_MSA,
        ScaleARGBRowDown2_C,
        2, 4, 3)
  SDANY(ScaleARGBRowDown2Linear_Any_MSA, ScaleARGBRowDown2Linear_MSA,
        ScaleARGBRowDown2Linear_C,
        2, 4, 3)
  SDANY(ScaleARGBRowDown2Box_Any_MSA, ScaleARGBRowDown2Box_MSA,
        ScaleARGBRowDown2Box_C,
        2, 4, 3)
  #endif  // HAS_SCALEARGBROWDOWN2_MSA
  #if defined(HAS_SCALEARGBROWDOWN2_MMI)
  SDANY(ScaleARGBRowDown2_Any_MMI, ScaleARGBRowDown2_MMI,
        ScaleARGBRowDown2_C,
        2, 4, 1)
  SDANY(ScaleARGBRowDown2Linear_Any_MMI, ScaleARGBRowDown2Linear_MMI,
        ScaleARGBRowDown2Linear_C,
        2, 4, 1)
  SDANY(ScaleARGBRowDown2Box_Any_MMI, ScaleARGBRowDown2Box_MMI,
        ScaleARGBRowDown2Box_C,
        2, 4, 1)
  #endif  // HAS_SCALEARGBROWDOWN2_MMI
  // Both fixed-ratio generators are done being instantiated at this point;
  // drop them so they do not leak into the rest of the translation unit.
  #undef SDANY
  #undef SDODD
  // SDAANY: generates an "any width" wrapper for scalers that take a runtime
  // source step (scale down by an even factor).
  //   NAMEANY            name of the generated function.
  //   SCALEROWDOWN_SIMD  kernel invoked on the leading span of the row.
  //   SCALEROWDOWN_C     kernel invoked on the leftover output pixels.
  //   BPP                multiplier applied to pixel offsets when advancing the
  //                      byte pointers.
  //   MASK               bit mask (SIMD step - 1) used to split dst_width.
  #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)        \
    void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx,  \
                 uint8_t* dst_ptr, int dst_width) {                            \
      const int tail = dst_width & MASK;                                       \
      const int body = dst_width - tail; /* same value as dst_width & ~MASK */ \
      const uint8_t* tail_src = src_ptr + (body * src_stepx) * BPP;            \
      uint8_t* tail_dst = dst_ptr + body * BPP;                                \
      if (body > 0) {                                                          \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, body);      \
      }                                                                        \
      SCALEROWDOWN_C(tail_src, src_stride, src_stepx, tail_dst, tail);         \
    }
  // ScaleARGBRowDownEven* wrappers: BPP 4. MASK 3 gives SIMD spans that are
  // multiples of 4; MASK 1 gives multiples of 2.
  #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
         ScaleARGBRowDownEven_C,
         4, 3)
  SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
         ScaleARGBRowDownEvenBox_C,
         4, 3)
  #endif  // HAS_SCALEARGBROWDOWNEVEN_SSE2
  #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
         ScaleARGBRowDownEven_C,
         4, 3)
  SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
         ScaleARGBRowDownEvenBox_C,
         4, 3)
  #endif  // HAS_SCALEARGBROWDOWNEVEN_NEON
  #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
  SDAANY(ScaleARGBRowDownEven_Any_MSA, ScaleARGBRowDownEven_MSA,
         ScaleARGBRowDownEven_C,
         4, 3)
  SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, ScaleARGBRowDownEvenBox_MSA,
         ScaleARGBRowDownEvenBox_C,
         4, 3)
  #endif  // HAS_SCALEARGBROWDOWNEVEN_MSA
  #if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
  SDAANY(ScaleARGBRowDownEven_Any_MMI, ScaleARGBRowDownEven_MMI,
         ScaleARGBRowDownEven_C,
         4, 1)
  SDAANY(ScaleARGBRowDownEvenBox_Any_MMI, ScaleARGBRowDownEvenBox_MMI,
         ScaleARGBRowDownEvenBox_C,
         4, 1)
  #endif  // HAS_SCALEARGBROWDOWNEVEN_MMI
  #ifdef HAS_SCALEUVROWDOWNEVEN_NEON
  // UV variant: BPP 2, SIMD span rounded down to a multiple of 4 (MASK 3).
  SDAANY(ScaleUVRowDownEven_Any_NEON,
         ScaleUVRowDownEven_NEON,
         ScaleUVRowDownEven_C,
         2,
         3)
  #endif
  // Last instantiation of the even-factor generator; undefine it here so the
  // macro does not leak into the rest of the file, matching how the other
  // generator macros in this file are retired after use.
  #undef SDAANY
  489. #ifdef SASIMDONLY
  // This also works and uses memcpy and SIMD instead of C, but is slower on ARM
  // Add rows box filter scale down. Using macro from row_any
  //
  // SAROW: generates an "any width" wrapper that runs ANY_SIMD directly on the
  // leading span and, for the leftover r elements, runs it once more on
  // zero-padded temporaries and copies only the r valid results back.
  //   NAMEANY   name of the generated function.
  //   ANY_SIMD  kernel called as ANY_SIMD(src, dst, width).
  //   SBPP      bytes per source element.
  //   BPP       bytes per destination element.
  //   MASK      bit mask (SIMD step - 1); MASK + 1 must fit the 32-entry
  //             temporaries.
  #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                          \
    void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) {     \
      SIMD_ALIGNED(uint16_t dst_temp[32]);                                   \
      SIMD_ALIGNED(uint8_t src_temp[32]);                                    \
      int r = width & MASK;                                                  \
      int n = width & ~MASK;                                                 \
      /* dst_ptr counts uint16_t elements while BPP is a byte size, so the */ \
      /* tail must be addressed through a byte pointer; dst_ptr + n * BPP */ \
      /* would land on element 2n instead of element n. */                   \
      uint8_t* dst_tail = (uint8_t*)dst_ptr + n * BPP;                       \
      if (n > 0) {                                                           \
        ANY_SIMD(src_ptr, dst_ptr, n);                                       \
      }                                                                      \
      if (r > 0) {                                                           \
        /* The kernel touches MASK + 1 lanes of both temporaries, so clear */ \
        /* both of them (for msan), not just the destination one. */         \
        memset(dst_temp, 0, sizeof(dst_temp));                               \
        memset(src_temp, 0, sizeof(src_temp));                               \
        memcpy(src_temp, src_ptr + n * SBPP, r * SBPP);                      \
        memcpy(dst_temp, dst_tail, r * BPP);                                 \
        ANY_SIMD(src_temp, dst_temp, MASK + 1);                              \
        memcpy(dst_tail, dst_temp, r * BPP);                                 \
      }                                                                      \
    }
  // ScaleAddRow wrappers built with SAROW: SBPP 1 (uint8_t source), BPP 2
  // (uint16_t destination); the last argument is the SIMD step minus one.
  #ifdef HAS_SCALEADDROW_SSE2
  SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
  #endif
  #ifdef HAS_SCALEADDROW_AVX2
  SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
  #endif
  #ifdef HAS_SCALEADDROW_NEON
  SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
  #endif
  #ifdef HAS_SCALEADDROW_MSA
  SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
  #endif
  #ifdef HAS_SCALEADDROW_MMI
  SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7)
  #endif
  // This branch defines SAROW (not SAANY), so SAROW is the macro to retire.
  #undef SAROW
  523. #else
  // Add rows box filter scale down.
  // SAANY: generates an "any width" wrapper that hands the leading span to
  // SCALEADDROW_SIMD and the leftover elements to SCALEADDROW_C. MASK is the
  // bit mask (SIMD step - 1) used to split src_width. Both pointers advance by
  // one element per pixel.
  #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)               \
    void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {  \
      const int tail = src_width & MASK;                                      \
      const int body = src_width - tail; /* same value as src_width & ~MASK */ \
      if (body > 0) {                                                         \
        SCALEADDROW_SIMD(src_ptr, dst_ptr, body);                             \
      }                                                                       \
      SCALEADDROW_C(src_ptr + body, dst_ptr + body, tail);                    \
    }
  // ScaleAddRow wrappers: SIMD kernel for the leading span, ScaleAddRow_C for
  // the remainder; the last argument is the SIMD step minus one.
  #if defined(HAS_SCALEADDROW_SSE2)
  SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
  #endif  // HAS_SCALEADDROW_SSE2
  #if defined(HAS_SCALEADDROW_AVX2)
  SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
  #endif  // HAS_SCALEADDROW_AVX2
  #if defined(HAS_SCALEADDROW_NEON)
  SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
  #endif  // HAS_SCALEADDROW_NEON
  #if defined(HAS_SCALEADDROW_MSA)
  SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
  #endif  // HAS_SCALEADDROW_MSA
  #if defined(HAS_SCALEADDROW_MMI)
  SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
  #endif  // HAS_SCALEADDROW_MMI
  #undef SAANY
  549. #endif // SASIMDONLY
  // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
  // CANY: generates an "any width" column-scaler wrapper.
  //   NAMEANY    name of the generated function.
  //   TERP_SIMD  kernel invoked on the leading span.
  //   TERP_C     kernel invoked on the leftover output pixels; its start
  //              position is advanced by dx for every pixel already written.
  //   BPP        multiplier applied to the destination pixel offset.
  //   MASK       bit mask (SIMD step - 1) used to split dst_width.
  #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK)                          \
    void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,      \
                 int x, int dx) {                                              \
      const int tail = dst_width & MASK;                                       \
      const int body = dst_width - tail; /* same value as dst_width & ~MASK */ \
      if (body > 0) {                                                          \
        TERP_SIMD(dst_ptr, src_ptr, body, x, dx);                              \
      }                                                                        \
      TERP_C(dst_ptr + body * BPP, src_ptr, tail, x + body * dx, dx);          \
    }
  // Column-scaler wrappers. Arguments after the kernel names are BPP (1 or 4)
  // and MASK (SIMD step - 1). With MASK 0 the whole row goes to the SIMD
  // kernel and the C kernel is called with a width of 0.
  #if defined(HAS_SCALEFILTERCOLS_NEON)
  CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C,
       1, 7)
  #endif  // HAS_SCALEFILTERCOLS_NEON
  #if defined(HAS_SCALEFILTERCOLS_MSA)
  CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C,
       1, 15)
  #endif  // HAS_SCALEFILTERCOLS_MSA
  #if defined(HAS_SCALEARGBCOLS_NEON)
  CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C,
       4, 7)
  #endif  // HAS_SCALEARGBCOLS_NEON
  #if defined(HAS_SCALEARGBCOLS_MSA)
  CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C,
       4, 3)
  #endif  // HAS_SCALEARGBCOLS_MSA
  #if defined(HAS_SCALEARGBCOLS_MMI)
  CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C,
       4, 0)
  #endif  // HAS_SCALEARGBCOLS_MMI
  #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
       ScaleARGBFilterCols_C,
       4, 3)
  #endif  // HAS_SCALEARGBFILTERCOLS_NEON
  #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  CANY(ScaleARGBFilterCols_Any_MSA, ScaleARGBFilterCols_MSA,
       ScaleARGBFilterCols_C,
       4, 7)
  #endif  // HAS_SCALEARGBFILTERCOLS_MSA
  #undef CANY
  591. #ifdef __cplusplus
  592. } // extern "C"
  593. } // namespace libyuv
  594. #endif