// astcenc_color_unquantize.cpp
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2023 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. #include <utility>
  18. /**
  19. * @brief Functions for color unquantization.
  20. */
  21. #include "astcenc_internal.h"
  22. /**
  23. * @brief Un-blue-contract a color.
  24. *
  25. * This function reverses any applied blue contraction.
  26. *
  27. * @param input The input color that has been blue-contracted.
  28. *
  29. * @return The uncontracted color.
  30. */
  31. static ASTCENC_SIMD_INLINE vint4 uncontract_color(
  32. vint4 input
  33. ) {
  34. vmask4 mask(true, true, false, false);
  35. vint4 bc0 = asr<1>(input + input.lane<2>());
  36. return select(input, bc0, mask);
  37. }
  38. void rgba_delta_unpack(
  39. vint4 input0,
  40. vint4 input1,
  41. vint4& output0,
  42. vint4& output1
  43. ) {
  44. // Apply bit transfer
  45. bit_transfer_signed(input1, input0);
  46. // Apply blue-uncontraction if needed
  47. int rgb_sum = hadd_rgb_s(input1);
  48. input1 = input1 + input0;
  49. if (rgb_sum < 0)
  50. {
  51. input0 = uncontract_color(input0);
  52. input1 = uncontract_color(input1);
  53. std::swap(input0, input1);
  54. }
  55. output0 = clamp(0, 255, input0);
  56. output1 = clamp(0, 255, input1);
  57. }
  58. /**
  59. * @brief Unpack an LDR RGB color that uses delta encoding.
  60. *
  61. * Output alpha set to 255.
  62. *
  63. * @param input0 The packed endpoint 0 color.
  64. * @param input1 The packed endpoint 1 color deltas.
  65. * @param[out] output0 The unpacked endpoint 0 color.
  66. * @param[out] output1 The unpacked endpoint 1 color.
  67. */
  68. static void rgb_delta_unpack(
  69. vint4 input0,
  70. vint4 input1,
  71. vint4& output0,
  72. vint4& output1
  73. ) {
  74. rgba_delta_unpack(input0, input1, output0, output1);
  75. output0.set_lane<3>(255);
  76. output1.set_lane<3>(255);
  77. }
  78. void rgba_unpack(
  79. vint4 input0,
  80. vint4 input1,
  81. vint4& output0,
  82. vint4& output1
  83. ) {
  84. // Apply blue-uncontraction if needed
  85. if (hadd_rgb_s(input0) > hadd_rgb_s(input1))
  86. {
  87. input0 = uncontract_color(input0);
  88. input1 = uncontract_color(input1);
  89. std::swap(input0, input1);
  90. }
  91. output0 = input0;
  92. output1 = input1;
  93. }
  94. /**
  95. * @brief Unpack an LDR RGB color that uses direct encoding.
  96. *
  97. * Output alpha set to 255.
  98. *
  99. * @param input0 The packed endpoint 0 color.
  100. * @param input1 The packed endpoint 1 color.
  101. * @param[out] output0 The unpacked endpoint 0 color.
  102. * @param[out] output1 The unpacked endpoint 1 color.
  103. */
  104. static void rgb_unpack(
  105. vint4 input0,
  106. vint4 input1,
  107. vint4& output0,
  108. vint4& output1
  109. ) {
  110. rgba_unpack(input0, input1, output0, output1);
  111. output0.set_lane<3>(255);
  112. output1.set_lane<3>(255);
  113. }
  114. /**
  115. * @brief Unpack an LDR RGBA color that uses scaled encoding.
  116. *
  117. * Note only the RGB channels use the scaled encoding, alpha uses direct.
  118. *
  119. * @param input0 The packed endpoint 0 color.
  120. * @param alpha1 The packed endpoint 1 alpha value.
  121. * @param scale The packed quantized scale.
  122. * @param[out] output0 The unpacked endpoint 0 color.
  123. * @param[out] output1 The unpacked endpoint 1 color.
  124. */
  125. static void rgb_scale_alpha_unpack(
  126. vint4 input0,
  127. uint8_t alpha1,
  128. uint8_t scale,
  129. vint4& output0,
  130. vint4& output1
  131. ) {
  132. output1 = input0;
  133. output1.set_lane<3>(alpha1);
  134. output0 = asr<8>(input0 * scale);
  135. output0.set_lane<3>(input0.lane<3>());
  136. }
  137. /**
  138. * @brief Unpack an LDR RGB color that uses scaled encoding.
  139. *
  140. * Output alpha is 255.
  141. *
  142. * @param input0 The packed endpoint 0 color.
  143. * @param scale The packed scale.
  144. * @param[out] output0 The unpacked endpoint 0 color.
  145. * @param[out] output1 The unpacked endpoint 1 color.
  146. */
  147. static void rgb_scale_unpack(
  148. vint4 input0,
  149. int scale,
  150. vint4& output0,
  151. vint4& output1
  152. ) {
  153. output1 = input0;
  154. output1.set_lane<3>(255);
  155. output0 = asr<8>(input0 * scale);
  156. output0.set_lane<3>(255);
  157. }
  158. /**
  159. * @brief Unpack an LDR L color that uses direct encoding.
  160. *
  161. * Output alpha is 255.
  162. *
  163. * @param input The packed endpoints.
  164. * @param[out] output0 The unpacked endpoint 0 color.
  165. * @param[out] output1 The unpacked endpoint 1 color.
  166. */
  167. static void luminance_unpack(
  168. const uint8_t input[2],
  169. vint4& output0,
  170. vint4& output1
  171. ) {
  172. int lum0 = input[0];
  173. int lum1 = input[1];
  174. output0 = vint4(lum0, lum0, lum0, 255);
  175. output1 = vint4(lum1, lum1, lum1, 255);
  176. }
  177. /**
  178. * @brief Unpack an LDR L color that uses delta encoding.
  179. *
  180. * Output alpha is 255.
  181. *
  182. * @param input The packed endpoints (L0, L1).
  183. * @param[out] output0 The unpacked endpoint 0 color.
  184. * @param[out] output1 The unpacked endpoint 1 color.
  185. */
  186. static void luminance_delta_unpack(
  187. const uint8_t input[2],
  188. vint4& output0,
  189. vint4& output1
  190. ) {
  191. int v0 = input[0];
  192. int v1 = input[1];
  193. int l0 = (v0 >> 2) | (v1 & 0xC0);
  194. int l1 = l0 + (v1 & 0x3F);
  195. l1 = astc::min(l1, 255);
  196. output0 = vint4(l0, l0, l0, 255);
  197. output1 = vint4(l1, l1, l1, 255);
  198. }
  199. /**
  200. * @brief Unpack an LDR LA color that uses direct encoding.
  201. *
  202. * @param input The packed endpoints (L0, L1, A0, A1).
  203. * @param[out] output0 The unpacked endpoint 0 color.
  204. * @param[out] output1 The unpacked endpoint 1 color.
  205. */
  206. static void luminance_alpha_unpack(
  207. const uint8_t input[4],
  208. vint4& output0,
  209. vint4& output1
  210. ) {
  211. int lum0 = input[0];
  212. int lum1 = input[1];
  213. int alpha0 = input[2];
  214. int alpha1 = input[3];
  215. output0 = vint4(lum0, lum0, lum0, alpha0);
  216. output1 = vint4(lum1, lum1, lum1, alpha1);
  217. }
  218. /**
  219. * @brief Unpack an LDR LA color that uses delta encoding.
  220. *
  221. * @param input The packed endpoints (L0, L1, A0, A1).
  222. * @param[out] output0 The unpacked endpoint 0 color.
  223. * @param[out] output1 The unpacked endpoint 1 color.
  224. */
  225. static void luminance_alpha_delta_unpack(
  226. const uint8_t input[4],
  227. vint4& output0,
  228. vint4& output1
  229. ) {
  230. int lum0 = input[0];
  231. int lum1 = input[1];
  232. int alpha0 = input[2];
  233. int alpha1 = input[3];
  234. lum0 |= (lum1 & 0x80) << 1;
  235. alpha0 |= (alpha1 & 0x80) << 1;
  236. lum1 &= 0x7F;
  237. alpha1 &= 0x7F;
  238. if (lum1 & 0x40)
  239. {
  240. lum1 -= 0x80;
  241. }
  242. if (alpha1 & 0x40)
  243. {
  244. alpha1 -= 0x80;
  245. }
  246. lum0 >>= 1;
  247. lum1 >>= 1;
  248. alpha0 >>= 1;
  249. alpha1 >>= 1;
  250. lum1 += lum0;
  251. alpha1 += alpha0;
  252. lum1 = astc::clamp(lum1, 0, 255);
  253. alpha1 = astc::clamp(alpha1, 0, 255);
  254. output0 = vint4(lum0, lum0, lum0, alpha0);
  255. output1 = vint4(lum1, lum1, lum1, alpha1);
  256. }
  257. /**
  258. * @brief Unpack an HDR RGB + offset encoding.
  259. *
  260. * @param input The packed endpoints (packed and modal).
  261. * @param[out] output0 The unpacked endpoint 0 color.
  262. * @param[out] output1 The unpacked endpoint 1 color.
  263. */
  264. static void hdr_rgbo_unpack(
  265. const uint8_t input[4],
  266. vint4& output0,
  267. vint4& output1
  268. ) {
  269. int v0 = input[0];
  270. int v1 = input[1];
  271. int v2 = input[2];
  272. int v3 = input[3];
  273. int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3);
  274. int majcomp;
  275. int mode;
  276. if ((modeval & 0xC) != 0xC)
  277. {
  278. majcomp = modeval >> 2;
  279. mode = modeval & 3;
  280. }
  281. else if (modeval != 0xF)
  282. {
  283. majcomp = modeval & 3;
  284. mode = 4;
  285. }
  286. else
  287. {
  288. majcomp = 0;
  289. mode = 5;
  290. }
  291. int red = v0 & 0x3F;
  292. int green = v1 & 0x1F;
  293. int blue = v2 & 0x1F;
  294. int scale = v3 & 0x1F;
  295. int bit0 = (v1 >> 6) & 1;
  296. int bit1 = (v1 >> 5) & 1;
  297. int bit2 = (v2 >> 6) & 1;
  298. int bit3 = (v2 >> 5) & 1;
  299. int bit4 = (v3 >> 7) & 1;
  300. int bit5 = (v3 >> 6) & 1;
  301. int bit6 = (v3 >> 5) & 1;
  302. int ohcomp = 1 << mode;
  303. if (ohcomp & 0x30)
  304. green |= bit0 << 6;
  305. if (ohcomp & 0x3A)
  306. green |= bit1 << 5;
  307. if (ohcomp & 0x30)
  308. blue |= bit2 << 6;
  309. if (ohcomp & 0x3A)
  310. blue |= bit3 << 5;
  311. if (ohcomp & 0x3D)
  312. scale |= bit6 << 5;
  313. if (ohcomp & 0x2D)
  314. scale |= bit5 << 6;
  315. if (ohcomp & 0x04)
  316. scale |= bit4 << 7;
  317. if (ohcomp & 0x3B)
  318. red |= bit4 << 6;
  319. if (ohcomp & 0x04)
  320. red |= bit3 << 6;
  321. if (ohcomp & 0x10)
  322. red |= bit5 << 7;
  323. if (ohcomp & 0x0F)
  324. red |= bit2 << 7;
  325. if (ohcomp & 0x05)
  326. red |= bit1 << 8;
  327. if (ohcomp & 0x0A)
  328. red |= bit0 << 8;
  329. if (ohcomp & 0x05)
  330. red |= bit0 << 9;
  331. if (ohcomp & 0x02)
  332. red |= bit6 << 9;
  333. if (ohcomp & 0x01)
  334. red |= bit3 << 10;
  335. if (ohcomp & 0x02)
  336. red |= bit5 << 10;
  337. // expand to 12 bits.
  338. static const int shamts[6] { 1, 1, 2, 3, 4, 5 };
  339. int shamt = shamts[mode];
  340. red <<= shamt;
  341. green <<= shamt;
  342. blue <<= shamt;
  343. scale <<= shamt;
  344. // on modes 0 to 4, the values stored for "green" and "blue" are differentials,
  345. // not absolute values.
  346. if (mode != 5)
  347. {
  348. green = red - green;
  349. blue = red - blue;
  350. }
  351. // switch around components.
  352. int temp;
  353. switch (majcomp)
  354. {
  355. case 1:
  356. temp = red;
  357. red = green;
  358. green = temp;
  359. break;
  360. case 2:
  361. temp = red;
  362. red = blue;
  363. blue = temp;
  364. break;
  365. default:
  366. break;
  367. }
  368. int red0 = red - scale;
  369. int green0 = green - scale;
  370. int blue0 = blue - scale;
  371. // clamp to [0,0xFFF].
  372. if (red < 0)
  373. red = 0;
  374. if (green < 0)
  375. green = 0;
  376. if (blue < 0)
  377. blue = 0;
  378. if (red0 < 0)
  379. red0 = 0;
  380. if (green0 < 0)
  381. green0 = 0;
  382. if (blue0 < 0)
  383. blue0 = 0;
  384. output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
  385. output1 = vint4(red << 4, green << 4, blue << 4, 0x7800);
  386. }
  387. /**
  388. * @brief Unpack an HDR RGB direct encoding.
  389. *
  390. * @param input The packed endpoints (packed and modal).
  391. * @param[out] output0 The unpacked endpoint 0 color.
  392. * @param[out] output1 The unpacked endpoint 1 color.
  393. */
  394. static void hdr_rgb_unpack(
  395. const uint8_t input[6],
  396. vint4& output0,
  397. vint4& output1
  398. ) {
  399. int v0 = input[0];
  400. int v1 = input[1];
  401. int v2 = input[2];
  402. int v3 = input[3];
  403. int v4 = input[4];
  404. int v5 = input[5];
  405. // extract all the fixed-placement bitfields
  406. int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2);
  407. int majcomp = ((v4 & 0x80) >> 7) | (((v5 & 0x80) >> 7) << 1);
  408. if (majcomp == 3)
  409. {
  410. output0 = vint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800);
  411. output1 = vint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800);
  412. return;
  413. }
  414. int a = v0 | ((v1 & 0x40) << 2);
  415. int b0 = v2 & 0x3f;
  416. int b1 = v3 & 0x3f;
  417. int c = v1 & 0x3f;
  418. int d0 = v4 & 0x7f;
  419. int d1 = v5 & 0x7f;
  420. // get hold of the number of bits in 'd0' and 'd1'
  421. static const int dbits_tab[8] { 7, 6, 7, 6, 5, 6, 5, 6 };
  422. int dbits = dbits_tab[modeval];
  423. // extract six variable-placement bits
  424. int bit0 = (v2 >> 6) & 1;
  425. int bit1 = (v3 >> 6) & 1;
  426. int bit2 = (v4 >> 6) & 1;
  427. int bit3 = (v5 >> 6) & 1;
  428. int bit4 = (v4 >> 5) & 1;
  429. int bit5 = (v5 >> 5) & 1;
  430. // and prepend the variable-placement bits depending on mode.
  431. int ohmod = 1 << modeval; // one-hot-mode
  432. if (ohmod & 0xA4)
  433. a |= bit0 << 9;
  434. if (ohmod & 0x8)
  435. a |= bit2 << 9;
  436. if (ohmod & 0x50)
  437. a |= bit4 << 9;
  438. if (ohmod & 0x50)
  439. a |= bit5 << 10;
  440. if (ohmod & 0xA0)
  441. a |= bit1 << 10;
  442. if (ohmod & 0xC0)
  443. a |= bit2 << 11;
  444. if (ohmod & 0x4)
  445. c |= bit1 << 6;
  446. if (ohmod & 0xE8)
  447. c |= bit3 << 6;
  448. if (ohmod & 0x20)
  449. c |= bit2 << 7;
  450. if (ohmod & 0x5B)
  451. {
  452. b0 |= bit0 << 6;
  453. b1 |= bit1 << 6;
  454. }
  455. if (ohmod & 0x12)
  456. {
  457. b0 |= bit2 << 7;
  458. b1 |= bit3 << 7;
  459. }
  460. if (ohmod & 0xAF)
  461. {
  462. d0 |= bit4 << 5;
  463. d1 |= bit5 << 5;
  464. }
  465. if (ohmod & 0x5)
  466. {
  467. d0 |= bit2 << 6;
  468. d1 |= bit3 << 6;
  469. }
  470. // sign-extend 'd0' and 'd1'
  471. // note: this code assumes that signed right-shift actually sign-fills, not zero-fills.
  472. int32_t d0x = d0;
  473. int32_t d1x = d1;
  474. int sx_shamt = 32 - dbits;
  475. d0x <<= sx_shamt;
  476. d0x >>= sx_shamt;
  477. d1x <<= sx_shamt;
  478. d1x >>= sx_shamt;
  479. d0 = d0x;
  480. d1 = d1x;
  481. // expand all values to 12 bits, with left-shift as needed.
  482. int val_shamt = (modeval >> 1) ^ 3;
  483. a <<= val_shamt;
  484. b0 <<= val_shamt;
  485. b1 <<= val_shamt;
  486. c <<= val_shamt;
  487. d0 <<= val_shamt;
  488. d1 <<= val_shamt;
  489. // then compute the actual color values.
  490. int red1 = a;
  491. int green1 = a - b0;
  492. int blue1 = a - b1;
  493. int red0 = a - c;
  494. int green0 = a - b0 - c - d0;
  495. int blue0 = a - b1 - c - d1;
  496. // clamp the color components to [0,2^12 - 1]
  497. red0 = astc::clamp(red0, 0, 4095);
  498. green0 = astc::clamp(green0, 0, 4095);
  499. blue0 = astc::clamp(blue0, 0, 4095);
  500. red1 = astc::clamp(red1, 0, 4095);
  501. green1 = astc::clamp(green1, 0, 4095);
  502. blue1 = astc::clamp(blue1, 0, 4095);
  503. // switch around the color components
  504. int temp0, temp1;
  505. switch (majcomp)
  506. {
  507. case 1: // switch around red and green
  508. temp0 = red0;
  509. temp1 = red1;
  510. red0 = green0;
  511. red1 = green1;
  512. green0 = temp0;
  513. green1 = temp1;
  514. break;
  515. case 2: // switch around red and blue
  516. temp0 = red0;
  517. temp1 = red1;
  518. red0 = blue0;
  519. red1 = blue1;
  520. blue0 = temp0;
  521. blue1 = temp1;
  522. break;
  523. case 0: // no switch
  524. break;
  525. }
  526. output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800);
  527. output1 = vint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800);
  528. }
  529. /**
  530. * @brief Unpack an HDR RGB + LDR A direct encoding.
  531. *
  532. * @param input The packed endpoints (packed and modal).
  533. * @param[out] output0 The unpacked endpoint 0 color.
  534. * @param[out] output1 The unpacked endpoint 1 color.
  535. */
  536. static void hdr_rgb_ldr_alpha_unpack(
  537. const uint8_t input[8],
  538. vint4& output0,
  539. vint4& output1
  540. ) {
  541. hdr_rgb_unpack(input, output0, output1);
  542. int v6 = input[6];
  543. int v7 = input[7];
  544. output0.set_lane<3>(v6);
  545. output1.set_lane<3>(v7);
  546. }
  547. /**
  548. * @brief Unpack an HDR L (small range) direct encoding.
  549. *
  550. * @param input The packed endpoints (packed and modal).
  551. * @param[out] output0 The unpacked endpoint 0 color.
  552. * @param[out] output1 The unpacked endpoint 1 color.
  553. */
  554. static void hdr_luminance_small_range_unpack(
  555. const uint8_t input[2],
  556. vint4& output0,
  557. vint4& output1
  558. ) {
  559. int v0 = input[0];
  560. int v1 = input[1];
  561. int y0, y1;
  562. if (v0 & 0x80)
  563. {
  564. y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
  565. y1 = (v1 & 0x1F) << 2;
  566. }
  567. else
  568. {
  569. y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
  570. y1 = (v1 & 0xF) << 1;
  571. }
  572. y1 += y0;
  573. if (y1 > 0xFFF)
  574. {
  575. y1 = 0xFFF;
  576. }
  577. output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
  578. output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
  579. }
  580. /**
  581. * @brief Unpack an HDR L (large range) direct encoding.
  582. *
  583. * @param input The packed endpoints (packed and modal).
  584. * @param[out] output0 The unpacked endpoint 0 color.
  585. * @param[out] output1 The unpacked endpoint 1 color.
  586. */
  587. static void hdr_luminance_large_range_unpack(
  588. const uint8_t input[2],
  589. vint4& output0,
  590. vint4& output1
  591. ) {
  592. int v0 = input[0];
  593. int v1 = input[1];
  594. int y0, y1;
  595. if (v1 >= v0)
  596. {
  597. y0 = v0 << 4;
  598. y1 = v1 << 4;
  599. }
  600. else
  601. {
  602. y0 = (v1 << 4) + 8;
  603. y1 = (v0 << 4) - 8;
  604. }
  605. output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800);
  606. output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800);
  607. }
  608. /**
  609. * @brief Unpack an HDR A direct encoding.
  610. *
  611. * @param input The packed endpoints (packed and modal).
  612. * @param[out] output0 The unpacked endpoint 0 color.
  613. * @param[out] output1 The unpacked endpoint 1 color.
  614. */
  615. static void hdr_alpha_unpack(
  616. const uint8_t input[2],
  617. int& output0,
  618. int& output1
  619. ) {
  620. int v6 = input[0];
  621. int v7 = input[1];
  622. int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
  623. v6 &= 0x7F;
  624. v7 &= 0x7F;
  625. if (selector == 3)
  626. {
  627. output0 = v6 << 5;
  628. output1 = v7 << 5;
  629. }
  630. else
  631. {
  632. v6 |= (v7 << (selector + 1)) & 0x780;
  633. v7 &= (0x3f >> selector);
  634. v7 ^= 32 >> selector;
  635. v7 -= 32 >> selector;
  636. v6 <<= (4 - selector);
  637. v7 <<= (4 - selector);
  638. v7 += v6;
  639. if (v7 < 0)
  640. {
  641. v7 = 0;
  642. }
  643. else if (v7 > 0xFFF)
  644. {
  645. v7 = 0xFFF;
  646. }
  647. output0 = v6;
  648. output1 = v7;
  649. }
  650. output0 <<= 4;
  651. output1 <<= 4;
  652. }
  653. /**
  654. * @brief Unpack an HDR RGBA direct encoding.
  655. *
  656. * @param input The packed endpoints (packed and modal).
  657. * @param[out] output0 The unpacked endpoint 0 color.
  658. * @param[out] output1 The unpacked endpoint 1 color.
  659. */
  660. static void hdr_rgb_hdr_alpha_unpack(
  661. const uint8_t input[8],
  662. vint4& output0,
  663. vint4& output1
  664. ) {
  665. hdr_rgb_unpack(input, output0, output1);
  666. int alpha0, alpha1;
  667. hdr_alpha_unpack(input + 6, alpha0, alpha1);
  668. output0.set_lane<3>(alpha0);
  669. output1.set_lane<3>(alpha1);
  670. }
  671. /* See header for documentation. */
  672. void unpack_color_endpoints(
  673. astcenc_profile decode_mode,
  674. int format,
  675. const uint8_t* input,
  676. bool& rgb_hdr,
  677. bool& alpha_hdr,
  678. vint4& output0,
  679. vint4& output1
  680. ) {
  681. // Assume no NaNs and LDR endpoints unless set later
  682. rgb_hdr = false;
  683. alpha_hdr = false;
  684. bool alpha_hdr_default = false;
  685. switch (format)
  686. {
  687. case FMT_LUMINANCE:
  688. luminance_unpack(input, output0, output1);
  689. break;
  690. case FMT_LUMINANCE_DELTA:
  691. luminance_delta_unpack(input, output0, output1);
  692. break;
  693. case FMT_HDR_LUMINANCE_SMALL_RANGE:
  694. rgb_hdr = true;
  695. alpha_hdr_default = true;
  696. hdr_luminance_small_range_unpack(input, output0, output1);
  697. break;
  698. case FMT_HDR_LUMINANCE_LARGE_RANGE:
  699. rgb_hdr = true;
  700. alpha_hdr_default = true;
  701. hdr_luminance_large_range_unpack(input, output0, output1);
  702. break;
  703. case FMT_LUMINANCE_ALPHA:
  704. luminance_alpha_unpack(input, output0, output1);
  705. break;
  706. case FMT_LUMINANCE_ALPHA_DELTA:
  707. luminance_alpha_delta_unpack(input, output0, output1);
  708. break;
  709. case FMT_RGB_SCALE:
  710. {
  711. vint4 input0q(input[0], input[1], input[2], 0);
  712. uint8_t scale = input[3];
  713. rgb_scale_unpack(input0q, scale, output0, output1);
  714. }
  715. break;
  716. case FMT_RGB_SCALE_ALPHA:
  717. {
  718. vint4 input0q(input[0], input[1], input[2], input[4]);
  719. uint8_t alpha1q = input[5];
  720. uint8_t scaleq = input[3];
  721. rgb_scale_alpha_unpack(input0q, alpha1q, scaleq, output0, output1);
  722. }
  723. break;
  724. case FMT_HDR_RGB_SCALE:
  725. rgb_hdr = true;
  726. alpha_hdr_default = true;
  727. hdr_rgbo_unpack(input, output0, output1);
  728. break;
  729. case FMT_RGB:
  730. {
  731. vint4 input0q(input[0], input[2], input[4], 0);
  732. vint4 input1q(input[1], input[3], input[5], 0);
  733. rgb_unpack(input0q, input1q, output0, output1);
  734. }
  735. break;
  736. case FMT_RGB_DELTA:
  737. {
  738. vint4 input0q(input[0], input[2], input[4], 0);
  739. vint4 input1q(input[1], input[3], input[5], 0);
  740. rgb_delta_unpack(input0q, input1q, output0, output1);
  741. }
  742. break;
  743. case FMT_HDR_RGB:
  744. rgb_hdr = true;
  745. alpha_hdr_default = true;
  746. hdr_rgb_unpack(input, output0, output1);
  747. break;
  748. case FMT_RGBA:
  749. {
  750. vint4 input0q(input[0], input[2], input[4], input[6]);
  751. vint4 input1q(input[1], input[3], input[5], input[7]);
  752. rgba_unpack(input0q, input1q, output0, output1);
  753. }
  754. break;
  755. case FMT_RGBA_DELTA:
  756. {
  757. vint4 input0q(input[0], input[2], input[4], input[6]);
  758. vint4 input1q(input[1], input[3], input[5], input[7]);
  759. rgba_delta_unpack(input0q, input1q, output0, output1);
  760. }
  761. break;
  762. case FMT_HDR_RGB_LDR_ALPHA:
  763. rgb_hdr = true;
  764. hdr_rgb_ldr_alpha_unpack(input, output0, output1);
  765. break;
  766. case FMT_HDR_RGBA:
  767. rgb_hdr = true;
  768. alpha_hdr = true;
  769. hdr_rgb_hdr_alpha_unpack(input, output0, output1);
  770. break;
  771. }
  772. // Assign a correct default alpha
  773. if (alpha_hdr_default)
  774. {
  775. if (decode_mode == ASTCENC_PRF_HDR)
  776. {
  777. output0.set_lane<3>(0x7800);
  778. output1.set_lane<3>(0x7800);
  779. alpha_hdr = true;
  780. }
  781. else
  782. {
  783. output0.set_lane<3>(0x00FF);
  784. output1.set_lane<3>(0x00FF);
  785. alpha_hdr = false;
  786. }
  787. }
  788. // Handle endpoint errors and expansion
  789. // Linear LDR 8-bit endpoints are expanded to 16-bit by replication
  790. if (decode_mode == ASTCENC_PRF_LDR)
  791. {
  792. // Error color - HDR endpoint in an LDR encoding
  793. if (rgb_hdr || alpha_hdr)
  794. {
  795. output0 = vint4(0xFF, 0x00, 0xFF, 0xFF);
  796. output1 = vint4(0xFF, 0x00, 0xFF, 0xFF);
  797. rgb_hdr = false;
  798. alpha_hdr = false;
  799. }
  800. output0 = output0 * 257;
  801. output1 = output1 * 257;
  802. }
  803. // sRGB LDR 8-bit endpoints are expanded to 16 bit by:
  804. // - RGB = shift left by 8 bits and OR with 0x80
  805. // - A = replication
  806. else if (decode_mode == ASTCENC_PRF_LDR_SRGB)
  807. {
  808. // Error color - HDR endpoint in an LDR encoding
  809. if (rgb_hdr || alpha_hdr)
  810. {
  811. output0 = vint4(0xFF, 0x00, 0xFF, 0xFF);
  812. output1 = vint4(0xFF, 0x00, 0xFF, 0xFF);
  813. rgb_hdr = false;
  814. alpha_hdr = false;
  815. }
  816. vmask4 mask(true, true, true, false);
  817. vint4 output0rgb = lsl<8>(output0) | vint4(0x80);
  818. vint4 output0a = output0 * 257;
  819. output0 = select(output0a, output0rgb, mask);
  820. vint4 output1rgb = lsl<8>(output1) | vint4(0x80);
  821. vint4 output1a = output1 * 257;
  822. output1 = select(output1a, output1rgb, mask);
  823. }
  824. // An HDR profile decode, but may be using linear LDR endpoints
  825. // Linear LDR 8-bit endpoints are expanded to 16-bit by replication
  826. // HDR endpoints are already 16-bit
  827. else
  828. {
  829. vmask4 hdr_lanes(rgb_hdr, rgb_hdr, rgb_hdr, alpha_hdr);
  830. vint4 output_scale = select(vint4(257), vint4(1), hdr_lanes);
  831. output0 = output0 * output_scale;
  832. output1 = output1 * output_scale;
  833. }
  834. }