astcenc_entry.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2024 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions for the library entrypoint.
  19. */
  20. #include <array>
  21. #include <cstring>
  22. #include <new>
  23. #include "astcenc.h"
  24. #include "astcenc_internal_entry.h"
  25. #include "astcenc_diagnostic_trace.h"
  /**
   * @brief The tuning values attached to one quality level of a preset table.
   *
   * See the @c astcenc_config structure for detailed parameter documentation. Apart from
   * @c quality and the two @c tune_db_limit_*_base terms, each field feeds the @c astcenc_config
   * field of the same name when a config is initialized.
   *
   * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit
   * db_limit. A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use
   * lower ratios for the more thorough search presets because the underlying db_limit is so much
   * higher.
   */
  struct astcenc_preset_config
  {
      /** @brief The quality level this entry describes; the key used to select or blend entries. */
      float quality;

      // Partition count and partition index search limits
      unsigned int tune_partition_count_limit;
      unsigned int tune_2partition_index_limit;
      unsigned int tune_3partition_index_limit;
      unsigned int tune_4partition_index_limit;

      // Block mode, refinement, and candidate limits
      unsigned int tune_block_mode_limit;
      unsigned int tune_refinement_limit;
      unsigned int tune_candidate_limit;
      unsigned int tune_2partitioning_candidate_limit;
      unsigned int tune_3partitioning_candidate_limit;
      unsigned int tune_4partitioning_candidate_limit;

      // The two base terms that are combined with the block texel count to give the dB limit
      float tune_db_limit_a_base;
      float tune_db_limit_b_base;

      // Overshoot and early-out thresholds
      float tune_mse_overshoot;
      float tune_2partition_early_out_limit_factor;
      float tune_3partition_early_out_limit_factor;
      float tune_2plane_early_out_limit_correlation;
      float tune_search_mode0_enable;
  };
  56. /**
  57. * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
  58. */
  59. static const std::array<astcenc_preset_config, 6> preset_configs_high {{
  60. {
  61. ASTCENC_PRE_FASTEST,
  62. 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
  63. }, {
  64. ASTCENC_PRE_FAST,
  65. 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
  66. }, {
  67. ASTCENC_PRE_MEDIUM,
  68. 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
  69. }, {
  70. ASTCENC_PRE_THOROUGH,
  71. 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
  72. }, {
  73. ASTCENC_PRE_VERYTHOROUGH,
  74. 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
  75. }, {
  76. ASTCENC_PRE_EXHAUSTIVE,
  77. 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
  78. }
  79. }};
  80. /**
  81. * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
  82. */
  83. static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
  84. {
  85. ASTCENC_PRE_FASTEST,
  86. 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
  87. }, {
  88. ASTCENC_PRE_FAST,
  89. 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
  90. }, {
  91. ASTCENC_PRE_MEDIUM,
  92. 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
  93. }, {
  94. ASTCENC_PRE_THOROUGH,
  95. 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
  96. }, {
  97. ASTCENC_PRE_VERYTHOROUGH,
  98. 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
  99. }, {
  100. ASTCENC_PRE_EXHAUSTIVE,
  101. 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
  102. }
  103. }};
  104. /**
  105. * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
  106. */
  107. static const std::array<astcenc_preset_config, 6> preset_configs_low {{
  108. {
  109. ASTCENC_PRE_FASTEST,
  110. 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
  111. }, {
  112. ASTCENC_PRE_FAST,
  113. 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
  114. }, {
  115. ASTCENC_PRE_MEDIUM,
  116. 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
  117. }, {
  118. ASTCENC_PRE_THOROUGH,
  119. 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
  120. }, {
  121. ASTCENC_PRE_VERYTHOROUGH,
  122. 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
  123. }, {
  124. ASTCENC_PRE_EXHAUSTIVE,
  125. 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
  126. }
  127. }};
  128. /**
  129. * @brief Validate CPU floating point meets assumptions made in the codec.
  130. *
  131. * The codec is written with the assumption that a float threaded through the @c if32 union will be
  132. * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the
  133. * case in an IEEE-754 compliant system, however not every system or compilation mode is actually
  134. * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled.
  135. *
  136. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  137. */
  138. static astcenc_error validate_cpu_float()
  139. {
  140. if32 p;
  141. volatile float xprec_testval = 2.51f;
  142. p.f = xprec_testval + 12582912.0f;
  143. float q = p.f - 12582912.0f;
  144. if (q != 3.0f)
  145. {
  146. return ASTCENC_ERR_BAD_CPU_FLOAT;
  147. }
  148. return ASTCENC_SUCCESS;
  149. }
  150. /**
  151. * @brief Validate config profile.
  152. *
  153. * @param profile The profile to check.
  154. *
  155. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  156. */
  157. static astcenc_error validate_profile(
  158. astcenc_profile profile
  159. ) {
  160. // Values in this enum are from an external user, so not guaranteed to be
  161. // bounded to the enum values
  162. switch (static_cast<int>(profile))
  163. {
  164. case ASTCENC_PRF_LDR_SRGB:
  165. case ASTCENC_PRF_LDR:
  166. case ASTCENC_PRF_HDR_RGB_LDR_A:
  167. case ASTCENC_PRF_HDR:
  168. return ASTCENC_SUCCESS;
  169. default:
  170. return ASTCENC_ERR_BAD_PROFILE;
  171. }
  172. }
  173. /**
  174. * @brief Validate block size.
  175. *
  176. * @param block_x The block x dimensions.
  177. * @param block_y The block y dimensions.
  178. * @param block_z The block z dimensions.
  179. *
  180. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  181. */
  182. static astcenc_error validate_block_size(
  183. unsigned int block_x,
  184. unsigned int block_y,
  185. unsigned int block_z
  186. ) {
  187. // Test if this is a legal block size at all
  188. bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) ||
  189. ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z)));
  190. if (!is_legal)
  191. {
  192. return ASTCENC_ERR_BAD_BLOCK_SIZE;
  193. }
  194. // Test if this build has sufficient capacity for this block size
  195. bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
  196. if (!have_capacity)
  197. {
  198. return ASTCENC_ERR_NOT_IMPLEMENTED;
  199. }
  200. return ASTCENC_SUCCESS;
  201. }
  202. /**
  203. * @brief Validate flags.
  204. *
  205. * @param profile The profile to check.
  206. * @param flags The flags to check.
  207. *
  208. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  209. */
  210. static astcenc_error validate_flags(
  211. astcenc_profile profile,
  212. unsigned int flags
  213. ) {
  214. // Flags field must not contain any unknown flag bits
  215. unsigned int exMask = ~ASTCENC_ALL_FLAGS;
  216. if (popcount(flags & exMask) != 0)
  217. {
  218. return ASTCENC_ERR_BAD_FLAGS;
  219. }
  220. // Flags field must only contain at most a single map type
  221. exMask = ASTCENC_FLG_MAP_NORMAL
  222. | ASTCENC_FLG_MAP_RGBM;
  223. if (popcount(flags & exMask) > 1)
  224. {
  225. return ASTCENC_ERR_BAD_FLAGS;
  226. }
  227. // Decode_unorm8 must only be used with an LDR profile
  228. bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8;
  229. bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A);
  230. if (is_unorm8 && is_hdr)
  231. {
  232. return ASTCENC_ERR_BAD_DECODE_MODE;
  233. }
  234. return ASTCENC_SUCCESS;
  235. }
  236. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  237. /**
  238. * @brief Validate single channel compression swizzle.
  239. *
  240. * @param swizzle The swizzle to check.
  241. *
  242. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  243. */
  244. static astcenc_error validate_compression_swz(
  245. astcenc_swz swizzle
  246. ) {
  247. // Not all enum values are handled; SWZ_Z is invalid for compression
  248. switch (static_cast<int>(swizzle))
  249. {
  250. case ASTCENC_SWZ_R:
  251. case ASTCENC_SWZ_G:
  252. case ASTCENC_SWZ_B:
  253. case ASTCENC_SWZ_A:
  254. case ASTCENC_SWZ_0:
  255. case ASTCENC_SWZ_1:
  256. return ASTCENC_SUCCESS;
  257. default:
  258. return ASTCENC_ERR_BAD_SWIZZLE;
  259. }
  260. }
  261. /**
  262. * @brief Validate overall compression swizzle.
  263. *
  264. * @param swizzle The swizzle to check.
  265. *
  266. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  267. */
  268. static astcenc_error validate_compression_swizzle(
  269. const astcenc_swizzle& swizzle
  270. ) {
  271. if (validate_compression_swz(swizzle.r) ||
  272. validate_compression_swz(swizzle.g) ||
  273. validate_compression_swz(swizzle.b) ||
  274. validate_compression_swz(swizzle.a))
  275. {
  276. return ASTCENC_ERR_BAD_SWIZZLE;
  277. }
  278. return ASTCENC_SUCCESS;
  279. }
  280. #endif
  281. /**
  282. * @brief Validate single channel decompression swizzle.
  283. *
  284. * @param swizzle The swizzle to check.
  285. *
  286. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  287. */
  288. static astcenc_error validate_decompression_swz(
  289. astcenc_swz swizzle
  290. ) {
  291. // Values in this enum are from an external user, so not guaranteed to be
  292. // bounded to the enum values
  293. switch (static_cast<int>(swizzle))
  294. {
  295. case ASTCENC_SWZ_R:
  296. case ASTCENC_SWZ_G:
  297. case ASTCENC_SWZ_B:
  298. case ASTCENC_SWZ_A:
  299. case ASTCENC_SWZ_0:
  300. case ASTCENC_SWZ_1:
  301. case ASTCENC_SWZ_Z:
  302. return ASTCENC_SUCCESS;
  303. default:
  304. return ASTCENC_ERR_BAD_SWIZZLE;
  305. }
  306. }
  307. /**
  308. * @brief Validate overall decompression swizzle.
  309. *
  310. * @param swizzle The swizzle to check.
  311. *
  312. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  313. */
  314. static astcenc_error validate_decompression_swizzle(
  315. const astcenc_swizzle& swizzle
  316. ) {
  317. if (validate_decompression_swz(swizzle.r) ||
  318. validate_decompression_swz(swizzle.g) ||
  319. validate_decompression_swz(swizzle.b) ||
  320. validate_decompression_swz(swizzle.a))
  321. {
  322. return ASTCENC_ERR_BAD_SWIZZLE;
  323. }
  324. return ASTCENC_SUCCESS;
  325. }
  326. /**
  327. * Validate that an incoming configuration is in-spec.
  328. *
  329. * This function can respond in two ways:
  330. *
  331. * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
  332. * for out-of-range inputs in this case.
  333. * * Numerical inputs and logic inputs are are logically invalid and which make no sense
  334. * algorithmically will return an error.
  335. *
  336. * @param[in,out] config The input compressor configuration.
  337. *
  338. * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
  339. */
  340. static astcenc_error validate_config(
  341. astcenc_config &config
  342. ) {
  343. astcenc_error status;
  344. status = validate_profile(config.profile);
  345. if (status != ASTCENC_SUCCESS)
  346. {
  347. return status;
  348. }
  349. status = validate_flags(config.profile, config.flags);
  350. if (status != ASTCENC_SUCCESS)
  351. {
  352. return status;
  353. }
  354. status = validate_block_size(config.block_x, config.block_y, config.block_z);
  355. if (status != ASTCENC_SUCCESS)
  356. {
  357. return status;
  358. }
  359. #if defined(ASTCENC_DECOMPRESS_ONLY)
  360. // Decompress-only builds only support decompress-only contexts
  361. if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
  362. {
  363. return ASTCENC_ERR_BAD_PARAM;
  364. }
  365. #endif
  366. config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
  367. config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
  368. config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  369. config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  370. config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  371. config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
  372. config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
  373. config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
  374. config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
  375. config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
  376. config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
  377. config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
  378. config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
  379. config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
  380. config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
  381. config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
  382. // Specifying a zero weight color component is not allowed; force to small value
  383. float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
  384. astc::max(config.cw_b_weight, config.cw_a_weight));
  385. if (max_weight > 0.0f)
  386. {
  387. max_weight /= 1000.0f;
  388. config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
  389. config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
  390. config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
  391. config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
  392. }
  393. // If all color components error weights are zero then return an error
  394. else
  395. {
  396. return ASTCENC_ERR_BAD_PARAM;
  397. }
  398. return ASTCENC_SUCCESS;
  399. }
  400. /* See header for documentation. */
  401. astcenc_error astcenc_config_init(
  402. astcenc_profile profile,
  403. unsigned int block_x,
  404. unsigned int block_y,
  405. unsigned int block_z,
  406. float quality,
  407. unsigned int flags,
  408. astcenc_config* configp
  409. ) {
  410. astcenc_error status;
  411. status = validate_cpu_float();
  412. if (status != ASTCENC_SUCCESS)
  413. {
  414. return status;
  415. }
  416. // Zero init all config fields; although most of will be over written
  417. astcenc_config& config = *configp;
  418. std::memset(&config, 0, sizeof(config));
  419. // Process the block size
  420. block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
  421. status = validate_block_size(block_x, block_y, block_z);
  422. if (status != ASTCENC_SUCCESS)
  423. {
  424. return status;
  425. }
  426. config.block_x = block_x;
  427. config.block_y = block_y;
  428. config.block_z = block_z;
  429. float texels = static_cast<float>(block_x * block_y * block_z);
  430. float ltexels = logf(texels) / logf(10.0f);
  431. // Process the performance quality level or preset; note that this must be done before we
  432. // process any additional settings, such as color profile and flags, which may replace some of
  433. // these settings with more use case tuned values
  434. if (quality < ASTCENC_PRE_FASTEST ||
  435. quality > ASTCENC_PRE_EXHAUSTIVE)
  436. {
  437. return ASTCENC_ERR_BAD_QUALITY;
  438. }
  439. static const std::array<astcenc_preset_config, 6>* preset_configs;
  440. int texels_int = block_x * block_y * block_z;
  441. if (texels_int < 25)
  442. {
  443. preset_configs = &preset_configs_high;
  444. }
  445. else if (texels_int < 64)
  446. {
  447. preset_configs = &preset_configs_mid;
  448. }
  449. else
  450. {
  451. preset_configs = &preset_configs_low;
  452. }
  453. // Determine which preset to use, or which pair to interpolate
  454. size_t start;
  455. size_t end;
  456. for (end = 0; end < preset_configs->size(); end++)
  457. {
  458. if ((*preset_configs)[end].quality >= quality)
  459. {
  460. break;
  461. }
  462. }
  463. start = end == 0 ? 0 : end - 1;
  464. // Start and end node are the same - so just transfer the values.
  465. if (start == end)
  466. {
  467. config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
  468. config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
  469. config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
  470. config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
  471. config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
  472. config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
  473. config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit;
  474. config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit;
  475. config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit;
  476. config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit;
  477. config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
  478. (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
  479. config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
  480. config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
  481. config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
  482. config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
  483. config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
  484. }
  485. // Start and end node are not the same - so interpolate between them
  486. else
  487. {
  488. auto& node_a = (*preset_configs)[start];
  489. auto& node_b = (*preset_configs)[end];
  490. float wt_range = node_b.quality - node_a.quality;
  491. assert(wt_range > 0);
  492. // Compute interpolation factors
  493. float wt_node_a = (node_b.quality - quality) / wt_range;
  494. float wt_node_b = (quality - node_a.quality) / wt_range;
  495. #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
  496. #define LERPI(param) astc::flt2int_rtn(\
  497. (static_cast<float>(node_a.param) * wt_node_a) + \
  498. (static_cast<float>(node_b.param) * wt_node_b))
  499. #define LERPUI(param) static_cast<unsigned int>(LERPI(param))
  500. config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
  501. config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
  502. config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
  503. config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
  504. config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
  505. config.tune_refinement_limit = LERPI(tune_refinement_limit);
  506. config.tune_candidate_limit = LERPUI(tune_candidate_limit);
  507. config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit);
  508. config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit);
  509. config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit);
  510. config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
  511. LERP(tune_db_limit_b_base) - 19 * ltexels);
  512. config.tune_mse_overshoot = LERP(tune_mse_overshoot);
  513. config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
  514. config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
  515. config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
  516. config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
  517. #undef LERP
  518. #undef LERPI
  519. #undef LERPUI
  520. }
  521. // Set heuristics to the defaults for each color profile
  522. config.cw_r_weight = 1.0f;
  523. config.cw_g_weight = 1.0f;
  524. config.cw_b_weight = 1.0f;
  525. config.cw_a_weight = 1.0f;
  526. config.a_scale_radius = 0;
  527. config.rgbm_m_scale = 0.0f;
  528. config.profile = profile;
  529. // Values in this enum are from an external user, so not guaranteed to be
  530. // bounded to the enum values
  531. switch (static_cast<int>(profile))
  532. {
  533. case ASTCENC_PRF_LDR:
  534. case ASTCENC_PRF_LDR_SRGB:
  535. break;
  536. case ASTCENC_PRF_HDR_RGB_LDR_A:
  537. case ASTCENC_PRF_HDR:
  538. config.tune_db_limit = 999.0f;
  539. config.tune_search_mode0_enable = 0.0f;
  540. break;
  541. default:
  542. return ASTCENC_ERR_BAD_PROFILE;
  543. }
  544. // Flags field must not contain any unknown flag bits
  545. status = validate_flags(profile, flags);
  546. if (status != ASTCENC_SUCCESS)
  547. {
  548. return status;
  549. }
  550. if (flags & ASTCENC_FLG_MAP_NORMAL)
  551. {
  552. // Normal map encoding uses L+A blocks, so allow one more partitioning
  553. // than normal. We need need fewer bits for endpoints, so more likely
  554. // to be able to use more partitions than an RGB/RGBA block
  555. config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);
  556. config.cw_g_weight = 0.0f;
  557. config.cw_b_weight = 0.0f;
  558. config.tune_2partition_early_out_limit_factor *= 1.5f;
  559. config.tune_3partition_early_out_limit_factor *= 1.5f;
  560. config.tune_2plane_early_out_limit_correlation = 0.99f;
  561. // Normals are prone to blocking artifacts on smooth curves
  562. // so force compressor to try harder here ...
  563. config.tune_db_limit *= 1.03f;
  564. }
  565. else if (flags & ASTCENC_FLG_MAP_RGBM)
  566. {
  567. config.rgbm_m_scale = 5.0f;
  568. config.cw_a_weight = 2.0f * config.rgbm_m_scale;
  569. }
  570. else // (This is color data)
  571. {
  572. // This is a very basic perceptual metric for RGB color data, which weights error
  573. // significance by the perceptual luminance contribution of each color channel. For
  574. // luminance the usual weights to compute luminance from a linear RGB value are as
  575. // follows:
  576. //
  577. // l = r * 0.3 + g * 0.59 + b * 0.11
  578. //
  579. // ... but we scale these up to keep a better balance between color and alpha. Note
  580. // that if the content is using alpha we'd recommend using the -a option to weight
  581. // the color contribution by the alpha transparency.
  582. if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
  583. {
  584. config.cw_r_weight = 0.30f * 2.25f;
  585. config.cw_g_weight = 0.59f * 2.25f;
  586. config.cw_b_weight = 0.11f * 2.25f;
  587. }
  588. }
  589. config.flags = flags;
  590. return ASTCENC_SUCCESS;
  591. }
  592. /* See header for documentation. */
  593. astcenc_error astcenc_context_alloc(
  594. const astcenc_config* configp,
  595. unsigned int thread_count,
  596. astcenc_context** context
  597. ) {
  598. astcenc_error status;
  599. const astcenc_config& config = *configp;
  600. status = validate_cpu_float();
  601. if (status != ASTCENC_SUCCESS)
  602. {
  603. return status;
  604. }
  605. if (thread_count == 0)
  606. {
  607. return ASTCENC_ERR_BAD_PARAM;
  608. }
  609. #if defined(ASTCENC_DIAGNOSTICS)
  610. // Force single threaded compressor use in diagnostic mode.
  611. if (thread_count != 1)
  612. {
  613. return ASTCENC_ERR_BAD_PARAM;
  614. }
  615. #endif
  616. astcenc_context* ctxo = new astcenc_context;
  617. astcenc_contexti* ctx = &ctxo->context;
  618. ctx->thread_count = thread_count;
  619. ctx->config = config;
  620. ctx->working_buffers = nullptr;
  621. // These are allocated per-compress, as they depend on image size
  622. ctx->input_alpha_averages = nullptr;
  623. // Copy the config first and validate the copy (we may modify it)
  624. status = validate_config(ctx->config);
  625. if (status != ASTCENC_SUCCESS)
  626. {
  627. delete ctxo;
  628. return status;
  629. }
  630. ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
  631. if (!ctx->bsd)
  632. {
  633. delete ctxo;
  634. return ASTCENC_ERR_OUT_OF_MEM;
  635. }
  636. bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
  637. init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
  638. can_omit_modes,
  639. config.tune_partition_count_limit,
  640. static_cast<float>(config.tune_block_mode_limit) / 100.0f,
  641. *ctx->bsd);
  642. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  643. // Do setup only needed by compression
  644. if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
  645. {
  646. // Turn a dB limit into a per-texel error for faster use later
  647. if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
  648. {
  649. ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
  650. }
  651. else
  652. {
  653. ctx->config.tune_db_limit = 0.0f;
  654. }
  655. size_t worksize = sizeof(compression_working_buffers) * thread_count;
  656. ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
  657. static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0),
  658. "compression_working_buffers size must be multiple of vector alignment");
  659. if (!ctx->working_buffers)
  660. {
  661. aligned_free<block_size_descriptor>(ctx->bsd);
  662. delete ctxo;
  663. *context = nullptr;
  664. return ASTCENC_ERR_OUT_OF_MEM;
  665. }
  666. }
  667. #endif
  668. #if defined(ASTCENC_DIAGNOSTICS)
  669. ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
  670. if (!ctx->trace_log->m_file)
  671. {
  672. return ASTCENC_ERR_DTRACE_FAILURE;
  673. }
  674. trace_add_data("block_x", config.block_x);
  675. trace_add_data("block_y", config.block_y);
  676. trace_add_data("block_z", config.block_z);
  677. #endif
  678. *context = ctxo;
  679. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  680. prepare_angular_tables();
  681. #endif
  682. return ASTCENC_SUCCESS;
  683. }
  684. /* See header dor documentation. */
  685. void astcenc_context_free(
  686. astcenc_context* ctxo
  687. ) {
  688. if (ctxo)
  689. {
  690. astcenc_contexti* ctx = &ctxo->context;
  691. aligned_free<compression_working_buffers>(ctx->working_buffers);
  692. aligned_free<block_size_descriptor>(ctx->bsd);
  693. #if defined(ASTCENC_DIAGNOSTICS)
  694. delete ctx->trace_log;
  695. #endif
  696. delete ctxo;
  697. }
  698. }
  699. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  700. /**
  701. * @brief Compress an image, after any preflight has completed.
  702. *
  703. * @param[out] ctxo The compressor context.
  704. * @param thread_index The thread index.
  705. * @param image The intput image.
  706. * @param swizzle The input swizzle.
  707. * @param[out] buffer The output array for the compressed data.
  708. */
  709. static void compress_image(
  710. astcenc_context& ctxo,
  711. unsigned int thread_index,
  712. const astcenc_image& image,
  713. const astcenc_swizzle& swizzle,
  714. uint8_t* buffer
  715. ) {
  716. astcenc_contexti& ctx = ctxo.context;
  717. const block_size_descriptor& bsd = *ctx.bsd;
  718. astcenc_profile decode_mode = ctx.config.profile;
  719. image_block blk;
  720. int block_x = bsd.xdim;
  721. int block_y = bsd.ydim;
  722. int block_z = bsd.zdim;
  723. blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
  724. int dim_x = image.dim_x;
  725. int dim_y = image.dim_y;
  726. int dim_z = image.dim_z;
  727. int xblocks = (dim_x + block_x - 1) / block_x;
  728. int yblocks = (dim_y + block_y - 1) / block_y;
  729. int zblocks = (dim_z + block_z - 1) / block_z;
  730. int block_count = zblocks * yblocks * xblocks;
  731. int row_blocks = xblocks;
  732. int plane_blocks = xblocks * yblocks;
  733. blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8;
  734. // Populate the block channel weights
  735. blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
  736. ctx.config.cw_g_weight,
  737. ctx.config.cw_b_weight,
  738. ctx.config.cw_a_weight);
  739. // Use preallocated scratch buffer
  740. auto& temp_buffers = ctx.working_buffers[thread_index];
  741. // Only the first thread actually runs the initializer
  742. ctxo.manage_compress.init(block_count, ctx.config.progress_callback);
  743. // Determine if we can use an optimized load function
  744. bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
  745. (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
  746. bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
  747. (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
  748. bool use_fast_load = !needs_swz && !needs_hdr &&
  749. block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
  750. auto load_func = load_image_block;
  751. if (use_fast_load)
  752. {
  753. load_func = load_image_block_fast_ldr;
  754. }
  755. // All threads run this processing loop until there is no work remaining
  756. while (true)
  757. {
  758. unsigned int count;
  759. unsigned int base = ctxo.manage_compress.get_task_assignment(16, count);
  760. if (!count)
  761. {
  762. break;
  763. }
  764. for (unsigned int i = base; i < base + count; i++)
  765. {
  766. // Decode i into x, y, z block indices
  767. int z = i / plane_blocks;
  768. unsigned int rem = i - (z * plane_blocks);
  769. int y = rem / row_blocks;
  770. int x = rem - (y * row_blocks);
  771. // Test if we can apply some basic alpha-scale RDO
  772. bool use_full_block = true;
  773. if (ctx.config.a_scale_radius != 0 && block_z == 1)
  774. {
  775. int start_x = x * block_x;
  776. int end_x = astc::min(dim_x, start_x + block_x);
  777. int start_y = y * block_y;
  778. int end_y = astc::min(dim_y, start_y + block_y);
  779. // SATs accumulate error, so don't test exactly zero. Test for
  780. // less than 1 alpha in the expanded block footprint that
  781. // includes the alpha radius.
  782. int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
  783. int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
  784. float footprint = static_cast<float>(x_footprint * y_footprint);
  785. float threshold = 0.9f / (255.0f * footprint);
  786. // Do we have any alpha values?
  787. use_full_block = false;
  788. for (int ay = start_y; ay < end_y; ay++)
  789. {
  790. for (int ax = start_x; ax < end_x; ax++)
  791. {
  792. float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
  793. if (a_avg > threshold)
  794. {
  795. use_full_block = true;
  796. ax = end_x;
  797. ay = end_y;
  798. }
  799. }
  800. }
  801. }
  802. // Fetch the full block for compression
  803. if (use_full_block)
  804. {
  805. load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
  806. // Scale RGB error contribution by the maximum alpha in the block
  807. // This encourages preserving alpha accuracy in regions with high
  808. // transparency, and can buy up to 0.5 dB PSNR.
  809. if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
  810. {
  811. float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
  812. blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
  813. ctx.config.cw_g_weight * alpha_scale,
  814. ctx.config.cw_b_weight * alpha_scale,
  815. ctx.config.cw_a_weight);
  816. }
  817. }
  818. // Apply alpha scale RDO - substitute constant color block
  819. else
  820. {
  821. blk.origin_texel = vfloat4::zero();
  822. blk.data_min = vfloat4::zero();
  823. blk.data_mean = vfloat4::zero();
  824. blk.data_max = vfloat4::zero();
  825. blk.grayscale = true;
  826. }
  827. int offset = ((z * yblocks + y) * xblocks + x) * 16;
  828. uint8_t *bp = buffer + offset;
  829. compress_block(ctx, blk, bp, temp_buffers);
  830. }
  831. ctxo.manage_compress.complete_task_assignment(count);
  832. }
  833. }
  834. /**
  835. * @brief Compute regional averages in an image.
  836. *
  837. * This function can be called by multiple threads, but only after a single
  838. * thread calls the setup function @c init_compute_averages().
  839. *
  840. * Results are written back into @c img->input_alpha_averages.
  841. *
  842. * @param[out] ctx The context.
  843. * @param ag The average and variance arguments created during setup.
  844. */
  845. static void compute_averages(
  846. astcenc_context& ctx,
  847. const avg_args &ag
  848. ) {
  849. pixel_region_args arg = ag.arg;
  850. arg.work_memory = new vfloat4[ag.work_memory_size];
  851. int size_x = ag.img_size_x;
  852. int size_y = ag.img_size_y;
  853. int size_z = ag.img_size_z;
  854. int step_xy = ag.blk_size_xy;
  855. int step_z = ag.blk_size_z;
  856. int y_tasks = (size_y + step_xy - 1) / step_xy;
  857. // All threads run this processing loop until there is no work remaining
  858. while (true)
  859. {
  860. unsigned int count;
  861. unsigned int base = ctx.manage_avg.get_task_assignment(16, count);
  862. if (!count)
  863. {
  864. break;
  865. }
  866. for (unsigned int i = base; i < base + count; i++)
  867. {
  868. int z = (i / (y_tasks)) * step_z;
  869. int y = (i - (z * y_tasks)) * step_xy;
  870. arg.size_z = astc::min(step_z, size_z - z);
  871. arg.offset_z = z;
  872. arg.size_y = astc::min(step_xy, size_y - y);
  873. arg.offset_y = y;
  874. for (int x = 0; x < size_x; x += step_xy)
  875. {
  876. arg.size_x = astc::min(step_xy, size_x - x);
  877. arg.offset_x = x;
  878. compute_pixel_region_variance(ctx.context, arg);
  879. }
  880. }
  881. ctx.manage_avg.complete_task_assignment(count);
  882. }
  883. delete[] arg.work_memory;
  884. }
  885. #endif
  886. /* See header for documentation. */
  887. astcenc_error astcenc_compress_image(
  888. astcenc_context* ctxo,
  889. astcenc_image* imagep,
  890. const astcenc_swizzle* swizzle,
  891. uint8_t* data_out,
  892. size_t data_len,
  893. unsigned int thread_index
  894. ) {
  895. #if defined(ASTCENC_DECOMPRESS_ONLY)
  896. (void)ctxo;
  897. (void)imagep;
  898. (void)swizzle;
  899. (void)data_out;
  900. (void)data_len;
  901. (void)thread_index;
  902. return ASTCENC_ERR_BAD_CONTEXT;
  903. #else
  904. astcenc_contexti* ctx = &ctxo->context;
  905. astcenc_error status;
  906. astcenc_image& image = *imagep;
  907. if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
  908. {
  909. return ASTCENC_ERR_BAD_CONTEXT;
  910. }
  911. status = validate_compression_swizzle(*swizzle);
  912. if (status != ASTCENC_SUCCESS)
  913. {
  914. return status;
  915. }
  916. if (thread_index >= ctx->thread_count)
  917. {
  918. return ASTCENC_ERR_BAD_PARAM;
  919. }
  920. unsigned int block_x = ctx->config.block_x;
  921. unsigned int block_y = ctx->config.block_y;
  922. unsigned int block_z = ctx->config.block_z;
  923. unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
  924. unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
  925. unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;
  926. // Check we have enough output space (16 bytes per block)
  927. size_t size_needed = xblocks * yblocks * zblocks * 16;
  928. if (data_len < size_needed)
  929. {
  930. return ASTCENC_ERR_OUT_OF_MEM;
  931. }
  932. // If context thread count is one then implicitly reset
  933. if (ctx->thread_count == 1)
  934. {
  935. astcenc_compress_reset(ctxo);
  936. }
  937. if (ctx->config.a_scale_radius != 0)
  938. {
  939. // First thread to enter will do setup, other threads will subsequently
  940. // enter the critical section but simply skip over the initialization
  941. auto init_avg = [ctx, &image, swizzle]() {
  942. // Perform memory allocations for the destination buffers
  943. size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
  944. ctx->input_alpha_averages = new float[texel_count];
  945. return init_compute_averages(
  946. image, ctx->config.a_scale_radius, *swizzle,
  947. ctx->avg_preprocess_args);
  948. };
  949. // Only the first thread actually runs the initializer
  950. ctxo->manage_avg.init(init_avg);
  951. // All threads will enter this function and dynamically grab work
  952. compute_averages(*ctxo, ctx->avg_preprocess_args);
  953. }
  954. // Wait for compute_averages to complete before compressing
  955. ctxo->manage_avg.wait();
  956. compress_image(*ctxo, thread_index, image, *swizzle, data_out);
  957. // Wait for compress to complete before freeing memory
  958. ctxo->manage_compress.wait();
  959. auto term_compress = [ctx]() {
  960. delete[] ctx->input_alpha_averages;
  961. ctx->input_alpha_averages = nullptr;
  962. };
  963. // Only the first thread to arrive actually runs the term
  964. ctxo->manage_compress.term(term_compress);
  965. return ASTCENC_SUCCESS;
  966. #endif
  967. }
  968. /* See header for documentation. */
  969. astcenc_error astcenc_compress_reset(
  970. astcenc_context* ctxo
  971. ) {
  972. #if defined(ASTCENC_DECOMPRESS_ONLY)
  973. (void)ctxo;
  974. return ASTCENC_ERR_BAD_CONTEXT;
  975. #else
  976. astcenc_contexti* ctx = &ctxo->context;
  977. if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
  978. {
  979. return ASTCENC_ERR_BAD_CONTEXT;
  980. }
  981. ctxo->manage_avg.reset();
  982. ctxo->manage_compress.reset();
  983. return ASTCENC_SUCCESS;
  984. #endif
  985. }
  986. /* See header for documentation. */
  987. astcenc_error astcenc_decompress_image(
  988. astcenc_context* ctxo,
  989. const uint8_t* data,
  990. size_t data_len,
  991. astcenc_image* image_outp,
  992. const astcenc_swizzle* swizzle,
  993. unsigned int thread_index
  994. ) {
  995. astcenc_error status;
  996. astcenc_image& image_out = *image_outp;
  997. astcenc_contexti* ctx = &ctxo->context;
  998. // Today this doesn't matter (working set on stack) but might in future ...
  999. if (thread_index >= ctx->thread_count)
  1000. {
  1001. return ASTCENC_ERR_BAD_PARAM;
  1002. }
  1003. status = validate_decompression_swizzle(*swizzle);
  1004. if (status != ASTCENC_SUCCESS)
  1005. {
  1006. return status;
  1007. }
  1008. unsigned int block_x = ctx->config.block_x;
  1009. unsigned int block_y = ctx->config.block_y;
  1010. unsigned int block_z = ctx->config.block_z;
  1011. unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
  1012. unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
  1013. unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
  1014. unsigned int block_count = zblocks * yblocks * xblocks;
  1015. int row_blocks = xblocks;
  1016. int plane_blocks = xblocks * yblocks;
  1017. // Check we have enough output space (16 bytes per block)
  1018. size_t size_needed = xblocks * yblocks * zblocks * 16;
  1019. if (data_len < size_needed)
  1020. {
  1021. return ASTCENC_ERR_OUT_OF_MEM;
  1022. }
  1023. image_block blk {};
  1024. blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
  1025. // Decode mode inferred from the output data type
  1026. blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8;
  1027. // If context thread count is one then implicitly reset
  1028. if (ctx->thread_count == 1)
  1029. {
  1030. astcenc_decompress_reset(ctxo);
  1031. }
  1032. // Only the first thread actually runs the initializer
  1033. ctxo->manage_decompress.init(block_count, nullptr);
  1034. // All threads run this processing loop until there is no work remaining
  1035. while (true)
  1036. {
  1037. unsigned int count;
  1038. unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count);
  1039. if (!count)
  1040. {
  1041. break;
  1042. }
  1043. for (unsigned int i = base; i < base + count; i++)
  1044. {
  1045. // Decode i into x, y, z block indices
  1046. int z = i / plane_blocks;
  1047. unsigned int rem = i - (z * plane_blocks);
  1048. int y = rem / row_blocks;
  1049. int x = rem - (y * row_blocks);
  1050. unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16;
  1051. const uint8_t* bp = data + offset;
  1052. symbolic_compressed_block scb;
  1053. physical_to_symbolic(*ctx->bsd, bp, scb);
  1054. decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
  1055. x * block_x, y * block_y, z * block_z,
  1056. scb, blk);
  1057. store_image_block(image_out, blk, *ctx->bsd,
  1058. x * block_x, y * block_y, z * block_z, *swizzle);
  1059. }
  1060. ctxo->manage_decompress.complete_task_assignment(count);
  1061. }
  1062. return ASTCENC_SUCCESS;
  1063. }
  1064. /* See header for documentation. */
  1065. astcenc_error astcenc_decompress_reset(
  1066. astcenc_context* ctxo
  1067. ) {
  1068. ctxo->manage_decompress.reset();
  1069. return ASTCENC_SUCCESS;
  1070. }
  1071. /* See header for documentation. */
  1072. astcenc_error astcenc_get_block_info(
  1073. astcenc_context* ctxo,
  1074. const uint8_t data[16],
  1075. astcenc_block_info* info
  1076. ) {
  1077. #if defined(ASTCENC_DECOMPRESS_ONLY)
  1078. (void)ctxo;
  1079. (void)data;
  1080. (void)info;
  1081. return ASTCENC_ERR_BAD_CONTEXT;
  1082. #else
  1083. astcenc_contexti* ctx = &ctxo->context;
  1084. // Decode the compressed data into a symbolic form
  1085. symbolic_compressed_block scb;
  1086. physical_to_symbolic(*ctx->bsd, data, scb);
  1087. // Fetch the appropriate partition and decimation tables
  1088. block_size_descriptor& bsd = *ctx->bsd;
  1089. // Start from a clean slate
  1090. memset(info, 0, sizeof(*info));
  1091. // Basic info we can always populate
  1092. info->profile = ctx->config.profile;
  1093. info->block_x = ctx->config.block_x;
  1094. info->block_y = ctx->config.block_y;
  1095. info->block_z = ctx->config.block_z;
  1096. info->texel_count = bsd.texel_count;
  1097. // Check for error blocks first
  1098. info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
  1099. if (info->is_error_block)
  1100. {
  1101. return ASTCENC_SUCCESS;
  1102. }
  1103. // Check for constant color blocks second
  1104. info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
  1105. scb.block_type == SYM_BTYPE_CONST_U16;
  1106. if (info->is_constant_block)
  1107. {
  1108. return ASTCENC_SUCCESS;
  1109. }
  1110. // Otherwise handle a full block ; known to be valid after conditions above have been checked
  1111. int partition_count = scb.partition_count;
  1112. const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
  1113. const block_mode& bm = bsd.get_block_mode(scb.block_mode);
  1114. const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
  1115. info->weight_x = di.weight_x;
  1116. info->weight_y = di.weight_y;
  1117. info->weight_z = di.weight_z;
  1118. info->is_dual_plane_block = bm.is_dual_plane != 0;
  1119. info->partition_count = scb.partition_count;
  1120. info->partition_index = scb.partition_index;
  1121. info->dual_plane_component = scb.plane2_component;
  1122. info->color_level_count = get_quant_level(scb.get_color_quant_mode());
  1123. info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
  1124. // Unpack color endpoints for each active partition
  1125. for (unsigned int i = 0; i < scb.partition_count; i++)
  1126. {
  1127. bool rgb_hdr;
  1128. bool a_hdr;
  1129. vint4 endpnt[2];
  1130. unpack_color_endpoints(ctx->config.profile,
  1131. scb.color_formats[i],
  1132. scb.color_values[i],
  1133. rgb_hdr, a_hdr,
  1134. endpnt[0], endpnt[1]);
  1135. // Store the color endpoint mode info
  1136. info->color_endpoint_modes[i] = scb.color_formats[i];
  1137. info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;
  1138. // Store the unpacked and decoded color endpoint
  1139. vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
  1140. for (int j = 0; j < 2; j++)
  1141. {
  1142. vint4 color_lns = lns_to_sf16(endpnt[j]);
  1143. vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
  1144. vint4 datai = select(color_unorm, color_lns, hdr_mask);
  1145. store(float16_to_float(datai), info->color_endpoints[i][j]);
  1146. }
  1147. }
  1148. // Unpack weights for each texel
  1149. int weight_plane1[BLOCK_MAX_TEXELS];
  1150. int weight_plane2[BLOCK_MAX_TEXELS];
  1151. unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
  1152. for (unsigned int i = 0; i < bsd.texel_count; i++)
  1153. {
  1154. info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
  1155. if (info->is_dual_plane_block)
  1156. {
  1157. info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
  1158. }
  1159. }
  1160. // Unpack partition assignments for each texel
  1161. for (unsigned int i = 0; i < bsd.texel_count; i++)
  1162. {
  1163. info->partition_assignment[i] = pi.partition_of_texel[i];
  1164. }
  1165. return ASTCENC_SUCCESS;
  1166. #endif
  1167. }
  1168. /* See header for documentation. */
  1169. const char* astcenc_get_error_string(
  1170. astcenc_error status
  1171. ) {
  1172. // Values in this enum are from an external user, so not guaranteed to be
  1173. // bounded to the enum values
  1174. switch (static_cast<int>(status))
  1175. {
  1176. case ASTCENC_SUCCESS:
  1177. return "ASTCENC_SUCCESS";
  1178. case ASTCENC_ERR_OUT_OF_MEM:
  1179. return "ASTCENC_ERR_OUT_OF_MEM";
  1180. case ASTCENC_ERR_BAD_CPU_FLOAT:
  1181. return "ASTCENC_ERR_BAD_CPU_FLOAT";
  1182. case ASTCENC_ERR_BAD_PARAM:
  1183. return "ASTCENC_ERR_BAD_PARAM";
  1184. case ASTCENC_ERR_BAD_BLOCK_SIZE:
  1185. return "ASTCENC_ERR_BAD_BLOCK_SIZE";
  1186. case ASTCENC_ERR_BAD_PROFILE:
  1187. return "ASTCENC_ERR_BAD_PROFILE";
  1188. case ASTCENC_ERR_BAD_QUALITY:
  1189. return "ASTCENC_ERR_BAD_QUALITY";
  1190. case ASTCENC_ERR_BAD_FLAGS:
  1191. return "ASTCENC_ERR_BAD_FLAGS";
  1192. case ASTCENC_ERR_BAD_SWIZZLE:
  1193. return "ASTCENC_ERR_BAD_SWIZZLE";
  1194. case ASTCENC_ERR_BAD_CONTEXT:
  1195. return "ASTCENC_ERR_BAD_CONTEXT";
  1196. case ASTCENC_ERR_NOT_IMPLEMENTED:
  1197. return "ASTCENC_ERR_NOT_IMPLEMENTED";
  1198. case ASTCENC_ERR_BAD_DECODE_MODE:
  1199. return "ASTCENC_ERR_BAD_DECODE_MODE";
  1200. #if defined(ASTCENC_DIAGNOSTICS)
  1201. case ASTCENC_ERR_DTRACE_FAILURE:
  1202. return "ASTCENC_ERR_DTRACE_FAILURE";
  1203. #endif
  1204. default:
  1205. return nullptr;
  1206. }
  1207. }