// basisu_backend.cpp
// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here.
//
#include "basisu_backend.h"

#if BASISU_SUPPORT_SSE
#define CPPSPMD_NAME(a) a##_sse41
#include "basisu_kernels_declares.h"
#endif

#define BASISU_FASTER_SELECTOR_REORDERING 0
#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__);

namespace basisu
{
	// TODO
	static inline void verify(bool condition, int line)
	{
		if (!condition)
		{
			fprintf(stderr, "ERROR: basisu_backend: verify() failed at line %i!\n", line);
			abort();
		}
	}

	basisu_backend::basisu_backend()
	{
		clear();
	}

	void basisu_backend::clear()
	{
		m_pFront_end = NULL;
		m_params.clear();
		m_output.clear();
	}

	void basisu_backend::init(basisu_frontend* pFront_end, basisu_backend_params& params, const basisu_backend_slice_desc_vec& slice_descs)
	{
		m_pFront_end = pFront_end;
		m_params = params;
		m_slices = slice_descs;

		debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n",
			m_slices.size(),
			params.m_etc1s,
			params.m_endpoint_rdo_quality_thresh,
			params.m_selector_rdo_quality_thresh);

		debug_printf("Frontend endpoints: %u selectors: %u\n", m_pFront_end->get_total_endpoint_clusters(), m_pFront_end->get_total_selector_clusters());

		for (uint32_t i = 0; i < m_slices.size(); i++)
		{
			debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, FirstBlockIndex: %u\n",
				i,
				m_slices[i].m_orig_width, m_slices[i].m_orig_height,
				m_slices[i].m_width, m_slices[i].m_height,
				m_slices[i].m_num_blocks_x, m_slices[i].m_num_blocks_y,
				m_slices[i].m_first_block_index);
		}
	}
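
	// Builds the ETC1S endpoint palette from the frontend's endpoint clusters.
	// Each entry holds the cluster's unscaled 5:5:5 base color and intensity table index.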
	void basisu_backend::create_endpoint_palette()
	{
		const basisu_frontend& r = *m_pFront_end;
		m_output.m_num_endpoints = r.get_total_endpoint_clusters();
		m_endpoint_palette.resize(r.get_total_endpoint_clusters());
		for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)
		{
			etc1_endpoint_palette_entry& e = m_endpoint_palette[i];
			e.m_color5_valid = r.get_endpoint_cluster_color_is_used(i, false);
			e.m_color5 = r.get_endpoint_cluster_unscaled_color(i, false);
			e.m_inten5 = r.get_endpoint_cluster_inten_table(i, false);
			BASISU_BACKEND_VERIFY(e.m_color5_valid);
		}
	}
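
	// Builds the selector palette: one 4x4 block of 2-bit selectors per selector cluster, copied from the frontend.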
	void basisu_backend::create_selector_palette()
	{
		const basisu_frontend& r = *m_pFront_end;
		m_output.m_num_selectors = r.get_total_selector_clusters();
		m_selector_palette.resize(r.get_total_selector_clusters());
		for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
		{
			etc1_selector_palette_entry& s = m_selector_palette[i];
			const etc_block& selector_bits = r.get_selector_cluster_selector_bits(i);
			for (uint32_t y = 0; y < 4; y++)
			{
				for (uint32_t x = 0; x < 4; x++)
				{
					s[y * 4 + x] = static_cast<uint8_t>(selector_bits.get_selector(x, y));
				}
			}
		}
	}
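
	// Block neighbor offsets used by the endpoint predictors: left, above, and upper-left.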
	static const struct
	{
		int8_t m_dx, m_dy;
	} g_endpoint_preds[] =
	{
		{ -1, 0 },
		{ 0, -1 },
		{ -1, -1 }
	};
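
	// If block endpoint indices were remapped, re-optimizes the endpoint codebook against the new assignments,
	// then sorts it and builds the old<->new endpoint remap tables used by the later delta coding.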
	void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices)
	{
		basisu_frontend& r = *m_pFront_end;
		//const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
		if (m_params.m_used_global_codebooks)
		{
			m_endpoint_remap_table_old_to_new.clear();
			m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters());
			for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)
				m_endpoint_remap_table_old_to_new[i] = i;
		}
		else
		{
			//if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0))
			if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1))
			{
				// We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed)
				uint_vec new_block_endpoints(get_total_blocks());
				for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
				{
					const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
					const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
					const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
					for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
						for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
							new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;
				}
				int_vec old_to_new_endpoint_indices;
				r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true);
				create_endpoint_palette();
				for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
				{
					//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
					//const uint32_t width = m_slices[slice_index].m_width;
					//const uint32_t height = m_slices[slice_index].m_height;
					const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
					const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
					for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
					{
						for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
						{
							//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
							encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
							m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index];
						} // block_x
					} // block_y
				} // slice_index
				for (uint32_t i = 0; i < all_endpoint_indices.size(); i++)
					all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]];
			} //if (total_block_endpoints_remapped)

			// Sort endpoint codebook
			palette_index_reorderer reorderer;
			reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0);
			m_endpoint_remap_table_old_to_new = reorderer.get_remap_table();
		}

		// For endpoints, old_to_new[] may not be bijective!
		// Some "old" entries may be unused and don't get remapped into the "new" array.
		m_old_endpoint_was_used.clear();
		m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters());
		uint32_t first_old_entry_index = UINT32_MAX;
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					const uint32_t old_endpoint_index = m.m_endpoint_index;
					m_old_endpoint_was_used[old_endpoint_index] = true;
					first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);
				} // block_x
			} // block_y
		} // slice_index
		debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index);

		m_new_endpoint_was_used.clear();
		m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters());
		m_endpoint_remap_table_new_to_old.clear();
		m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters());
		// Set unused entries in the new array to point to the first used entry in the old array.
		m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index);
		for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++)
		{
			if (m_old_endpoint_was_used[old_index])
			{
				const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index];
				m_new_endpoint_was_used[new_index] = true;
				m_endpoint_remap_table_new_to_old[new_index] = old_index;
			}
		}
	}
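
	// Orders the selector codebook with a greedy nearest-neighbor pass (minimum Hamming distance between
	// consecutive entries' packed selector bytes), then builds the old<->new selector remap tables.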
	void basisu_backend::sort_selector_codebook()
	{
		basisu_frontend& r = *m_pFront_end;
		m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters());
		if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks))
		{
			for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
				m_selector_remap_table_new_to_old[i] = i;
		}
		else
		{
			m_selector_remap_table_new_to_old[0] = 0;
			uint32_t prev_selector_index = 0;
			int_vec remaining_selectors;
			remaining_selectors.reserve(r.get_total_selector_clusters() - 1);
			for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)
				remaining_selectors.push_back(i);
			uint_vec selector_palette_bytes(m_selector_palette.size());
			for (uint32_t i = 0; i < m_selector_palette.size(); i++)
				selector_palette_bytes[i] = m_selector_palette[i].get_byte(0) | (m_selector_palette[i].get_byte(1) << 8) | (m_selector_palette[i].get_byte(2) << 16) | (m_selector_palette[i].get_byte(3) << 24);
			// This is the traveling salesman problem.
			for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)
			{
				uint32_t best_hamming_dist = 100;
				uint32_t best_index = 0;
#if BASISU_FASTER_SELECTOR_REORDERING
				const uint32_t step = (remaining_selectors.size() > 16) ? 16 : 1;
				for (uint32_t j = 0; j < remaining_selectors.size(); j += step)
#else
				for (uint32_t j = 0; j < remaining_selectors.size(); j++)
#endif
				{
					int selector_index = remaining_selectors[j];
					uint32_t k = selector_palette_bytes[prev_selector_index] ^ selector_palette_bytes[selector_index];
					uint32_t hamming_dist = g_hamming_dist[k & 0xFF] + g_hamming_dist[(k >> 8) & 0xFF] + g_hamming_dist[(k >> 16) & 0xFF] + g_hamming_dist[k >> 24];
					if (hamming_dist < best_hamming_dist)
					{
						best_hamming_dist = hamming_dist;
						best_index = j;
						if (best_hamming_dist <= 1)
							break;
					}
				}
				prev_selector_index = remaining_selectors[best_index];
				m_selector_remap_table_new_to_old[i] = prev_selector_index;
				remaining_selectors[best_index] = remaining_selectors.back();
				remaining_selectors.resize(remaining_selectors.size() - 1);
			}
		}
		m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters());
		for (uint32_t i = 0; i < m_selector_remap_table_new_to_old.size(); i++)
			m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i;
	}
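
	// For video: returns the index of the slice at source_file_index + delta that has the same mip level,
	// dimensions, and alpha flag as the given slice, or -1 if there is no such frame.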
	int basisu_backend::find_video_frame(int slice_index, int delta)
	{
		for (uint32_t s = 0; s < m_slices.size(); s++)
		{
			if ((int)m_slices[s].m_source_file_index != ((int)m_slices[slice_index].m_source_file_index + delta))
				continue;
			if (m_slices[s].m_mip_index != m_slices[slice_index].m_mip_index)
				continue;
			// Being super paranoid here.
			if (m_slices[s].m_num_blocks_x != (m_slices[slice_index].m_num_blocks_x))
				continue;
			if (m_slices[s].m_num_blocks_y != (m_slices[slice_index].m_num_blocks_y))
				continue;
			if (m_slices[s].m_alpha != (m_slices[slice_index].m_alpha))
				continue;
			return s;
		}
		return -1;
	}
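
	// Video-only sanity check: i-frames (and frames with no previous frame) must not use the CR (conditional
	// replenishment) endpoint predictor, and any block that does use it must exactly match the corresponding
	// block's endpoint and selector indices in the previous frame.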
	void basisu_backend::check_for_valid_cr_blocks()
	{
		basisu_frontend& r = *m_pFront_end;
		const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
		if (!is_video)
			return;
		debug_printf("basisu_backend::check_for_valid_cr_blocks\n");
		uint32_t total_crs = 0;
		uint32_t total_invalid_crs = 0;
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			const bool is_iframe = m_slices[slice_index].m_iframe;
			//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
			//const uint32_t width = m_slices[slice_index].m_width;
			//const uint32_t height = m_slices[slice_index].m_height;
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
			const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			const int prev_frame_slice_index = find_video_frame(slice_index, -1);
			// If we don't have a previous frame, and we're not an i-frame, something is wrong.
			if ((prev_frame_slice_index < 0) && (!is_iframe))
			{
				BASISU_BACKEND_VERIFY(0);
			}
			if ((is_iframe) || (prev_frame_slice_index < 0))
			{
				// Ensure no blocks use CR's
				for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
				{
					for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
					{
						encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
						BASISU_BACKEND_VERIFY(m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX);
					}
				}
			}
			else
			{
				// For blocks that use CR's, make sure the endpoints/selectors haven't really changed.
				for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
				{
					for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
					{
						encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
						if (m.m_endpoint_predictor == basist::CR_ENDPOINT_PRED_INDEX)
						{
							total_crs++;
							encoder_block& prev_m = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y);
							if ((m.m_endpoint_index != prev_m.m_endpoint_index) || (m.m_selector_index != prev_m.m_selector_index))
							{
								total_invalid_crs++;
							}
						}
					} // block_x
				} // block_y
			} // !slice_index
		} // slice_index
		debug_printf("Total CR's: %u, Total invalid CR's: %u\n", total_crs, total_invalid_crs);
		BASISU_BACKEND_VERIFY(total_invalid_crs == 0);
	}
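
	// Assigns each block its endpoint/selector cluster indices from the frontend and chooses an endpoint
	// predictor per block (left, above, upper-left neighbor, or CR for unchanged video blocks). When no
	// predictor matches exactly and endpoint RDO is enabled, a predicted endpoint may be substituted if
	// its error stays within the configured quality threshold.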
	void basisu_backend::create_encoder_blocks()
	{
		debug_printf("basisu_backend::create_encoder_blocks\n");
		interval_timer tm;
		tm.start();
		basisu_frontend& r = *m_pFront_end;
		const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
		m_slice_encoder_blocks.resize(m_slices.size());
		uint32_t total_endpoint_pred_missed = 0, total_endpoint_pred_hits = 0, total_block_endpoints_remapped = 0;
		uint_vec all_endpoint_indices;
		all_endpoint_indices.reserve(get_total_blocks());
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;
			const bool is_iframe = m_slices[slice_index].m_iframe;
			const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
			//const uint32_t width = m_slices[slice_index].m_width;
			//const uint32_t height = m_slices[slice_index].m_height;
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
			const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			m_slice_encoder_blocks[slice_index].resize(num_blocks_x, num_blocks_y);
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					m.m_endpoint_index = r.get_subblock_endpoint_cluster_index(block_index, 0);
					BASISU_BACKEND_VERIFY(r.get_subblock_endpoint_cluster_index(block_index, 0) == r.get_subblock_endpoint_cluster_index(block_index, 1));
					m.m_selector_index = r.get_block_selector_cluster_index(block_index);
					m.m_endpoint_predictor = basist::NO_ENDPOINT_PRED_INDEX;
					const uint32_t block_endpoint = m.m_endpoint_index;
					uint32_t best_endpoint_pred = UINT32_MAX;
					for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)
					{
						if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))
						{
							if ((prev_frame_slice_index != -1) && (!is_iframe))
							{
								const uint32_t cur_endpoint = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;
								const uint32_t cur_selector = m_slice_encoder_blocks[slice_index](block_x, block_y).m_selector_index;
								const uint32_t prev_endpoint = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_endpoint_index;
								const uint32_t prev_selector = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_selector_index;
								if ((cur_endpoint == prev_endpoint) && (cur_selector == prev_selector))
								{
									best_endpoint_pred = basist::CR_ENDPOINT_PRED_INDEX;
									m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_is_cr_target = true;
								}
							}
						}
						else
						{
							int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;
							if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))
								continue;
							int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;
							if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))
								continue;
							uint32_t pred_endpoint = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;
							if (pred_endpoint == block_endpoint)
							{
								if (endpoint_pred < best_endpoint_pred)
								{
									best_endpoint_pred = endpoint_pred;
								}
							}
						}
					} // endpoint_pred
					if (best_endpoint_pred != UINT32_MAX)
					{
						m.m_endpoint_predictor = best_endpoint_pred;
						total_endpoint_pred_hits++;
					}
					else if (m_params.m_endpoint_rdo_quality_thresh > 0.0f)
					{
						const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
						etc_block etc_blk(r.get_output_block(block_index));
						uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
						if (cur_err)
						{
							const uint64_t thresh_err = (uint64_t)(cur_err * maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh));
							etc_block trial_etc_block(etc_blk);
							uint64_t best_err = UINT64_MAX;
							uint32_t best_endpoint_index = 0;
							best_endpoint_pred = UINT32_MAX;
							for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)
							{
								if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))
									continue;
								int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;
								if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))
									continue;
								int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;
								if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))
									continue;
								uint32_t pred_endpoint_index = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;
								uint32_t pred_inten = r.get_endpoint_cluster_inten_table(pred_endpoint_index, false);
								color_rgba pred_color = r.get_endpoint_cluster_unscaled_color(pred_endpoint_index, false);
								trial_etc_block.set_block_color5(pred_color, pred_color);
								trial_etc_block.set_inten_table(0, pred_inten);
								trial_etc_block.set_inten_table(1, pred_inten);
								color_rgba trial_colors[16];
								unpack_etc1(trial_etc_block, trial_colors);
								uint64_t trial_err = 0;
								if (r.get_params().m_perceptual)
								{
									for (uint32_t p = 0; p < 16; p++)
									{
										trial_err += color_distance(true, src_pixels.get_ptr()[p], trial_colors[p], false);
										if (trial_err > thresh_err)
											break;
									}
								}
								else
								{
									for (uint32_t p = 0; p < 16; p++)
									{
										trial_err += color_distance(false, src_pixels.get_ptr()[p], trial_colors[p], false);
										if (trial_err > thresh_err)
											break;
									}
								}
								if (trial_err <= thresh_err)
								{
									if ((trial_err < best_err) || ((trial_err == best_err) && (endpoint_pred < best_endpoint_pred)))
									{
										best_endpoint_pred = endpoint_pred;
										best_err = trial_err;
										best_endpoint_index = pred_endpoint_index;
									}
								}
							} // endpoint_pred
							if (best_endpoint_pred != UINT32_MAX)
							{
								m.m_endpoint_index = best_endpoint_index;
								m.m_endpoint_predictor = best_endpoint_pred;
								total_endpoint_pred_hits++;
								total_block_endpoints_remapped++;
							}
							else
							{
								total_endpoint_pred_missed++;
							}
						}
					}
					else
					{
						total_endpoint_pred_missed++;
					}
					if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
					{
						all_endpoint_indices.push_back(m.m_endpoint_index);
					}
				} // block_x
			} // block_y
		} // slice
		debug_printf("total_endpoint_pred_missed: %u (%3.2f%%) total_endpoint_pred_hit: %u (%3.2f%%), total_block_endpoints_remapped: %u (%3.2f%%)\n",
			total_endpoint_pred_missed, total_endpoint_pred_missed * 100.0f / get_total_blocks(),
			total_endpoint_pred_hits, total_endpoint_pred_hits * 100.0f / get_total_blocks(),
			total_block_endpoints_remapped, total_block_endpoints_remapped * 100.0f / get_total_blocks());
		reoptimize_and_sort_endpoints_codebook(total_block_endpoints_remapped, all_endpoint_indices);
		sort_selector_codebook();
		check_for_valid_cr_blocks();
		debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
	}
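
	// Packs each slice into an ETC1S-constrained ETC1 texture (diff bit set, flip bit clear) using the final
	// endpoint/selector palettes, records a CRC-16 of the packed slice, and optionally writes debug PNGs.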
	void basisu_backend::compute_slice_crcs()
	{
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
			const uint32_t width = m_slices[slice_index].m_width;
			const uint32_t height = m_slices[slice_index].m_height;
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
			const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			gpu_image gi;
			gi.init(texture_format::cETC1, width, height);
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					{
						etc_block& output_block = *(etc_block*)gi.get_block_ptr(block_x, block_y);
						output_block.set_diff_bit(true);
						// Setting the flip bit to false to be compatible with the Khronos KDFS.
						//output_block.set_flip_bit(true);
						output_block.set_flip_bit(false);
						const uint32_t endpoint_index = m.m_endpoint_index;
						output_block.set_block_color5_etc1s(m_endpoint_palette[endpoint_index].m_color5);
						output_block.set_inten_tables_etc1s(m_endpoint_palette[endpoint_index].m_inten5);
						const uint32_t selector_idx = m.m_selector_index;
						const etc1_selector_palette_entry& selectors = m_selector_palette[selector_idx];
						for (uint32_t sy = 0; sy < 4; sy++)
							for (uint32_t sx = 0; sx < 4; sx++)
								output_block.set_selector(sx, sy, selectors(sx, sy));
					}
				} // block_x
			} // block_y
			m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0);
			if (m_params.m_debug_images)
			{
				image gi_unpacked;
				gi.unpack(gi_unpacked);
				char buf[256];
#ifdef _WIN32
				sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
#else
				snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
#endif
				save_png(buf, gi_unpacked);
			}
		} // slice_index
	}

	//uint32_t g_color_delta_hist[255 * 3 + 1];
	//uint32_t g_color_delta_bad_hist[255 * 3 + 1];

	// TODO: Split this into multiple methods.
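	// Symbolizes and entropy codes the per-block data: 2x2 groups of endpoint predictor symbols (with RLE),
	// delta-coded endpoint indices, and selector indices coded against an approximate move-to-front history
	// buffer, with optional endpoint/selector RDO passes. It then builds the Huffman models and emits the
	// per-slice bitstreams.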
	bool basisu_backend::encode_image()
	{
		basisu_frontend& r = *m_pFront_end;
		const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;
		uint32_t total_used_selector_history_buf = 0;
		uint32_t total_selector_indices_remapped = 0;
		basist::approx_move_to_front selector_history_buf(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);
		histogram selector_history_buf_histogram(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);
		histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1);
		histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
		basisu::vector<uint_vec> selector_syms(m_slices.size());
		const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters();
		const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE;
		m_output.m_slice_image_crcs.resize(m_slices.size());
		histogram delta_endpoint_histogram(r.get_total_endpoint_clusters());
		histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS);
		basisu::vector<uint_vec> endpoint_pred_syms(m_slices.size());
		uint32_t total_endpoint_indices_remapped = 0;
		uint_vec block_endpoint_indices, block_selector_indices;
		interval_timer tm;
		tm.start();
		const int COLOR_DELTA_THRESH = 8;
		const int SEL_DIFF_THRESHOLD = 11;
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			//const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;
			//const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1;
			const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
			//const uint32_t width = m_slices[slice_index].m_width;
			//const uint32_t height = m_slices[slice_index].m_height;
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
			const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			selector_history_buf.reset();
			int selector_history_buf_rle_count = 0;
			int prev_endpoint_pred_sym_bits = -1, endpoint_pred_repeat_count = 0;
			uint32_t prev_endpoint_index = 0;
			vector2D<uint8_t> block_endpoints_are_referenced(num_blocks_x, num_blocks_y);
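
			// First pass: mark blocks whose endpoints are referenced by a neighbor's predictor (or by a CR block
			// in the next frame); their endpoint indices must not be changed by the RDO remapping below.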
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					if (m.m_endpoint_predictor == 0)
						block_endpoints_are_referenced(block_x - 1, block_y) = true;
					else if (m.m_endpoint_predictor == 1)
						block_endpoints_are_referenced(block_x, block_y - 1) = true;
					else if (m.m_endpoint_predictor == 2)
					{
						if (!is_video)
							block_endpoints_are_referenced(block_x - 1, block_y - 1) = true;
					}
					if (is_video)
					{
						if (m.m_is_cr_target)
							block_endpoints_are_referenced(block_x, block_y) = true;
					}
				} // block_x
			} // block_y
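
			// Main per-block pass: emit an endpoint predictor symbol for each 2x2 block group (RLE coded),
			// delta code endpoint indices (with optional endpoint RDO against nearby palette entries), and
			// code selector indices against the history buffer (with optional selector RDO).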
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					if (((block_x & 1) == 0) && ((block_y & 1) == 0))
					{
						uint32_t endpoint_pred_cur_sym_bits = 0;
						for (uint32_t y = 0; y < 2; y++)
						{
							for (uint32_t x = 0; x < 2; x++)
							{
								const uint32_t bx = block_x + x;
								const uint32_t by = block_y + y;
								uint32_t pred = basist::NO_ENDPOINT_PRED_INDEX;
								if ((bx < num_blocks_x) && (by < num_blocks_y))
									pred = m_slice_encoder_blocks[slice_index](bx, by).m_endpoint_predictor;
								endpoint_pred_cur_sym_bits |= (pred << (x * 2 + y * 4));
							}
						}
						if ((int)endpoint_pred_cur_sym_bits == prev_endpoint_pred_sym_bits)
						{
							endpoint_pred_repeat_count++;
						}
						else
						{
							if (endpoint_pred_repeat_count > 0)
							{
								if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)
								{
									endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
									endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
									endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);
								}
								else
								{
									for (int j = 0; j < endpoint_pred_repeat_count; j++)
									{
										endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);
										endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);
									}
								}
								endpoint_pred_repeat_count = 0;
							}
							endpoint_pred_histogram.inc(endpoint_pred_cur_sym_bits);
							endpoint_pred_syms[slice_index].push_back(endpoint_pred_cur_sym_bits);
							prev_endpoint_pred_sym_bits = endpoint_pred_cur_sym_bits;
						}
					}
					int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];
					if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
					{
						int endpoint_delta = new_endpoint_index - prev_endpoint_index;
						if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))
						{
							const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
							etc_block etc_blk(r.get_output_block(block_index));
							const uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
							const uint32_t cur_inten5 = etc_blk.get_inten_table(0);
							const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index];
							if (cur_err)
							{
								const float endpoint_remap_thresh = maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh);
								const uint64_t thresh_err = (uint64_t)(cur_err * endpoint_remap_thresh);
								//const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 32;
								const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 16;
								if (!g_cpu_supports_sse41)
								{
									const uint64_t initial_best_trial_err = UINT64_MAX;
									uint64_t best_trial_err = initial_best_trial_err;
									int best_trial_idx = 0;
									etc_block trial_etc_blk(etc_blk);
									const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);
									for (int d = -search_dist; d < search_dist; d++)
									{
										int trial_idx = prev_endpoint_index + d;
										if (trial_idx < 0)
											trial_idx += (int)r.get_total_endpoint_clusters();
										else if (trial_idx >= (int)r.get_total_endpoint_clusters())
											trial_idx -= (int)r.get_total_endpoint_clusters();
										if (trial_idx == new_endpoint_index)
											continue;
										// Skip it if this new endpoint palette entry is actually never used.
										if (!m_new_endpoint_was_used[trial_idx])
											continue;
										const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];
										if (m_params.m_compression_level <= 1)
										{
											if (p.m_inten5 > cur_inten5)
												continue;
											int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);
											int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);
											int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);
											int color_delta = delta_r + delta_g + delta_b;
											if (color_delta > COLOR_DELTA_THRESH)
												continue;
										}
										trial_etc_blk.set_block_color5_etc1s(p.m_color5);
										trial_etc_blk.set_inten_tables_etc1s(p.m_inten5);
										uint64_t trial_err = trial_etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);
										if ((trial_err < best_trial_err) && (trial_err <= thresh_err))
										{
											best_trial_err = trial_err;
											best_trial_idx = trial_idx;
										}
									}
									if (best_trial_err != initial_best_trial_err)
									{
										m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];
										new_endpoint_index = best_trial_idx;
										endpoint_delta = new_endpoint_index - prev_endpoint_index;
										total_endpoint_indices_remapped++;
									}
								}
								else
								{
#if BASISU_SUPPORT_SSE
									uint8_t block_selectors[16];
									for (uint32_t i = 0; i < 16; i++)
										block_selectors[i] = (uint8_t)etc_blk.get_selector(i & 3, i >> 2);
									const int64_t initial_best_trial_err = INT64_MAX;
									int64_t best_trial_err = initial_best_trial_err;
									int best_trial_idx = 0;
									const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);
									for (int d = -search_dist; d < search_dist; d++)
									{
										int trial_idx = prev_endpoint_index + d;
										if (trial_idx < 0)
											trial_idx += (int)r.get_total_endpoint_clusters();
										else if (trial_idx >= (int)r.get_total_endpoint_clusters())
											trial_idx -= (int)r.get_total_endpoint_clusters();
										if (trial_idx == new_endpoint_index)
											continue;
										// Skip it if this new endpoint palette entry is actually never used.
										if (!m_new_endpoint_was_used[trial_idx])
											continue;
										const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];
										if (m_params.m_compression_level <= 1)
										{
											if (p.m_inten5 > cur_inten5)
												continue;
											int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);
											int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);
											int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);
											int color_delta = delta_r + delta_g + delta_b;
											if (color_delta > COLOR_DELTA_THRESH)
												continue;
										}
										color_rgba block_colors[4];
										etc_block::get_block_colors_etc1s(block_colors, p.m_color5, p.m_inten5);
										int64_t trial_err;
										if (r.get_params().m_perceptual)
										{
											perceptual_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);
										}
										else
										{
											linear_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);
										}
										//if (trial_err > thresh_err)
										//	g_color_delta_bad_hist[color_delta]++;
										if ((trial_err < best_trial_err) && (trial_err <= (int64_t)thresh_err))
										{
											best_trial_err = trial_err;
											best_trial_idx = trial_idx;
										}
									}
									if (best_trial_err != initial_best_trial_err)
									{
										m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];
										new_endpoint_index = best_trial_idx;
										endpoint_delta = new_endpoint_index - prev_endpoint_index;
										total_endpoint_indices_remapped++;
									}
#endif // BASISU_SUPPORT_SSE
								} // if (!g_cpu_supports_sse41)
							} // if (cur_err)
						} // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))
						if (endpoint_delta < 0)
							endpoint_delta += (int)r.get_total_endpoint_clusters();
						delta_endpoint_histogram.inc(endpoint_delta);
					} // if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
					block_endpoint_indices.push_back(m_endpoint_remap_table_new_to_old[new_endpoint_index]);
					prev_endpoint_index = new_endpoint_index;
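
					// Selector coding: CR-predicted video blocks reuse the previous frame's selectors, so only
					// non-CR blocks emit selector symbols here.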
					if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))
					{
						int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index];
						const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f;
						int selector_history_buf_index = -1;
						// At low comp levels this hurts compression a tiny amount, but is significantly faster so it's a good tradeoff.
						if ((m.m_is_cr_target) || (m_params.m_compression_level <= 1))
						{
							for (uint32_t j = 0; j < selector_history_buf.size(); j++)
							{
								const int trial_idx = selector_history_buf[j];
								if (trial_idx == new_selector_index)
								{
									total_used_selector_history_buf++;
									selector_history_buf_index = j;
									selector_history_buf_histogram.inc(j);
									break;
								}
							}
						}
						// If the block is a CR target we can't override its selectors.
						if ((!m.m_is_cr_target) && (selector_history_buf_index == -1))
						{
							const pixel_block& src_pixels = r.get_source_pixel_block(block_index);
							etc_block etc_blk = r.get_output_block(block_index);
							// This is new code - the initial release just used the endpoints from the frontend, which isn't correct/accurate.
							const etc1_endpoint_palette_entry& q = m_endpoint_palette[m_endpoint_remap_table_new_to_old[new_endpoint_index]];
							etc_blk.set_block_color5_etc1s(q.m_color5);
							etc_blk.set_inten_tables_etc1s(q.m_inten5);
							color_rgba block_colors[4];
							etc_blk.get_block_colors(block_colors, 0);
							const uint8_t* pCur_selectors = &m_selector_palette[m.m_selector_index][0];
							uint64_t cur_err = 0;
							if (r.get_params().m_perceptual)
							{
								for (uint32_t p = 0; p < 16; p++)
									cur_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);
							}
							else
							{
								for (uint32_t p = 0; p < 16; p++)
									cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);
							}
							const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh);
							// Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code.
							uint64_t best_trial_err = UINT64_MAX;
							int best_trial_idx = 0;
							uint32_t best_trial_history_buf_idx = 0;
							for (uint32_t j = 0; j < selector_history_buf.size(); j++)
							{
								const int trial_idx = selector_history_buf[j];
								const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]][0];
								if (m_params.m_compression_level <= 1)
								{
									// Predict if evaluating the full color error would cause an early out, by summing the abs err of the selector indices.
									int sel_diff = 0;
									for (uint32_t p = 0; p < 16; p += 4)
									{
										sel_diff += iabs(pCur_selectors[p + 0] - pSelectors[p + 0]);
										sel_diff += iabs(pCur_selectors[p + 1] - pSelectors[p + 1]);
										sel_diff += iabs(pCur_selectors[p + 2] - pSelectors[p + 2]);
										sel_diff += iabs(pCur_selectors[p + 3] - pSelectors[p + 3]);
										if (sel_diff >= SEL_DIFF_THRESHOLD)
											break;
									}
									if (sel_diff >= SEL_DIFF_THRESHOLD)
										continue;
								}
								const uint64_t thresh_err = minimum(limit_err, best_trial_err);
								uint64_t trial_err = 0;
								// This tends to early out quickly, so SSE has a hard time competing.
								if (r.get_params().m_perceptual)
								{
									for (uint32_t p = 0; p < 16; p++)
									{
										uint32_t sel = pSelectors[p];
										trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false);
										if (trial_err > thresh_err)
											break;
									}
								}
								else
								{
									for (uint32_t p = 0; p < 16; p++)
									{
										uint32_t sel = pSelectors[p];
										trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false);
										if (trial_err > thresh_err)
											break;
									}
								}
								if ((trial_err < best_trial_err) && (trial_err <= thresh_err))
								{
									assert(trial_err <= limit_err);
									best_trial_err = trial_err;
									best_trial_idx = trial_idx;
									best_trial_history_buf_idx = j;
								}
							}
							if (best_trial_err != UINT64_MAX)
							{
								if (new_selector_index != best_trial_idx)
									total_selector_indices_remapped++;
								new_selector_index = best_trial_idx;
								total_used_selector_history_buf++;
								selector_history_buf_index = best_trial_history_buf_idx;
								selector_history_buf_histogram.inc(best_trial_history_buf_idx);
							}
						} // if (m_params.m_selector_rdo_quality_thresh > 0.0f)
						m.m_selector_index = m_selector_remap_table_new_to_old[new_selector_index];
						if ((selector_history_buf_rle_count) && (selector_history_buf_index != 0))
						{
							if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
							{
								selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
								selector_syms[slice_index].push_back(selector_history_buf_rle_count);
								int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
								if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
									selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
								else
									selector_history_buf_rle_histogram.inc(run_sym);
								selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
							}
							else
							{
								for (int k = 0; k < selector_history_buf_rle_count; k++)
								{
									uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
									selector_syms[slice_index].push_back(sym_index);
									selector_histogram.inc(sym_index);
								}
							}
							selector_history_buf_rle_count = 0;
						}
						if (selector_history_buf_index >= 0)
						{
							if (selector_history_buf_index == 0)
								selector_history_buf_rle_count++;
							else
							{
								uint32_t history_buf_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index;
								selector_syms[slice_index].push_back(history_buf_sym);
								selector_histogram.inc(history_buf_sym);
							}
						}
						else
						{
							selector_syms[slice_index].push_back(new_selector_index);
							selector_histogram.inc(new_selector_index);
						}
						m.m_selector_history_buf_index = selector_history_buf_index;
						if (selector_history_buf_index < 0)
							selector_history_buf.add(new_selector_index);
						else if (selector_history_buf.size())
							selector_history_buf.use(selector_history_buf_index);
					}
					block_selector_indices.push_back(m.m_selector_index);
				} // block_x
			} // block_y
			if (endpoint_pred_repeat_count > 0)
			{
				if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)
				{
					endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
					endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);
					endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);
				}
				else
				{
					for (int j = 0; j < endpoint_pred_repeat_count; j++)
					{
						endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);
						endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);
					}
				}
				endpoint_pred_repeat_count = 0;
			}
			if (selector_history_buf_rle_count)
			{
				if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
				{
					selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
					selector_syms[slice_index].push_back(selector_history_buf_rle_count);
					int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
					if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
						selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
					else
						selector_history_buf_rle_histogram.inc(run_sym);
					selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
				}
				else
				{
					for (int i = 0; i < selector_history_buf_rle_count; i++)
					{
						uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
						selector_syms[slice_index].push_back(sym_index);
						selector_histogram.inc(sym_index);
					}
				}
				selector_history_buf_rle_count = 0;
			}
		} // slice_index

		//for (int i = 0; i <= 255 * 3; i++)
		//{
		//	printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0);
		//}

		double total_prep_time = tm.get_elapsed_secs();
		debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time);
		debug_printf("Endpoint pred RDO total endpoint indices remapped: %u %3.2f%%\n",
			total_endpoint_indices_remapped, total_endpoint_indices_remapped * 100.0f / get_total_blocks());
		debug_printf("Selector history RDO total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n",
			total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(),
			total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks());
		//if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0))
		if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks))
		{
			int_vec unused;
			r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices);
			create_endpoint_palette();
		}
		check_for_valid_cr_blocks();
		compute_slice_crcs();

		double endpoint_pred_entropy = endpoint_pred_histogram.get_entropy() / endpoint_pred_histogram.get_total();
		double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total();
		double selector_entropy = selector_histogram.get_entropy() / selector_histogram.get_total();
		debug_printf("Histogram entropy: EndpointPred: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f\n", endpoint_pred_entropy, delta_endpoint_entropy, selector_entropy);

		if (!endpoint_pred_histogram.get_total())
			endpoint_pred_histogram.inc(0);
		huffman_encoding_table endpoint_pred_model;
		if (!endpoint_pred_model.init(endpoint_pred_histogram, 16))
		{
			error_printf("endpoint_pred_model.init() failed!");
			return false;
		}
		if (!delta_endpoint_histogram.get_total())
			delta_endpoint_histogram.inc(0);
		huffman_encoding_table delta_endpoint_model;
		if (!delta_endpoint_model.init(delta_endpoint_histogram, 16))
		{
			error_printf("delta_endpoint_model.init() failed!");
			return false;
		}
		if (!selector_histogram.get_total())
			selector_histogram.inc(0);
		huffman_encoding_table selector_model;
		if (!selector_model.init(selector_histogram, 16))
		{
			error_printf("selector_model.init() failed!");
			return false;
		}
		if (!selector_history_buf_rle_histogram.get_total())
			selector_history_buf_rle_histogram.inc(0);
		huffman_encoding_table selector_history_buf_rle_model;
		if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 16))
		{
			error_printf("selector_history_buf_rle_model.init() failed!");
			return false;
		}

		bitwise_coder coder;
		coder.init(1024 * 1024 * 4);
		uint32_t endpoint_pred_model_bits = coder.emit_huffman_table(endpoint_pred_model);
		uint32_t delta_endpoint_bits = coder.emit_huffman_table(delta_endpoint_model);
		uint32_t selector_model_bits = coder.emit_huffman_table(selector_model);
		uint32_t selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model);
		coder.put_bits(basist::MAX_SELECTOR_HISTORY_BUF_SIZE, 13);
		debug_printf("Model sizes: EndpointPred: %u bits %u bytes (%3.3f bpp) DeltaEndpoint: %u bits %u bytes (%3.3f bpp) Selector: %u bits %u bytes (%3.3f bpp) SelectorHistBufRLE: %u bits %u bytes (%3.3f bpp)\n",
			endpoint_pred_model_bits, (endpoint_pred_model_bits + 7) / 8, endpoint_pred_model_bits / float(get_total_input_texels()),
			delta_endpoint_bits, (delta_endpoint_bits + 7) / 8, delta_endpoint_bits / float(get_total_input_texels()),
			selector_model_bits, (selector_model_bits + 7) / 8, selector_model_bits / float(get_total_input_texels()),
			selector_history_buf_run_sym_bits, (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels()));
		coder.flush();
		m_output.m_slice_image_tables = coder.get_bytes();

		uint32_t total_endpoint_pred_bits = 0, total_delta_endpoint_bits = 0, total_selector_bits = 0;
		uint32_t total_image_bytes = 0;
		m_output.m_slice_image_data.resize(m_slices.size());
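
		// Second pass: walk the blocks again in the same order and emit the actual per-slice bitstreams using
		// the Huffman models built above, consuming the symbol lists generated during the first pass.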
		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			//const uint32_t width = m_slices[slice_index].m_width;
			//const uint32_t height = m_slices[slice_index].m_height;
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
			const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;

			coder.init(1024 * 1024 * 4);

			uint32_t cur_selector_sym_ofs = 0;
			uint32_t selector_rle_count = 0;

			int endpoint_pred_repeat_count = 0;
			uint32_t cur_endpoint_pred_sym_ofs = 0;
			//uint32_t prev_endpoint_pred_sym = 0;
			uint32_t prev_endpoint_index = 0;

			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					const encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
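
					// Endpoint prediction symbols are only coded once per 2x2 group of blocks (at the
					// upper-left block of each group). Runs of repeated symbols were collapsed into
					// ENDPOINT_PRED_REPEAT_LAST_SYMBOL plus a VLC-coded repeat count.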
					if (((block_x & 1) == 0) && ((block_y & 1) == 0))
					{
						if (endpoint_pred_repeat_count > 0)
						{
							endpoint_pred_repeat_count--;
						}
						else
						{
							uint32_t sym = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];

							if (sym == basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
							{
								total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);

								endpoint_pred_repeat_count = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];
								assert(endpoint_pred_repeat_count >= (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT);

								total_endpoint_pred_bits += coder.put_vlc(endpoint_pred_repeat_count - basist::ENDPOINT_PRED_MIN_REPEAT_COUNT, basist::ENDPOINT_PRED_COUNT_VLC_BITS);

								endpoint_pred_repeat_count--;
							}
							else
							{
								total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);
								//prev_endpoint_pred_sym = sym;
							}
						}
					}
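
					// If no endpoint predictor applied to this block, code its (remapped) endpoint index
					// as a delta from the previous block's index, wrapped modulo the endpoint codebook size.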
					const int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];

					if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)
					{
						int endpoint_delta = new_endpoint_index - (int)prev_endpoint_index;
						if (endpoint_delta < 0)
							endpoint_delta += (int)r.get_total_endpoint_clusters();

						total_delta_endpoint_bits += coder.put_code(endpoint_delta, delta_endpoint_model);
					}

					prev_endpoint_index = new_endpoint_index;
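
					// Selectors: in video mode, blocks using the CR (conditional replenishment) predictor
					// emit no selector data here (the transcoder presumably reuses the previous frame's
					// selectors). Otherwise, either start a new selector symbol or continue an RLE run
					// against the selector history buffer.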
					if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))
					{
						if (!selector_rle_count)
						{
							uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++];

							if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
								selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++];

							total_selector_bits += coder.put_code(selector_sym_index, selector_model);

							if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
							{
								int run_sym = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
								if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
								{
									total_selector_bits += coder.put_code(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model);

									uint32_t n = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
									total_selector_bits += coder.put_vlc(n, 7);
								}
								else
									total_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model);
							}
						}

						if (selector_rle_count)
							selector_rle_count--;
					}

				} // block_x

			} // block_y
			BASISU_BACKEND_VERIFY(cur_endpoint_pred_sym_ofs == endpoint_pred_syms[slice_index].size());
			BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size());

			coder.flush();

			m_output.m_slice_image_data[slice_index] = coder.get_bytes();

			total_image_bytes += (uint32_t)coder.get_bytes().size();

			debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, (uint32_t)m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / (m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height));

		} // slice_index
		const double total_texels = static_cast<double>(get_total_input_texels());
		const double total_blocks = static_cast<double>(get_total_blocks());

		debug_printf("Total endpoint pred bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_endpoint_pred_bits, total_endpoint_pred_bits / 8, total_endpoint_pred_bits / total_texels, total_endpoint_pred_bits / total_blocks);
		debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_blocks);
		debug_printf("Total selector bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_selector_bits, total_selector_bits / 8, total_selector_bits / total_texels, total_selector_bits / total_blocks);

		debug_printf("Total table bytes: %u, %3.3f bits/texel\n", (uint32_t)m_output.m_slice_image_tables.size(), m_output.m_slice_image_tables.size() * 8.0f / total_texels);
		debug_printf("Total image bytes: %u, %3.3f bits/texel\n", total_image_bytes, total_image_bytes * 8.0f / total_texels);

		return true;
	}
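
	// Codes the endpoint codebook: entries are visited in remapped (new) order, and each entry's
	// 5-bit color components and 3-bit intensity index are delta coded against the previous entry.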
	bool basisu_backend::encode_endpoint_palette()
	{
		const basisu_frontend& r = *m_pFront_end;

		// The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again.
		bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters());
		uint32_t first_old_entry_index = UINT32_MAX;

		for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
		{
			const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;
			for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
			{
				for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
				{
					encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);
					const uint32_t old_endpoint_index = m.m_endpoint_index;

					old_endpoint_was_used[old_endpoint_index] = true;

					first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);
				} // block_x
			} // block_y
		} // slice_index

		debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index);
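
		// Build the inverse (new -> old) remap table. Unused new slots are pointed at the first used
		// old entry, presumably so they still map to a valid palette value when coded.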
		// Maps NEW to OLD endpoints
		uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters());
		endpoint_remap_table_new_to_old.set_all(first_old_entry_index);

		bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters());

		for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++)
		{
			if (old_endpoint_was_used[old_endpoint_index])
			{
				const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index];

				new_endpoint_was_used[new_endpoint_index] = true;

				endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index;
			}
		}

		// TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that.
		uint32_t total_unused_new_entries = 0;
		for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++)
			if (!new_endpoint_was_used[i])
				total_unused_new_entries++;
		debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, (uint32_t)new_endpoint_was_used.size());
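
		// If every endpoint color is gray (R == G == B), only a single component per entry needs to
		// be coded; the flag itself is written into the bitstream below.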
		bool is_grayscale = true;
		for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++)
		{
			int r5 = m_endpoint_palette[old_endpoint_index].m_color5[0];
			int g5 = m_endpoint_palette[old_endpoint_index].m_color5[1];
			int b5 = m_endpoint_palette[old_endpoint_index].m_color5[2];
			if ((r5 != g5) || (r5 != b5))
			{
				is_grayscale = false;
				break;
			}
		}
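
		// First pass: gather delta histograms. Each 5-bit component delta is coded with one of three
		// contexts selected by the previous component's value range (see the comments on the histograms
		// below), and the intensity delta uses its own mod-8 context.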
		histogram color5_delta_hist0(32); // prev 0-9, delta is -9 to 31
		histogram color5_delta_hist1(32); // prev 10-21, delta is -21 to 21
		histogram color5_delta_hist2(32); // prev 22-31, delta is -31 to 9
		histogram inten_delta_hist(8);

		color_rgba prev_color5(16, 16, 16, 0);
		uint32_t prev_inten = 0;

		for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
		{
			const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];

			int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten;
			inten_delta_hist.inc(delta_inten & 7);
			prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;

			for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)
			{
				const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;

				if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)
					color5_delta_hist0.inc(delta);
				else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)
					color5_delta_hist1.inc(delta);
				else
					color5_delta_hist2.inc(delta);

				prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];
			}
		}
		if (!color5_delta_hist0.get_total()) color5_delta_hist0.inc(0);
		if (!color5_delta_hist1.get_total()) color5_delta_hist1.inc(0);
		if (!color5_delta_hist2.get_total()) color5_delta_hist2.inc(0);

		huffman_encoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;

		if (!color5_delta_model0.init(color5_delta_hist0, 16))
		{
			error_printf("color5_delta_model0.init() failed!");
			return false;
		}

		if (!color5_delta_model1.init(color5_delta_hist1, 16))
		{
			error_printf("color5_delta_model1.init() failed!");
			return false;
		}

		if (!color5_delta_model2.init(color5_delta_hist2, 16))
		{
			error_printf("color5_delta_model2.init() failed!");
			return false;
		}

		if (!inten_delta_model.init(inten_delta_hist, 16))
		{
			error_printf("inten_delta_model.init() failed!");
			return false;
		}
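
		// Second pass: emit the four Huffman tables, the grayscale flag, then the delta coded entries.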
		bitwise_coder coder;
		coder.init(8192);

		coder.emit_huffman_table(color5_delta_model0);
		coder.emit_huffman_table(color5_delta_model1);
		coder.emit_huffman_table(color5_delta_model2);
		coder.emit_huffman_table(inten_delta_model);

		coder.put_bits(is_grayscale, 1);

		prev_color5.set(16, 16, 16, 0);
		prev_inten = 0;

		for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
		{
			const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];

			int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7;
			coder.put_code(delta_inten, inten_delta_model);
			prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;

			for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)
			{
				const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;

				if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)
					coder.put_code(delta, color5_delta_model0);
				else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)
					coder.put_code(delta, color5_delta_model1);
				else
					coder.put_code(delta, color5_delta_model2);

				prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];
			}
		} // new_endpoint_index

		coder.flush();

		m_output.m_endpoint_palette = coder.get_bytes();

		debug_printf("Endpoint codebook size: %u bits %u bytes, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",
			8 * (int)m_output.m_endpoint_palette.size(), (int)m_output.m_endpoint_palette.size(), m_output.m_endpoint_palette.size() * 8.0f / r.get_total_endpoint_clusters(), m_output.m_endpoint_palette.size() * 8.0f / get_total_input_texels());

		return true;
	}
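
	// Codes the selector codebook. Each selector entry (4 bytes) is XOR-delta coded against the
	// previous entry in remapped order; if that ends up no smaller than raw storage, the palette is
	// re-emitted as raw bytes instead.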
	bool basisu_backend::encode_selector_palette()
	{
		const basisu_frontend& r = *m_pFront_end;

		histogram delta_selector_pal_histogram(256);

		for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
		{
			if (!q)
				continue;

			const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
			const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);

			for (uint32_t j = 0; j < 4; j++)
				delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j));
		}

		if (!delta_selector_pal_histogram.get_total())
			delta_selector_pal_histogram.inc(0);

		huffman_encoding_table delta_selector_pal_model;
		if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16))
		{
			error_printf("delta_selector_pal_model.init() failed!");
			return false;
		}
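
		// Header flags: global codebook, hybrid codebooks, and raw bytes are all written as 0 here,
		// since this path delta codes the entries. The fallback below rewrites the raw-bytes flag as 1.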
		bitwise_coder coder;
		coder.init(1024 * 1024);

		coder.put_bits(0, 1); // use global codebook
		coder.put_bits(0, 1); // uses hybrid codebooks
		coder.put_bits(0, 1); // raw bytes

		coder.emit_huffman_table(delta_selector_pal_model);

		for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
		{
			if (!q)
			{
				for (uint32_t j = 0; j < 4; j++)
					coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8);
				continue;
			}

			const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
			const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);

			for (uint32_t j = 0; j < 4; j++)
				coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model);
		}

		coder.flush();

		m_output.m_selector_palette = coder.get_bytes();
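
		// Fallback: if the delta coded palette isn't smaller than storing 4 raw bytes per entry,
		// re-encode the whole palette as raw bytes with the raw-bytes header flag set.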
		if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4)
		{
			coder.init(1024 * 1024);

			coder.put_bits(0, 1); // use global codebook
			coder.put_bits(0, 1); // uses hybrid codebooks
			coder.put_bits(1, 1); // raw bytes

			for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
			{
				const uint32_t i = m_selector_remap_table_new_to_old[q];

				for (uint32_t j = 0; j < 4; j++)
					coder.put_bits(m_selector_palette[i].get_byte(j), 8);
			}

			coder.flush();

			m_output.m_selector_palette = coder.get_bytes();
		}

		debug_printf("Selector codebook bits: %u bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",
			(int)m_output.m_selector_palette.size() * 8, (int)m_output.m_selector_palette.size(),
			m_output.m_selector_palette.size() * 8.0f / r.get_total_selector_clusters(), m_output.m_selector_palette.size() * 8.0f / get_total_input_texels());

		return true;
	}
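
	// Top-level backend entry point: builds the endpoint/selector palettes and encoder blocks, codes
	// the slice image data and both codebooks, and returns the total compressed size in bytes
	// (0 on failure).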
	uint32_t basisu_backend::encode()
	{
		//const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;

		m_output.m_slice_desc = m_slices;
		m_output.m_etc1s = m_params.m_etc1s;
		m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks;
		m_output.m_srgb = m_pFront_end->get_params().m_perceptual;

		create_endpoint_palette();
		create_selector_palette();

		create_encoder_blocks();

		if (!encode_image())
			return 0;

		if (!encode_endpoint_palette())
			return 0;

		if (!encode_selector_palette())
			return 0;

		uint32_t total_compressed_bytes = (uint32_t)(m_output.m_slice_image_tables.size() + m_output.m_endpoint_palette.size() + m_output.m_selector_palette.size());
		for (uint32_t i = 0; i < m_output.m_slice_image_data.size(); i++)
			total_compressed_bytes += (uint32_t)m_output.m_slice_image_data[i].size();

		debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels());

		return total_compressed_bytes;
	}

} // namespace basisu