image_compress_betsy.cpp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. /**************************************************************************/
  2. /* image_compress_betsy.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "image_compress_betsy.h"
  31. #include "core/config/project_settings.h"
  32. #include "betsy_bc1.h"
  33. #include "alpha_stitch.glsl.gen.h"
  34. #include "bc1.glsl.gen.h"
  35. #include "bc4.glsl.gen.h"
  36. #include "bc6h.glsl.gen.h"
  37. #include "servers/display_server.h"
  38. static Mutex betsy_mutex;
  39. static BetsyCompressor *betsy = nullptr;
  40. static const BetsyShaderType FORMAT_TO_TYPE[BETSY_FORMAT_MAX] = {
  41. BETSY_SHADER_BC1_STANDARD,
  42. BETSY_SHADER_BC1_DITHER,
  43. BETSY_SHADER_BC1_STANDARD,
  44. BETSY_SHADER_BC4_SIGNED,
  45. BETSY_SHADER_BC4_UNSIGNED,
  46. BETSY_SHADER_BC4_SIGNED,
  47. BETSY_SHADER_BC4_UNSIGNED,
  48. BETSY_SHADER_BC6_SIGNED,
  49. BETSY_SHADER_BC6_UNSIGNED,
  50. };
  51. static const RD::DataFormat BETSY_TO_RD_FORMAT[BETSY_FORMAT_MAX] = {
  52. RD::DATA_FORMAT_R32G32_UINT,
  53. RD::DATA_FORMAT_R32G32_UINT,
  54. RD::DATA_FORMAT_R32G32_UINT,
  55. RD::DATA_FORMAT_R32G32_UINT,
  56. RD::DATA_FORMAT_R32G32_UINT,
  57. RD::DATA_FORMAT_R32G32_UINT,
  58. RD::DATA_FORMAT_R32G32_UINT,
  59. RD::DATA_FORMAT_R32G32B32A32_UINT,
  60. RD::DATA_FORMAT_R32G32B32A32_UINT,
  61. };
  62. static const Image::Format BETSY_TO_IMAGE_FORMAT[BETSY_FORMAT_MAX] = {
  63. Image::FORMAT_DXT1,
  64. Image::FORMAT_DXT1,
  65. Image::FORMAT_DXT5,
  66. Image::FORMAT_RGTC_R,
  67. Image::FORMAT_RGTC_R,
  68. Image::FORMAT_RGTC_RG,
  69. Image::FORMAT_RGTC_RG,
  70. Image::FORMAT_BPTC_RGBF,
  71. Image::FORMAT_BPTC_RGBFU,
  72. };
  73. void BetsyCompressor::_init() {
  74. if (!DisplayServer::can_create_rendering_device()) {
  75. return;
  76. }
  77. // Create local RD.
  78. RenderingContextDriver *rcd = nullptr;
  79. RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
  80. if (rd == nullptr) {
  81. #if defined(RD_ENABLED)
  82. #if defined(METAL_ENABLED)
  83. rcd = memnew(RenderingContextDriverMetal);
  84. rd = memnew(RenderingDevice);
  85. #endif
  86. #if defined(VULKAN_ENABLED)
  87. if (rcd == nullptr) {
  88. rcd = memnew(RenderingContextDriverVulkan);
  89. rd = memnew(RenderingDevice);
  90. }
  91. #endif
  92. #endif
  93. if (rcd != nullptr && rd != nullptr) {
  94. Error err = rcd->initialize();
  95. if (err == OK) {
  96. err = rd->initialize(rcd);
  97. }
  98. if (err != OK) {
  99. memdelete(rd);
  100. memdelete(rcd);
  101. rd = nullptr;
  102. rcd = nullptr;
  103. }
  104. }
  105. }
  106. ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");
  107. compress_rd = rd;
  108. compress_rcd = rcd;
  109. // Create the sampler state.
  110. RD::SamplerState src_sampler_state;
  111. {
  112. src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  113. src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  114. src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
  115. src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
  116. src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
  117. }
  118. src_sampler = compress_rd->sampler_create(src_sampler_state);
  119. // Initialize RDShaderFiles.
  120. {
  121. Ref<RDShaderFile> bc1_shader;
  122. bc1_shader.instantiate();
  123. Error err = bc1_shader->parse_versions_from_text(bc1_shader_glsl);
  124. if (err != OK) {
  125. bc1_shader->print_errors("Betsy BC1 compress shader");
  126. }
  127. // Standard BC1 compression.
  128. cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("standard"));
  129. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled.is_null());
  130. cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled);
  131. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline.is_null());
  132. // Dither BC1 variant. Unused, so comment out for now.
  133. //cached_shaders[BETSY_SHADER_BC1_DITHER].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("dithered"));
  134. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled.is_null());
  135. //cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled);
  136. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline.is_null());
  137. }
  138. {
  139. Ref<RDShaderFile> bc4_shader;
  140. bc4_shader.instantiate();
  141. Error err = bc4_shader->parse_versions_from_text(bc4_shader_glsl);
  142. if (err != OK) {
  143. bc4_shader->print_errors("Betsy BC4 compress shader");
  144. }
  145. // Signed BC4 compression. Unused, so comment out for now.
  146. //cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("signed"));
  147. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled.is_null());
  148. //cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled);
  149. //ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline.is_null());
  150. // Unsigned BC4 compression.
  151. cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("unsigned"));
  152. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled.is_null());
  153. cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled);
  154. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline.is_null());
  155. }
  156. {
  157. Ref<RDShaderFile> bc6h_shader;
  158. bc6h_shader.instantiate();
  159. Error err = bc6h_shader->parse_versions_from_text(bc6h_shader_glsl);
  160. if (err != OK) {
  161. bc6h_shader->print_errors("Betsy BC6 compress shader");
  162. }
  163. // Signed BC6 compression.
  164. cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("signed"));
  165. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled.is_null());
  166. cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled);
  167. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline.is_null());
  168. // Unsigned BC6 compression.
  169. cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("unsigned"));
  170. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled.is_null());
  171. cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled);
  172. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline.is_null());
  173. }
  174. {
  175. Ref<RDShaderFile> alpha_stitch_shader;
  176. alpha_stitch_shader.instantiate();
  177. Error err = alpha_stitch_shader->parse_versions_from_text(alpha_stitch_shader_glsl);
  178. if (err != OK) {
  179. alpha_stitch_shader->print_errors("Betsy alpha stitch shader");
  180. }
  181. cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled = compress_rd->shader_create_from_spirv(alpha_stitch_shader->get_spirv_stages());
  182. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled.is_null());
  183. cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
  184. ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
  185. }
  186. }
  187. void BetsyCompressor::init() {
  188. WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true);
  189. command_queue.set_pump_task_id(tid);
  190. command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);
  191. command_queue.push_and_sync(this, &BetsyCompressor::_init);
  192. DEV_ASSERT(task_id == tid);
  193. }
  194. void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {
  195. task_id = p_pump_task_id;
  196. }
  197. // Yield thread to WTP so other tasks can be done on it.
  198. // Automatically regains control as soon a task is pushed to the command queue.
  199. void BetsyCompressor::_thread_loop() {
  200. while (!exit) {
  201. WorkerThreadPool::get_singleton()->yield();
  202. command_queue.flush_all();
  203. }
  204. }
  205. void BetsyCompressor::_thread_exit() {
  206. exit = true;
  207. if (compress_rd != nullptr) {
  208. if (dxt1_encoding_table_buffer.is_valid()) {
  209. compress_rd->free(dxt1_encoding_table_buffer);
  210. }
  211. compress_rd->free(src_sampler);
  212. // Clear the shader cache, pipelines will be unreferenced automatically.
  213. for (int i = 0; i < BETSY_SHADER_MAX; i++) {
  214. if (cached_shaders[i].compiled.is_valid()) {
  215. compress_rd->free(cached_shaders[i].compiled);
  216. }
  217. }
  218. // Free the RD (and RCD if necessary).
  219. memdelete(compress_rd);
  220. compress_rd = nullptr;
  221. if (compress_rcd != nullptr) {
  222. memdelete(compress_rcd);
  223. compress_rcd = nullptr;
  224. }
  225. }
  226. }
  227. void BetsyCompressor::finish() {
  228. command_queue.push(this, &BetsyCompressor::_thread_exit);
  229. if (task_id != WorkerThreadPool::INVALID_TASK_ID) {
  230. WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
  231. task_id = WorkerThreadPool::INVALID_TASK_ID;
  232. }
  233. }
  234. // Helper functions.
  // Rounds `n` up to the nearest multiple of `m` (both expected to be positive).
  //
  // A value that is already a multiple of `m` is returned unchanged. The
  // callers divide the result by `m` to obtain a workgroup count, so returning
  // `n + m` for aligned sizes (as the previous formula did) dispatched one
  // extra row/column of workgroups entirely outside the image.
  static int get_next_multiple(int n, int m) {
      return ((n + m - 1) / m) * m;
  }
  238. static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
  239. switch (r_img->get_format()) {
  240. case Image::FORMAT_L8:
  241. r_img->convert(Image::FORMAT_RGBA8);
  242. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  243. break;
  244. case Image::FORMAT_LA8:
  245. r_img->convert(Image::FORMAT_RGBA8);
  246. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  247. break;
  248. case Image::FORMAT_R8:
  249. r_format = RD::DATA_FORMAT_R8_UNORM;
  250. break;
  251. case Image::FORMAT_RG8:
  252. r_format = RD::DATA_FORMAT_R8G8_UNORM;
  253. break;
  254. case Image::FORMAT_RGB8:
  255. r_img->convert(Image::FORMAT_RGBA8);
  256. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  257. break;
  258. case Image::FORMAT_RGBA8:
  259. r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  260. break;
  261. case Image::FORMAT_RH:
  262. r_format = RD::DATA_FORMAT_R16_SFLOAT;
  263. break;
  264. case Image::FORMAT_RGH:
  265. r_format = RD::DATA_FORMAT_R16G16_SFLOAT;
  266. break;
  267. case Image::FORMAT_RGBH:
  268. r_img->convert(Image::FORMAT_RGBAH);
  269. r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  270. break;
  271. case Image::FORMAT_RGBAH:
  272. r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  273. break;
  274. case Image::FORMAT_RF:
  275. r_format = RD::DATA_FORMAT_R32_SFLOAT;
  276. break;
  277. case Image::FORMAT_RGF:
  278. r_format = RD::DATA_FORMAT_R32G32_SFLOAT;
  279. break;
  280. case Image::FORMAT_RGBF:
  281. r_img->convert(Image::FORMAT_RGBAF);
  282. r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  283. break;
  284. case Image::FORMAT_RGBAF:
  285. r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  286. break;
  287. case Image::FORMAT_RGBE9995:
  288. r_format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
  289. break;
  290. default: {
  291. return ERR_UNAVAILABLE;
  292. }
  293. }
  294. return OK;
  295. }
  296. Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
  297. uint64_t start_time = OS::get_singleton()->get_ticks_msec();
  298. // Return an error so that the compression can fall back to cpu compression
  299. if (compress_rd == nullptr) {
  300. return ERR_CANT_CREATE;
  301. }
  302. if (r_img->is_compressed()) {
  303. return ERR_INVALID_DATA;
  304. }
  305. int img_width = r_img->get_width();
  306. int img_height = r_img->get_height();
  307. if (img_width % 4 != 0 || img_height % 4 != 0) {
  308. img_width = img_width <= 2 ? img_width : (img_width + 3) & ~3;
  309. img_height = img_height <= 2 ? img_height : (img_height + 3) & ~3;
  310. }
  311. Error err = OK;
  312. // Destination format.
  313. Image::Format dest_format = BETSY_TO_IMAGE_FORMAT[p_format];
  314. RD::DataFormat dst_rd_format = BETSY_TO_RD_FORMAT[p_format];
  315. BetsyShaderType shader_type = FORMAT_TO_TYPE[p_format];
  316. BetsyShader shader = cached_shaders[shader_type];
  317. BetsyShader secondary_shader; // The secondary shader is used for alpha blocks. For BC it's BC4U and for ETC it's ETC2_RU (8-bit variant).
  318. BetsyShader stitch_shader;
  319. bool needs_alpha_block = false;
  320. switch (p_format) {
  321. case BETSY_FORMAT_BC3:
  322. case BETSY_FORMAT_BC5_UNSIGNED:
  323. needs_alpha_block = true;
  324. secondary_shader = cached_shaders[BETSY_SHADER_BC4_UNSIGNED];
  325. stitch_shader = cached_shaders[BETSY_SHADER_ALPHA_STITCH];
  326. break;
  327. default:
  328. break;
  329. }
  330. // src_texture format information.
  331. RD::TextureFormat src_texture_format;
  332. {
  333. src_texture_format.array_layers = 1;
  334. src_texture_format.depth = 1;
  335. src_texture_format.mipmaps = 1;
  336. src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
  337. src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
  338. }
  339. err = get_src_texture_format(r_img, src_texture_format.format);
  340. if (err != OK) {
  341. return err;
  342. }
  343. // For the destination format just copy the source format and change the usage bits.
  344. RD::TextureFormat dst_texture_format = src_texture_format;
  345. dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
  346. dst_texture_format.format = dst_rd_format;
  347. RD::TextureFormat dst_texture_format_alpha;
  348. RD::TextureFormat dst_texture_format_combined;
  349. if (needs_alpha_block) {
  350. dst_texture_format_combined = dst_texture_format;
  351. dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
  352. dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT;
  353. dst_texture_format_alpha = dst_texture_format;
  354. dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT;
  355. }
  356. // Encoding table setup.
  357. if ((dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) && dxt1_encoding_table_buffer.is_null()) {
  358. Vector<uint8_t> data;
  359. data.resize(1024 * 4);
  360. memcpy(data.ptrw(), dxt1_encoding_table, 1024 * 4);
  361. dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(1024 * 4, data);
  362. }
  363. const int mip_count = r_img->get_mipmap_count() + 1;
  364. // Container for the compressed data.
  365. Vector<uint8_t> dst_data;
  366. dst_data.resize(Image::get_image_data_size(img_width, img_height, dest_format, r_img->has_mipmaps()));
  367. uint8_t *dst_data_ptr = dst_data.ptrw();
  368. Vector<Vector<uint8_t>> src_images;
  369. src_images.push_back(Vector<uint8_t>());
  370. Vector<uint8_t> *src_image_ptr = src_images.ptrw();
  371. // Compress each mipmap.
  372. for (int i = 0; i < mip_count; i++) {
  373. int width, height;
  374. Image::get_image_mipmap_offset_and_dimensions(img_width, img_height, dest_format, i, width, height);
  375. int64_t src_mip_ofs, src_mip_size;
  376. int src_mip_w, src_mip_h;
  377. r_img->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);
  378. // Set the source texture width and size.
  379. src_texture_format.height = height;
  380. src_texture_format.width = width;
  381. // Set the destination texture width and size.
  382. dst_texture_format.height = (height + 3) >> 2;
  383. dst_texture_format.width = (width + 3) >> 2;
  384. // Pad textures to nearest block by smearing.
  385. if (width != src_mip_w || height != src_mip_h) {
  386. const uint8_t *src_mip_read = r_img->ptr() + src_mip_ofs;
  387. // Reserve the buffer for padded image data.
  388. int px_size = Image::get_format_pixel_size(r_img->get_format());
  389. src_image_ptr[0].resize(width * height * px_size);
  390. uint8_t *ptrw = src_image_ptr[0].ptrw();
  391. int x = 0, y = 0;
  392. for (y = 0; y < src_mip_h; y++) {
  393. for (x = 0; x < src_mip_w; x++) {
  394. memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);
  395. }
  396. // First, smear in x.
  397. for (; x < width; x++) {
  398. memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);
  399. }
  400. }
  401. // Then, smear in y.
  402. for (; y < height; y++) {
  403. for (x = 0; x < width; x++) {
  404. memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);
  405. }
  406. }
  407. } else {
  408. // Create a buffer filled with the source mip layer data.
  409. src_image_ptr[0].resize(src_mip_size);
  410. memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + src_mip_ofs, src_mip_size);
  411. }
  412. // Create the textures on the GPU.
  413. RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
  414. RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
  415. {
  416. Vector<RD::Uniform> uniforms;
  417. {
  418. {
  419. RD::Uniform u;
  420. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  421. u.binding = 0;
  422. u.append_id(src_sampler);
  423. u.append_id(src_texture);
  424. uniforms.push_back(u);
  425. }
  426. {
  427. RD::Uniform u;
  428. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  429. u.binding = 1;
  430. u.append_id(dst_texture_primary);
  431. uniforms.push_back(u);
  432. }
  433. if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) {
  434. RD::Uniform u;
  435. u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
  436. u.binding = 2;
  437. u.append_id(dxt1_encoding_table_buffer);
  438. uniforms.push_back(u);
  439. }
  440. }
  441. RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);
  442. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  443. compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);
  444. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  445. switch (shader_type) {
  446. case BETSY_SHADER_BC6_SIGNED:
  447. case BETSY_SHADER_BC6_UNSIGNED: {
  448. BC6PushConstant push_constant;
  449. push_constant.sizeX = 1.0f / width;
  450. push_constant.sizeY = 1.0f / height;
  451. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
  452. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  453. } break;
  454. case BETSY_SHADER_BC1_STANDARD: {
  455. BC1PushConstant push_constant;
  456. push_constant.num_refines = 2;
  457. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));
  458. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  459. } break;
  460. case BETSY_SHADER_BC4_UNSIGNED: {
  461. BC4PushConstant push_constant;
  462. push_constant.channel_idx = 0;
  463. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
  464. compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
  465. } break;
  466. default: {
  467. } break;
  468. }
  469. compress_rd->compute_list_end();
  470. if (!needs_alpha_block) {
  471. compress_rd->submit();
  472. compress_rd->sync();
  473. }
  474. }
  475. RID dst_texture_rid = dst_texture_primary;
  476. if (needs_alpha_block) {
  477. // Set the destination texture width and size.
  478. dst_texture_format_alpha.height = (height + 3) >> 2;
  479. dst_texture_format_alpha.width = (width + 3) >> 2;
  480. RID dst_texture_alpha = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView());
  481. {
  482. Vector<RD::Uniform> uniforms;
  483. {
  484. {
  485. RD::Uniform u;
  486. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  487. u.binding = 0;
  488. u.append_id(src_sampler);
  489. u.append_id(src_texture);
  490. uniforms.push_back(u);
  491. }
  492. {
  493. RD::Uniform u;
  494. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  495. u.binding = 1;
  496. u.append_id(dst_texture_alpha);
  497. uniforms.push_back(u);
  498. }
  499. }
  500. RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0);
  501. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  502. compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline);
  503. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  504. BC4PushConstant push_constant;
  505. push_constant.channel_idx = dest_format == Image::FORMAT_DXT5 ? 3 : 1;
  506. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
  507. compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
  508. compress_rd->compute_list_end();
  509. }
  510. // Stitching
  511. // Set the destination texture width and size.
  512. dst_texture_format_combined.height = (height + 3) >> 2;
  513. dst_texture_format_combined.width = (width + 3) >> 2;
  514. RID dst_texture_combined = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView());
  515. {
  516. Vector<RD::Uniform> uniforms;
  517. {
  518. {
  519. RD::Uniform u;
  520. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  521. u.binding = 0;
  522. u.append_id(src_sampler);
  523. u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_alpha : dst_texture_primary);
  524. uniforms.push_back(u);
  525. }
  526. {
  527. RD::Uniform u;
  528. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  529. u.binding = 1;
  530. u.append_id(src_sampler);
  531. u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_primary : dst_texture_alpha);
  532. uniforms.push_back(u);
  533. }
  534. {
  535. RD::Uniform u;
  536. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  537. u.binding = 2;
  538. u.append_id(dst_texture_combined);
  539. uniforms.push_back(u);
  540. }
  541. }
  542. RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0);
  543. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  544. compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline);
  545. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  546. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  547. compress_rd->compute_list_end();
  548. compress_rd->submit();
  549. compress_rd->sync();
  550. }
  551. dst_texture_rid = dst_texture_combined;
  552. compress_rd->free(dst_texture_primary);
  553. compress_rd->free(dst_texture_alpha);
  554. }
  555. // Copy data from the GPU to the buffer.
  556. const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture_rid, 0);
  557. int64_t dst_ofs = Image::get_image_mipmap_offset(img_width, img_height, dest_format, i);
  558. memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
  559. // Free the source and dest texture.
  560. compress_rd->free(src_texture);
  561. compress_rd->free(dst_texture_rid);
  562. }
  563. src_images.clear();
  564. // Set the compressed data to the image.
  565. r_img->set_data(img_width, img_height, r_img->has_mipmaps(), dest_format, dst_data);
  566. print_verbose(
  567. vformat("Betsy: Encoding a %dx%d image with %d mipmaps as %s took %d ms.",
  568. img_width,
  569. img_height,
  570. r_img->get_mipmap_count(),
  571. Image::get_format_name(dest_format),
  572. OS::get_singleton()->get_ticks_msec() - start_time));
  573. return OK;
  574. }
  575. void ensure_betsy_exists() {
  576. betsy_mutex.lock();
  577. if (betsy == nullptr) {
  578. betsy = memnew(BetsyCompressor);
  579. betsy->init();
  580. }
  581. betsy_mutex.unlock();
  582. }
  583. Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
  584. ensure_betsy_exists();
  585. Image::Format format = r_img->get_format();
  586. Error result = ERR_UNAVAILABLE;
  587. if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
  588. if (r_img->detect_signed()) {
  589. result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);
  590. } else {
  591. result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);
  592. }
  593. }
  594. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  595. free_device();
  596. }
  597. return result;
  598. }
  599. Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {
  600. ensure_betsy_exists();
  601. Error result = ERR_UNAVAILABLE;
  602. switch (p_channels) {
  603. case Image::USED_CHANNELS_RGB:
  604. case Image::USED_CHANNELS_L:
  605. result = betsy->compress(BETSY_FORMAT_BC1, r_img);
  606. break;
  607. case Image::USED_CHANNELS_RGBA:
  608. case Image::USED_CHANNELS_LA:
  609. result = betsy->compress(BETSY_FORMAT_BC3, r_img);
  610. break;
  611. case Image::USED_CHANNELS_R:
  612. result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img);
  613. break;
  614. case Image::USED_CHANNELS_RG:
  615. result = betsy->compress(BETSY_FORMAT_BC5_UNSIGNED, r_img);
  616. break;
  617. default:
  618. break;
  619. }
  620. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  621. free_device();
  622. }
  623. return result;
  624. }
  625. void free_device() {
  626. if (betsy != nullptr) {
  627. betsy->finish();
  628. memdelete(betsy);
  629. }
  630. }