vertex_cache_optimizer.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. /**************************************************************************/
  2. /* vertex_cache_optimizer.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "vertex_cache_optimizer.h"
  31. #include "core/math/geometry.h"
  32. #include "core/math/math_funcs.h"
  33. // Precalculate the tables.
  34. void VertexCacheOptimizer::init() {
  35. for (int i = 0; i < Constants::CACHE_SCORE_TABLE_SIZE; i++) {
  36. float score = 0;
  37. if (i < 3) {
  38. // This vertex was used in the last triangle,
  39. // so it has a fixed score, which ever of the three
  40. // it's in. Otherwise, you can get very different
  41. // answers depending on whether you add
  42. // the triangle 1,2,3 or 3,1,2 - which is silly.
  43. score = Constants::LAST_TRI_SCORE;
  44. } else {
  45. // Points for being high in the cache.
  46. const float scaler = 1.0f / (Constants::CACHE_FUNCTION_LENGTH - 3);
  47. score = 1.0f - (i - 3) * scaler;
  48. score = Math::pow(score, Constants::CACHE_DECAY_POWER);
  49. }
  50. _cache_position_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score);
  51. }
  52. for (int i = 1; i < Constants::VALENCE_SCORE_TABLE_SIZE; i++) {
  53. // Bonus points for having a low number of tris still to
  54. // use the vert, so we get rid of lone verts quickly.
  55. float valence_boost = Math::pow(i, -Constants::VALENCE_BOOST_POWER);
  56. float score = Constants::VALENCE_BOOST_SCALE * valence_boost;
  57. _valence_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score);
  58. }
  59. }
  60. VertexCacheOptimizer::SCORE_TYPE VertexCacheOptimizer::find_vertex_score(int p_num_active_tris, int p_cache_position) {
  61. if (p_num_active_tris == 0) {
  62. // No triangles need this vertex!
  63. return 0;
  64. }
  65. SCORE_TYPE score = 0;
  66. if (p_cache_position < 0) {
  67. // Vertex is not in LRU cache - no score.
  68. } else {
  69. score = _cache_position_score[p_cache_position];
  70. }
  71. if (p_num_active_tris < Constants::VALENCE_SCORE_TABLE_SIZE) {
  72. score += _valence_score[p_num_active_tris];
  73. }
  74. return score;
  75. }
  76. VertexCacheOptimizer::VERTEX_INDEX_TYPE *VertexCacheOptimizer::_reorder_indices(VERTEX_INDEX_TYPE *r_dest_indices, const VERTEX_INDEX_TYPE *p_source_indices, int p_num_triangles, int p_num_vertices) {
  77. ADJACENCY_TYPE *num_active_tris = (ADJACENCY_TYPE *)memalloc(sizeof(ADJACENCY_TYPE) * p_num_vertices);
  78. memset(num_active_tris, 0, sizeof(ADJACENCY_TYPE) * p_num_vertices);
  79. // First scan over the vertex data, count the total number of
  80. // occurrances of each vertex.
  81. for (int i = 0; i < 3 * p_num_triangles; i++) {
  82. if (num_active_tris[p_source_indices[i]] == Constants::MAX_ADJACENCY) {
  83. // Unsupported mesh,
  84. // vertex shared by too many triangles.
  85. memfree(num_active_tris);
  86. return nullptr;
  87. }
  88. num_active_tris[p_source_indices[i]]++;
  89. }
  90. // Allocate the rest of the arrays.
  91. ARRAY_INDEX_TYPE *offsets = (ARRAY_INDEX_TYPE *)memalloc(sizeof(ARRAY_INDEX_TYPE) * p_num_vertices);
  92. SCORE_TYPE *last_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_vertices);
  93. CACHE_POS_TYPE *cache_tag = (CACHE_POS_TYPE *)memalloc(sizeof(CACHE_POS_TYPE) * p_num_vertices);
  94. uint8_t *triangle_added = (uint8_t *)memalloc((p_num_triangles + 7) / 8);
  95. SCORE_TYPE *triangle_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_triangles);
  96. TRIANGLE_INDEX_TYPE *triangle_indices = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles);
  97. memset(triangle_added, 0, sizeof(uint8_t) * ((p_num_triangles + 7) / 8));
  98. memset(triangle_score, 0, sizeof(SCORE_TYPE) * p_num_triangles);
  99. memset(triangle_indices, 0, sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles);
  100. // Count the triangle array offset for each vertex,
  101. // initialize the rest of the data.
  102. int sum = 0;
  103. for (int i = 0; i < p_num_vertices; i++) {
  104. offsets[i] = sum;
  105. sum += num_active_tris[i];
  106. num_active_tris[i] = 0;
  107. cache_tag[i] = -1;
  108. }
  109. // Fill the vertex data structures with indices to the triangles
  110. // using each vertex.
  111. for (int i = 0; i < p_num_triangles; i++) {
  112. for (int j = 0; j < 3; j++) {
  113. int v = p_source_indices[3 * i + j];
  114. triangle_indices[offsets[v] + num_active_tris[v]] = i;
  115. num_active_tris[v]++;
  116. }
  117. }
  118. // Initialize the score for all vertices.
  119. for (int i = 0; i < p_num_vertices; i++) {
  120. last_score[i] = find_vertex_score(num_active_tris[i], cache_tag[i]);
  121. for (int j = 0; j < num_active_tris[i]; j++) {
  122. triangle_score[triangle_indices[offsets[i] + j]] += last_score[i];
  123. }
  124. }
  125. // Find the best triangle.
  126. int best_triangle = -1;
  127. int best_score = -1;
  128. for (int i = 0; i < p_num_triangles; i++) {
  129. if (triangle_score[i] > best_score) {
  130. best_score = triangle_score[i];
  131. best_triangle = i;
  132. }
  133. }
  134. // Allocate the output array.
  135. TRIANGLE_INDEX_TYPE *out_triangles = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * p_num_triangles);
  136. int out_pos = 0;
  137. // Initialize the cache.
  138. int cache[Constants::VERTEX_CACHE_SIZE + 3];
  139. for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) {
  140. cache[i] = -1;
  141. }
  142. int scan_pos = 0;
  143. // Output the currently best triangle, as long as there
  144. // are triangles left to output.
  145. while (best_triangle >= 0) {
  146. // Mark the triangle as added.
  147. set_added(triangle_added, best_triangle);
  148. // Output this triangle.
  149. out_triangles[out_pos++] = best_triangle;
  150. for (int i = 0; i < 3; i++) {
  151. // Update this vertex.
  152. int v = p_source_indices[3 * best_triangle + i];
  153. // Check the current cache position, if it
  154. // is in the cache.
  155. int endpos = cache_tag[v];
  156. if (endpos < 0) {
  157. endpos = Constants::VERTEX_CACHE_SIZE + i;
  158. }
  159. if (endpos > i) {
  160. // Move all cache entries from the previous position
  161. // in the cache to the new target position (i) one
  162. // step backwards.
  163. for (int j = endpos; j > i; j--) {
  164. cache[j] = cache[j - 1];
  165. // If this cache slot contains a real
  166. // vertex, update its cache tag.
  167. if (cache[j] >= 0) {
  168. cache_tag[cache[j]]++;
  169. }
  170. }
  171. // Insert the current vertex into its new target
  172. // slot.
  173. cache[i] = v;
  174. cache_tag[v] = i;
  175. }
  176. // Find the current triangle in the list of active
  177. // triangles and remove it (moving the last
  178. // triangle in the list to the slot of this triangle).
  179. for (int j = 0; j < num_active_tris[v]; j++) {
  180. if (triangle_indices[offsets[v] + j] == best_triangle) {
  181. triangle_indices[offsets[v] + j] = triangle_indices[offsets[v] + num_active_tris[v] - 1];
  182. break;
  183. }
  184. }
  185. // Shorten the list.
  186. num_active_tris[v]--;
  187. }
  188. // Update the scores of all triangles in the cache.
  189. for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) {
  190. int v = cache[i];
  191. if (v < 0) {
  192. break;
  193. }
  194. // This vertex has been pushed outside of the
  195. // actual cache.
  196. if (i >= Constants::VERTEX_CACHE_SIZE) {
  197. cache_tag[v] = -1;
  198. cache[i] = -1;
  199. }
  200. SCORE_TYPE newScore = find_vertex_score(num_active_tris[v], cache_tag[v]);
  201. SCORE_TYPE diff = newScore - last_score[v];
  202. for (int j = 0; j < num_active_tris[v]; j++) {
  203. triangle_score[triangle_indices[offsets[v] + j]] += diff;
  204. }
  205. last_score[v] = newScore;
  206. }
  207. // Find the best triangle referenced by vertices in the cache.
  208. best_triangle = -1;
  209. best_score = -1;
  210. for (int i = 0; i < Constants::VERTEX_CACHE_SIZE; i++) {
  211. if (cache[i] < 0) {
  212. break;
  213. }
  214. int v = cache[i];
  215. for (int j = 0; j < num_active_tris[v]; j++) {
  216. int t = triangle_indices[offsets[v] + j];
  217. if (triangle_score[t] > best_score) {
  218. best_triangle = t;
  219. best_score = triangle_score[t];
  220. }
  221. }
  222. }
  223. // If no active triangle was found at all, continue
  224. // scanning the whole list of triangles.
  225. if (best_triangle < 0) {
  226. for (; scan_pos < p_num_triangles; scan_pos++) {
  227. if (!is_added(triangle_added, scan_pos)) {
  228. best_triangle = scan_pos;
  229. break;
  230. }
  231. }
  232. }
  233. }
  234. // Convert the triangle index array into a full triangle list.
  235. out_pos = 0;
  236. for (int i = 0; i < p_num_triangles; i++) {
  237. int t = out_triangles[i];
  238. for (int j = 0; j < 3; j++) {
  239. int v = p_source_indices[3 * t + j];
  240. r_dest_indices[out_pos++] = v;
  241. }
  242. }
  243. // Clean up.
  244. memfree(triangle_indices);
  245. memfree(offsets);
  246. memfree(last_score);
  247. memfree(num_active_tris);
  248. memfree(cache_tag);
  249. memfree(triangle_added);
  250. memfree(triangle_score);
  251. memfree(out_triangles);
  252. return r_dest_indices;
  253. }
  254. bool VertexCacheOptimizer::reorder_indices_pool(PoolVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) {
  255. LocalVector<int> temp;
  256. temp = r_indices;
  257. if (reorder_indices(temp, p_num_triangles, p_num_verts)) {
  258. r_indices = temp;
  259. return true;
  260. }
  261. return false;
  262. }
  263. bool VertexCacheOptimizer::reorder_indices(LocalVector<int> &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) {
  264. // If the mesh contains invalid indices, abort.
  265. ERR_FAIL_COND_V(!Geometry::verify_indices(r_indices.ptr(), r_indices.size(), p_num_verts), false);
  266. LocalVector<int> temp;
  267. temp.resize(r_indices.size());
  268. if (_reorder_indices((VERTEX_INDEX_TYPE *)temp.ptr(), (VERTEX_INDEX_TYPE *)r_indices.ptr(), p_num_triangles, p_num_verts)) {
  269. #if 0
  270. uint32_t show = MIN(r_indices.size(), 16);
  271. for (uint32_t n = 0; n < show; n++) {
  272. print_line(itos(n) + " : " + itos(r_indices[n]) + " to " + itos(temp[n]));
  273. }
  274. #endif
  275. r_indices = temp;
  276. return true;
  277. }
  278. return false;
  279. }