cluster_render.glsl 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  #[vertex]

  #version 450

  #VERSION_DEFINES

  // Per-vertex input: the vertex position before the element's scale and
  // transform are applied in main().
  layout(location = 0) in vec3 vertex_attrib;

  // Values handed over to the fragment stage.
  // depth_interp: negated z of the transformed vertex (interpolated).
  layout(location = 0) out float depth_interp;
  // element_index: index into render_elements.data; `flat`, so it is not interpolated.
  layout(location = 1) out flat uint element_index;
  // Push constants supplied for each draw.
  layout(push_constant, std430) uniform Params {
      // Added to gl_InstanceIndex to obtain the index of the element being drawn.
      uint base_index;
      // Not read by the shader.
      // NOTE(review): presumably padding the block to 16 bytes - confirm against the host-side struct.
      uint pad0;
      uint pad1;
      uint pad2;
  } params;
  // Shared render state. The fragment stage declares the same block at the same
  // set/binding, so both declarations have to stay in sync.
  // Only `projection` is read by the vertex stage.
  layout(set = 0, binding = 1, std140) uniform State {
      // Applied to the transformed vertex to produce gl_Position.
      mat4 projection;
      // Multiplied with the interpolated depth in the fragment stage.
      float inv_z_far;
      // Right-shift that turns pixel coordinates into cluster (block) indices.
      uint screen_to_clusters_shift;
      // Multiplied by the cluster's y index when linearising cluster coordinates.
      uint cluster_screen_width;
      // How much data a single cluster takes (the linear cluster index is multiplied by it).
      uint cluster_data_size;
      // Added to the cluster's base offset to reach its per-element depth words.
      uint cluster_depth_offset;
      // Not read by the shader.
      // NOTE(review): presumably alignment padding - confirm against the host-side struct.
      uint pad0;
      uint pad1;
      uint pad2;
  } state;
  // One entry of the render element list. This shader only reads `scale` and
  // `transform_inv`; the remaining members are declared to keep the layout.
  struct RenderElement {
      uint type; // 0-4
      // NOTE(review): touches_near / touches_far / original_index are not read in
      // this file; their meaning has to be confirmed against the code that fills the buffer.
      bool touches_near;
      bool touches_far;
      uint original_index;
      // Applied in main() as vec4(vertex, 1.0) * transform_inv (vector on the left).
      mat3x4 transform_inv;
      // Per-axis factor multiplied into the input vertex before the transform.
      vec3 scale;
      uint pad;
  };
  // Read-only list of elements, indexed by params.base_index + gl_InstanceIndex.
  layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements {
      RenderElement data[];
  } render_elements;
  // Vertex entry point: picks the element for this instance, scales and
  // transforms the input vertex with that element's data, then projects it.
  void main() {
      // One instance is drawn per element; base_index offsets into the list.
      uint index = params.base_index + gl_InstanceIndex;
      element_index = index;

      // Scale first, then apply the element transform (row-vector multiply).
      vec3 scaled = vertex_attrib * render_elements.data[index].scale;
      vec3 transformed = vec4(scaled, 1.0) * render_elements.data[index].transform_inv;

      // The fragment stage receives the negated z as its depth value.
      depth_interp = -transformed.z;
      gl_Position = state.projection * vec4(transformed, 1.0);
  }
  #[fragment]

  #version 450

  #VERSION_DEFINES

  // The subgroup path (USE_SUBGROUPS) is enabled only when the ballot, arithmetic
  // and vote subgroup extensions are all reported as available, and never when
  // MOLTENVK_USED is defined (original note: Metal will corrupt GPU state otherwise).
  #if !defined(MOLTENVK_USED) && defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) && defined(has_GL_KHR_shader_subgroup_vote)

  #extension GL_KHR_shader_subgroup_ballot : enable
  #extension GL_KHR_shader_subgroup_arithmetic : enable
  #extension GL_KHR_shader_subgroup_vote : enable

  #define USE_SUBGROUPS

  #endif
  // Inputs written by the vertex stage.
  // depth_interp: interpolated negated z of the transformed vertex.
  layout(location = 0) in float depth_interp;
  // element_index: index of the element being rasterised (flat, not interpolated).
  layout(location = 1) in flat uint element_index;
  // Shared render state. Declared at the same set/binding as the vertex stage's
  // State block, so both declarations have to stay in sync.
  layout(set = 0, binding = 1, std140) uniform State {
      // Not read by the fragment stage.
      mat4 projection;
      // Scales depth_interp before it is quantised into 32 depth slices.
      float inv_z_far;
      // Right-shift that turns pixel coordinates into cluster (block) indices.
      uint screen_to_clusters_shift;
      // Row stride used to linearise the 2D cluster index (x + width * y).
      uint cluster_screen_width;
      // How much data a single cluster takes; the linear cluster index is multiplied by it.
      uint cluster_data_size;
      // Offset from a cluster's base to its per-element depth words.
      uint cluster_depth_offset;
      // Not read by the shader.
      // NOTE(review): presumably alignment padding - confirm against the host-side struct.
      uint pad0;
      uint pad1;
      uint pad2;
  } state;
  // Cluster data is laid out linearly; each cluster cell contains the following information:
  //  - a list of bits, one per element, marking the element as used
  //    (indexed by element_index >> 5; the original note sizes it as (max_elem_count/32)*4 uints)
  //  - a uint for each element marking the depth bits used when rendering (bits 0-31),
  //    starting at state.cluster_depth_offset within the cell
  layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
      uint data[];
  } cluster_render;
  // Colour output, declared only when the shader is built with USE_ATTACHMENT;
  // main() writes the last atomicOr result into it.
  #ifdef USE_ATTACHMENT
  layout(location = 0) out vec4 frag_color;
  #endif
  // Fragment entry point. For the screen cluster under this fragment it
  //  1. sets the bit that marks `element_index` as used in that cluster, and
  //  2. sets the bit of the depth slice (0-31) this fragment falls into, in the
  //     element's depth word for that cluster.
  // With USE_SUBGROUPS, only one invocation per distinct cluster in the subgroup
  // issues the atomics. When USE_ATTACHMENT is defined, the value returned by the
  // last atomicOr is written to frag_color.
  void main() {
      // Convert from screen (pixel) coordinates to cluster coordinates.
      uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;

      // Get the linear cluster offset from the screen position.
      uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y;

      // Multiply by the data size to land on the beginning of this cluster's element list.
      cluster_offset *= state.cluster_data_size;

      // Locate the current element in the list: one uint holds the bits of 32 elements.
      // Unsigned literals keep the shift in unsigned arithmetic (bit 31 included).
      uint usage_write_offset = cluster_offset + (element_index >> 5);
      uint usage_write_bit = 1u << (element_index & 0x1Fu);

      uint aux = 0u;

  #ifdef USE_SUBGROUPS

      // Only assigned (and only read) by non-helper invocations.
      uint cluster_thread_group_index;

      if (!gl_HelperInvocation) {
          //https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf

          uvec4 mask;

          while (true) {
              // Find the cluster offset of the first active thread.
              // Threads that did break; go inactive and no longer count.
              uint first = subgroupBroadcastFirst(cluster_offset);
              // Update the mask for the threads that match this cluster.
              mask = subgroupBallot(first == cluster_offset);
              if (first == cluster_offset) {
                  // This thread belongs to the group of threads that match this offset,
                  // so exit the loop.
                  break;
              }
          }

          // Position of this thread among the threads sharing its cluster offset.
          cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);

          // Only the first thread of each group issues the atomic.
          if (cluster_thread_group_index == 0) {
              aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
          }
      }
  #else
  // MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs
  #ifndef MOLTENVK_USED
      if (!gl_HelperInvocation)
  #endif
      {
          aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
      }
  #endif

      // Find the current element in the depth usage list and mark the current depth as used.
      float unit_depth = depth_interp * state.inv_z_far;

      // Quantise into 32 slices. The clamp is done in floating point BEFORE the
      // conversion: converting a negative float to uint is undefined in GLSL, so
      // clamping after the cast cannot guarantee a shift amount in 0..31.
      uint z_bit = uint(clamp(floor(unit_depth * 32.0), 0.0, 31.0));

      uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
      uint z_write_bit = 1u << z_bit;

  #ifdef USE_SUBGROUPS
      if (!gl_HelperInvocation) {
          // Merge all Zs.
          // NOTE(review): subgroupOr combines the bits of every active invocation,
          // not only those sharing this cluster, so the written mask may contain
          // depth slices contributed by other clusters' fragments - confirm this
          // over-marking is intended.
          z_write_bit = subgroupOr(z_write_bit);

          if (cluster_thread_group_index == 0) {
              aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
          }
      }
  #else
  // MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs
  #ifndef MOLTENVK_USED
      if (!gl_HelperInvocation)
  #endif
      {
          aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
      }
  #endif

  #ifdef USE_ATTACHMENT
      frag_color = vec4(float(aux));
  #endif
  }