kernel_do_volume.h 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /*
  2. * Copyright 2011-2017 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. CCL_NAMESPACE_BEGIN
  17. #if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
  18. ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg,
  19. int ray_index)
  20. {
  21. kernel_split_branched_path_indirect_loop_init(kg, ray_index);
  22. ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
  23. }
  24. ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg,
  25. int ray_index)
  26. {
  27. SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
  28. ShaderData *sd = kernel_split_sd(sd, ray_index);
  29. PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
  30. ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
  31. /* GPU: no decoupled ray marching, scatter probalistically */
  32. int num_samples = kernel_data.integrator.volume_samples;
  33. float num_samples_inv = 1.0f / num_samples;
  34. Ray volume_ray = branched_state->ray;
  35. volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ?
  36. branched_state->isect.t :
  37. FLT_MAX;
  38. bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
  39. for (int j = branched_state->next_sample; j < num_samples; j++) {
  40. ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
  41. *ps = branched_state->path_state;
  42. ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
  43. *pray = branched_state->ray;
  44. ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
  45. *tp = branched_state->throughput * num_samples_inv;
  46. /* branch RNG state */
  47. path_state_branch(ps, j, num_samples);
  48. /* integrate along volume segment with distance sampling */
  49. VolumeIntegrateResult result = kernel_volume_integrate(
  50. kg, ps, sd, &volume_ray, L, tp, heterogeneous);
  51. # ifdef __VOLUME_SCATTER__
  52. if (result == VOLUME_PATH_SCATTERED) {
  53. /* direct lighting */
  54. kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
  55. /* indirect light bounce */
  56. if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
  57. continue;
  58. }
  59. /* start the indirect path */
  60. branched_state->next_closure = 0;
  61. branched_state->next_sample = j + 1;
  62. /* Attempting to share too many samples is slow for volumes as it causes us to
  63. * loop here more and have many calls to kernel_volume_integrate which evaluates
  64. * shaders. The many expensive shader evaluations cause the work load to become
  65. * unbalanced and many threads to become idle in this kernel. Limiting the
  66. * number of shared samples here helps quite a lot.
  67. */
  68. if (branched_state->shared_sample_count < 2) {
  69. if (kernel_split_branched_indirect_start_shared(kg, ray_index)) {
  70. continue;
  71. }
  72. }
  73. return true;
  74. }
  75. # endif
  76. }
  77. branched_state->next_sample = num_samples;
  78. branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
  79. if (branched_state->waiting_on_shared_samples) {
  80. return true;
  81. }
  82. kernel_split_branched_path_indirect_loop_end(kg, ray_index);
  83. /* todo: avoid this calculation using decoupled ray marching */
  84. float3 throughput = kernel_split_state.throughput[ray_index];
  85. kernel_volume_shadow(
  86. kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
  87. kernel_split_state.throughput[ray_index] = throughput;
  88. return false;
  89. }
  90. #endif /* __BRANCHED_PATH__ && __VOLUME__ */
  91. ccl_device void kernel_do_volume(KernelGlobals *kg)
  92. {
  93. #ifdef __VOLUME__
  94. /* We will empty this queue in this kernel. */
  95. if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
  96. kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
  97. # ifdef __BRANCHED_PATH__
  98. kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
  99. # endif /* __BRANCHED_PATH__ */
  100. }
  101. int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
  102. if (*kernel_split_params.use_queues_flag) {
  103. ray_index = get_ray_index(kg,
  104. ray_index,
  105. QUEUE_ACTIVE_AND_REGENERATED_RAYS,
  106. kernel_split_state.queue_data,
  107. kernel_split_params.queue_size,
  108. 1);
  109. }
  110. ccl_global char *ray_state = kernel_split_state.ray_state;
  111. PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
  112. ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
  113. if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
  114. IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
  115. ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
  116. ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
  117. ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
  118. ShaderData *sd = kernel_split_sd(sd, ray_index);
  119. ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
  120. bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
  121. /* Sanitize volume stack. */
  122. if (!hit) {
  123. kernel_volume_clean_stack(kg, state->volume_stack);
  124. }
  125. /* volume attenuation, emission, scatter */
  126. if (state->volume_stack[0].shader != SHADER_NONE) {
  127. Ray volume_ray = *ray;
  128. volume_ray.t = (hit) ? isect->t : FLT_MAX;
  129. # ifdef __BRANCHED_PATH__
  130. if (!kernel_data.integrator.branched ||
  131. IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
  132. # endif /* __BRANCHED_PATH__ */
  133. bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
  134. {
  135. /* integrate along volume segment with distance sampling */
  136. VolumeIntegrateResult result = kernel_volume_integrate(
  137. kg, state, sd, &volume_ray, L, throughput, heterogeneous);
  138. # ifdef __VOLUME_SCATTER__
  139. if (result == VOLUME_PATH_SCATTERED) {
  140. /* direct lighting */
  141. kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
  142. /* indirect light bounce */
  143. if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
  144. ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
  145. }
  146. else {
  147. kernel_split_path_end(kg, ray_index);
  148. }
  149. }
  150. # endif /* __VOLUME_SCATTER__ */
  151. }
  152. # ifdef __BRANCHED_PATH__
  153. }
  154. else {
  155. kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
  156. if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
  157. ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
  158. }
  159. }
  160. # endif /* __BRANCHED_PATH__ */
  161. }
  162. }
  163. # ifdef __BRANCHED_PATH__
  164. /* iter loop */
  165. ray_index = get_ray_index(kg,
  166. ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
  167. QUEUE_VOLUME_INDIRECT_ITER,
  168. kernel_split_state.queue_data,
  169. kernel_split_params.queue_size,
  170. 1);
  171. if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
  172. /* for render passes, sum and reset indirect light pass variables
  173. * for the next samples */
  174. path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
  175. path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
  176. if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
  177. ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
  178. }
  179. }
  180. # endif /* __BRANCHED_PATH__ */
  181. #endif /* __VOLUME__ */
  182. }
  183. CCL_NAMESPACE_END