bvh_local.h 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /*
  2. * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
  3. * and code copyright 2009-2012 Intel Corporation
  4. *
  5. * Modifications Copyright 2011-2013, Blender Foundation.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. */
  19. #ifdef __QBVH__
  20. # include "kernel/bvh/qbvh_local.h"
  21. # ifdef __KERNEL_AVX2__
  22. # include "kernel/bvh/obvh_local.h"
  23. # endif
  24. #endif
  25. #if BVH_FEATURE(BVH_HAIR)
  26. # define NODE_INTERSECT bvh_node_intersect
  27. #else
  28. # define NODE_INTERSECT bvh_aligned_node_intersect
  29. #endif
  30. /* This is a template BVH traversal function for finding local intersections
  31. * around the shading point, for subsurface scattering and bevel. We disable
  32. * various features for performance, and for instanced objects avoid traversing
  33. * other parts of the scene.
  34. *
  35. * BVH_MOTION: motion blur rendering
  36. */
  37. #ifndef __KERNEL_GPU__
  38. ccl_device
  39. #else
  40. ccl_device_inline
  41. #endif
  42. bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
  43. const Ray *ray,
  44. LocalIntersection *local_isect,
  45. int local_object,
  46. uint *lcg_state,
  47. int max_hits)
  48. {
  49. /* todo:
  50. * - test if pushing distance on the stack helps (for non shadow rays)
  51. * - separate version for shadow rays
  52. * - likely and unlikely for if() statements
  53. * - test restrict attribute for pointers
  54. */
  55. /* traversal stack in CUDA thread-local memory */
  56. int traversal_stack[BVH_STACK_SIZE];
  57. traversal_stack[0] = ENTRYPOINT_SENTINEL;
  58. /* traversal variables in registers */
  59. int stack_ptr = 0;
  60. int node_addr = kernel_tex_fetch(__object_node, local_object);
  61. /* ray parameters in registers */
  62. float3 P = ray->P;
  63. float3 dir = bvh_clamp_direction(ray->D);
  64. float3 idir = bvh_inverse_direction(dir);
  65. int object = OBJECT_NONE;
  66. float isect_t = ray->t;
  67. if (local_isect != NULL) {
  68. local_isect->num_hits = 0;
  69. }
  70. kernel_assert((local_isect == NULL) == (max_hits == 0));
  71. const int object_flag = kernel_tex_fetch(__object_flag, local_object);
  72. if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
  73. #if BVH_FEATURE(BVH_MOTION)
  74. Transform ob_itfm;
  75. isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
  76. #else
  77. isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
  78. #endif
  79. object = local_object;
  80. }
  81. #if defined(__KERNEL_SSE2__)
  82. const shuffle_swap_t shuf_identity = shuffle_swap_identity();
  83. const shuffle_swap_t shuf_swap = shuffle_swap_swap();
  84. const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
  85. ssef Psplat[3], idirsplat[3];
  86. # if BVH_FEATURE(BVH_HAIR)
  87. ssef tnear(0.0f), tfar(isect_t);
  88. # endif
  89. shuffle_swap_t shufflexyz[3];
  90. Psplat[0] = ssef(P.x);
  91. Psplat[1] = ssef(P.y);
  92. Psplat[2] = ssef(P.z);
  93. ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
  94. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  95. #endif
  96. /* traversal loop */
  97. do {
  98. do {
  99. /* traverse internal nodes */
  100. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  101. int node_addr_child1, traverse_mask;
  102. float dist[2];
  103. float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  104. #if !defined(__KERNEL_SSE2__)
  105. traverse_mask = NODE_INTERSECT(kg,
  106. P,
  107. # if BVH_FEATURE(BVH_HAIR)
  108. dir,
  109. # endif
  110. idir,
  111. isect_t,
  112. node_addr,
  113. PATH_RAY_ALL_VISIBILITY,
  114. dist);
  115. #else // __KERNEL_SSE2__
  116. traverse_mask = NODE_INTERSECT(kg,
  117. P,
  118. dir,
  119. # if BVH_FEATURE(BVH_HAIR)
  120. tnear,
  121. tfar,
  122. # endif
  123. tsplat,
  124. Psplat,
  125. idirsplat,
  126. shufflexyz,
  127. node_addr,
  128. PATH_RAY_ALL_VISIBILITY,
  129. dist);
  130. #endif // __KERNEL_SSE2__
  131. node_addr = __float_as_int(cnodes.z);
  132. node_addr_child1 = __float_as_int(cnodes.w);
  133. if (traverse_mask == 3) {
  134. /* Both children were intersected, push the farther one. */
  135. bool is_closest_child1 = (dist[1] < dist[0]);
  136. if (is_closest_child1) {
  137. int tmp = node_addr;
  138. node_addr = node_addr_child1;
  139. node_addr_child1 = tmp;
  140. }
  141. ++stack_ptr;
  142. kernel_assert(stack_ptr < BVH_STACK_SIZE);
  143. traversal_stack[stack_ptr] = node_addr_child1;
  144. }
  145. else {
  146. /* One child was intersected. */
  147. if (traverse_mask == 2) {
  148. node_addr = node_addr_child1;
  149. }
  150. else if (traverse_mask == 0) {
  151. /* Neither child was intersected. */
  152. node_addr = traversal_stack[stack_ptr];
  153. --stack_ptr;
  154. }
  155. }
  156. }
  157. /* if node is leaf, fetch triangle list */
  158. if (node_addr < 0) {
  159. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  160. int prim_addr = __float_as_int(leaf.x);
  161. const int prim_addr2 = __float_as_int(leaf.y);
  162. const uint type = __float_as_int(leaf.w);
  163. /* pop */
  164. node_addr = traversal_stack[stack_ptr];
  165. --stack_ptr;
  166. /* primitive intersection */
  167. switch (type & PRIMITIVE_ALL) {
  168. case PRIMITIVE_TRIANGLE: {
  169. /* intersect ray against primitive */
  170. for (; prim_addr < prim_addr2; prim_addr++) {
  171. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  172. if (triangle_intersect_local(kg,
  173. local_isect,
  174. P,
  175. dir,
  176. object,
  177. local_object,
  178. prim_addr,
  179. isect_t,
  180. lcg_state,
  181. max_hits)) {
  182. return true;
  183. }
  184. }
  185. break;
  186. }
  187. #if BVH_FEATURE(BVH_MOTION)
  188. case PRIMITIVE_MOTION_TRIANGLE: {
  189. /* intersect ray against primitive */
  190. for (; prim_addr < prim_addr2; prim_addr++) {
  191. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  192. if (motion_triangle_intersect_local(kg,
  193. local_isect,
  194. P,
  195. dir,
  196. ray->time,
  197. object,
  198. local_object,
  199. prim_addr,
  200. isect_t,
  201. lcg_state,
  202. max_hits)) {
  203. return true;
  204. }
  205. }
  206. break;
  207. }
  208. #endif
  209. default: {
  210. break;
  211. }
  212. }
  213. }
  214. } while (node_addr != ENTRYPOINT_SENTINEL);
  215. } while (node_addr != ENTRYPOINT_SENTINEL);
  216. return false;
  217. }
  218. ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
  219. const Ray *ray,
  220. LocalIntersection *local_isect,
  221. int local_object,
  222. uint *lcg_state,
  223. int max_hits)
  224. {
  225. switch (kernel_data.bvh.bvh_layout) {
  226. #ifdef __KERNEL_AVX2__
  227. case BVH_LAYOUT_BVH8:
  228. return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
  229. #endif
  230. #ifdef __QBVH__
  231. case BVH_LAYOUT_BVH4:
  232. return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
  233. #endif
  234. case BVH_LAYOUT_BVH2:
  235. return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
  236. }
  237. kernel_assert(!"Should not happen");
  238. return false;
  239. }
  /* Remove the macros this template header relied on. NODE_INTERSECT is
   * defined near the top of this header; BVH_FUNCTION_NAME and
   * BVH_FUNCTION_FEATURES are not defined in the visible part of this file and
   * presumably come from the including file - TODO confirm. */
  #undef NODE_INTERSECT
  #undef BVH_FUNCTION_FEATURES
  #undef BVH_FUNCTION_NAME