/* bvh_shadow_all.h */
/*
 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
 * and code copyright 2009-2012 Intel Corporation
 *
 * Modifications Copyright 2011-2013, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  19. #ifdef __QBVH__
  20. # include "kernel/bvh/qbvh_shadow_all.h"
  21. # ifdef __KERNEL_AVX2__
  22. # include "kernel/bvh/obvh_shadow_all.h"
  23. # endif
  24. #endif
  25. #if BVH_FEATURE(BVH_HAIR)
  26. # define NODE_INTERSECT bvh_node_intersect
  27. #else
  28. # define NODE_INTERSECT bvh_aligned_node_intersect
  29. #endif
  30. /* This is a template BVH traversal function, where various features can be
  31. * enabled/disabled. This way we can compile optimized versions for each case
  32. * without new features slowing things down.
  33. *
  34. * BVH_INSTANCING: object instancing
  35. * BVH_HAIR: hair curve rendering
  36. * BVH_MOTION: motion blur rendering
  37. */
  38. #ifndef __KERNEL_GPU__
  39. ccl_device
  40. #else
  41. ccl_device_inline
  42. #endif
  43. bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
  44. const Ray *ray,
  45. Intersection *isect_array,
  46. const uint visibility,
  47. const uint max_hits,
  48. uint *num_hits)
  49. {
  50. /* todo:
  51. * - likely and unlikely for if() statements
  52. * - test restrict attribute for pointers
  53. */
  54. /* traversal stack in CUDA thread-local memory */
  55. int traversal_stack[BVH_STACK_SIZE];
  56. traversal_stack[0] = ENTRYPOINT_SENTINEL;
  57. /* traversal variables in registers */
  58. int stack_ptr = 0;
  59. int node_addr = kernel_data.bvh.root;
  60. /* ray parameters in registers */
  61. const float tmax = ray->t;
  62. float3 P = ray->P;
  63. float3 dir = bvh_clamp_direction(ray->D);
  64. float3 idir = bvh_inverse_direction(dir);
  65. int object = OBJECT_NONE;
  66. float isect_t = tmax;
  67. #if BVH_FEATURE(BVH_MOTION)
  68. Transform ob_itfm;
  69. #endif
  70. #if BVH_FEATURE(BVH_INSTANCING)
  71. int num_hits_in_instance = 0;
  72. #endif
  73. *num_hits = 0;
  74. isect_array->t = tmax;
  75. #if defined(__KERNEL_SSE2__)
  76. const shuffle_swap_t shuf_identity = shuffle_swap_identity();
  77. const shuffle_swap_t shuf_swap = shuffle_swap_swap();
  78. const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
  79. ssef Psplat[3], idirsplat[3];
  80. # if BVH_FEATURE(BVH_HAIR)
  81. ssef tnear(0.0f), tfar(isect_t);
  82. # endif
  83. shuffle_swap_t shufflexyz[3];
  84. Psplat[0] = ssef(P.x);
  85. Psplat[1] = ssef(P.y);
  86. Psplat[2] = ssef(P.z);
  87. ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
  88. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  89. #endif /* __KERNEL_SSE2__ */
  90. /* traversal loop */
  91. do {
  92. do {
  93. /* traverse internal nodes */
  94. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  95. int node_addr_child1, traverse_mask;
  96. float dist[2];
  97. float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  98. #if !defined(__KERNEL_SSE2__)
  99. traverse_mask = NODE_INTERSECT(kg,
  100. P,
  101. # if BVH_FEATURE(BVH_HAIR)
  102. dir,
  103. # endif
  104. idir,
  105. isect_t,
  106. node_addr,
  107. visibility,
  108. dist);
  109. #else // __KERNEL_SSE2__
  110. traverse_mask = NODE_INTERSECT(kg,
  111. P,
  112. dir,
  113. # if BVH_FEATURE(BVH_HAIR)
  114. tnear,
  115. tfar,
  116. # endif
  117. tsplat,
  118. Psplat,
  119. idirsplat,
  120. shufflexyz,
  121. node_addr,
  122. visibility,
  123. dist);
  124. #endif // __KERNEL_SSE2__
  125. node_addr = __float_as_int(cnodes.z);
  126. node_addr_child1 = __float_as_int(cnodes.w);
  127. if (traverse_mask == 3) {
  128. /* Both children were intersected, push the farther one. */
  129. bool is_closest_child1 = (dist[1] < dist[0]);
  130. if (is_closest_child1) {
  131. int tmp = node_addr;
  132. node_addr = node_addr_child1;
  133. node_addr_child1 = tmp;
  134. }
  135. ++stack_ptr;
  136. kernel_assert(stack_ptr < BVH_STACK_SIZE);
  137. traversal_stack[stack_ptr] = node_addr_child1;
  138. }
  139. else {
  140. /* One child was intersected. */
  141. if (traverse_mask == 2) {
  142. node_addr = node_addr_child1;
  143. }
  144. else if (traverse_mask == 0) {
  145. /* Neither child was intersected. */
  146. node_addr = traversal_stack[stack_ptr];
  147. --stack_ptr;
  148. }
  149. }
  150. }
  151. /* if node is leaf, fetch triangle list */
  152. if (node_addr < 0) {
  153. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  154. int prim_addr = __float_as_int(leaf.x);
  155. #if BVH_FEATURE(BVH_INSTANCING)
  156. if (prim_addr >= 0) {
  157. #endif
  158. const int prim_addr2 = __float_as_int(leaf.y);
  159. const uint type = __float_as_int(leaf.w);
  160. const uint p_type = type & PRIMITIVE_ALL;
  161. /* pop */
  162. node_addr = traversal_stack[stack_ptr];
  163. --stack_ptr;
  164. /* primitive intersection */
  165. while (prim_addr < prim_addr2) {
  166. kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
  167. bool hit;
  168. /* todo: specialized intersect functions which don't fill in
  169. * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
  170. * might give a few % performance improvement */
  171. switch (p_type) {
  172. case PRIMITIVE_TRIANGLE: {
  173. hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
  174. break;
  175. }
  176. #if BVH_FEATURE(BVH_MOTION)
  177. case PRIMITIVE_MOTION_TRIANGLE: {
  178. hit = motion_triangle_intersect(
  179. kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
  180. break;
  181. }
  182. #endif
  183. #if BVH_FEATURE(BVH_HAIR)
  184. case PRIMITIVE_CURVE:
  185. case PRIMITIVE_MOTION_CURVE: {
  186. const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
  187. if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
  188. hit = cardinal_curve_intersect(kg,
  189. isect_array,
  190. P,
  191. dir,
  192. visibility,
  193. object,
  194. prim_addr,
  195. ray->time,
  196. curve_type);
  197. }
  198. else {
  199. hit = curve_intersect(kg,
  200. isect_array,
  201. P,
  202. dir,
  203. visibility,
  204. object,
  205. prim_addr,
  206. ray->time,
  207. curve_type);
  208. }
  209. break;
  210. }
  211. #endif
  212. default: {
  213. hit = false;
  214. break;
  215. }
  216. }
  217. /* shadow ray early termination */
  218. if (hit) {
  219. /* detect if this surface has a shader with transparent shadows */
  220. /* todo: optimize so primitive visibility flag indicates if
  221. * the primitive has a transparent shadow shader? */
  222. int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
  223. int shader = 0;
  224. #ifdef __HAIR__
  225. if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
  226. #endif
  227. {
  228. shader = kernel_tex_fetch(__tri_shader, prim);
  229. }
  230. #ifdef __HAIR__
  231. else {
  232. float4 str = kernel_tex_fetch(__curves, prim);
  233. shader = __float_as_int(str.z);
  234. }
  235. #endif
  236. int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
  237. /* if no transparent shadows, all light is blocked */
  238. if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
  239. return true;
  240. }
  241. /* if maximum number of hits reached, block all light */
  242. else if (*num_hits == max_hits) {
  243. return true;
  244. }
  245. /* move on to next entry in intersections array */
  246. isect_array++;
  247. (*num_hits)++;
  248. #if BVH_FEATURE(BVH_INSTANCING)
  249. num_hits_in_instance++;
  250. #endif
  251. isect_array->t = isect_t;
  252. }
  253. prim_addr++;
  254. }
  255. }
  256. #if BVH_FEATURE(BVH_INSTANCING)
  257. else {
  258. /* instance push */
  259. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  260. # if BVH_FEATURE(BVH_MOTION)
  261. isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
  262. # else
  263. isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
  264. # endif
  265. num_hits_in_instance = 0;
  266. isect_array->t = isect_t;
  267. # if defined(__KERNEL_SSE2__)
  268. Psplat[0] = ssef(P.x);
  269. Psplat[1] = ssef(P.y);
  270. Psplat[2] = ssef(P.z);
  271. tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
  272. # if BVH_FEATURE(BVH_HAIR)
  273. tfar = ssef(isect_t);
  274. # endif
  275. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  276. # endif
  277. ++stack_ptr;
  278. kernel_assert(stack_ptr < BVH_STACK_SIZE);
  279. traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
  280. node_addr = kernel_tex_fetch(__object_node, object);
  281. }
  282. }
  283. #endif /* FEATURE(BVH_INSTANCING) */
  284. } while (node_addr != ENTRYPOINT_SENTINEL);
  285. #if BVH_FEATURE(BVH_INSTANCING)
  286. if (stack_ptr >= 0) {
  287. kernel_assert(object != OBJECT_NONE);
  288. /* Instance pop. */
  289. if (num_hits_in_instance) {
  290. float t_fac;
  291. # if BVH_FEATURE(BVH_MOTION)
  292. bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
  293. # else
  294. bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
  295. # endif
  296. /* scale isect->t to adjust for instancing */
  297. for (int i = 0; i < num_hits_in_instance; i++) {
  298. (isect_array - i - 1)->t *= t_fac;
  299. }
  300. }
  301. else {
  302. # if BVH_FEATURE(BVH_MOTION)
  303. bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
  304. # else
  305. bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
  306. # endif
  307. }
  308. isect_t = tmax;
  309. isect_array->t = isect_t;
  310. # if defined(__KERNEL_SSE2__)
  311. Psplat[0] = ssef(P.x);
  312. Psplat[1] = ssef(P.y);
  313. Psplat[2] = ssef(P.z);
  314. tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
  315. # if BVH_FEATURE(BVH_HAIR)
  316. tfar = ssef(isect_t);
  317. # endif
  318. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  319. # endif
  320. object = OBJECT_NONE;
  321. node_addr = traversal_stack[stack_ptr];
  322. --stack_ptr;
  323. }
  324. #endif /* FEATURE(BVH_INSTANCING) */
  325. } while (node_addr != ENTRYPOINT_SENTINEL);
  326. return false;
  327. }
  328. ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
  329. const Ray *ray,
  330. Intersection *isect_array,
  331. const uint visibility,
  332. const uint max_hits,
  333. uint *num_hits)
  334. {
  335. switch (kernel_data.bvh.bvh_layout) {
  336. #ifdef __KERNEL_AVX2__
  337. case BVH_LAYOUT_BVH8:
  338. return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
  339. #endif
  340. #ifdef __QBVH__
  341. case BVH_LAYOUT_BVH4:
  342. return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
  343. #endif
  344. case BVH_LAYOUT_BVH2:
  345. return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
  346. }
  347. kernel_assert(!"Should not happen");
  348. return false;
  349. }
  /* Drop the per-instantiation macros, presumably so this header can be
   * included again with a different name and feature set (confirm against
   * the including file). */
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
#undef NODE_INTERSECT