/*
 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
 * and code copyright 2009-2012 Intel Corporation
 *
 * Modifications Copyright 2011-2014, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  19. #ifdef __QBVH__
  20. # include "kernel/bvh/qbvh_volume_all.h"
  21. # ifdef __KERNEL_AVX2__
  22. # include "kernel/bvh/obvh_volume_all.h"
  23. # endif
  24. #endif
  25. #if BVH_FEATURE(BVH_HAIR)
  26. # define NODE_INTERSECT bvh_node_intersect
  27. #else
  28. # define NODE_INTERSECT bvh_aligned_node_intersect
  29. #endif
  30. /* This is a template BVH traversal function for volumes, where
  31. * various features can be enabled/disabled. This way we can compile optimized
  32. * versions for each case without new features slowing things down.
  33. *
  34. * BVH_INSTANCING: object instancing
  35. * BVH_MOTION: motion blur rendering
  36. */
  37. #ifndef __KERNEL_GPU__
  38. ccl_device
  39. #else
  40. ccl_device_inline
  41. #endif
  42. uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
  43. const Ray *ray,
  44. Intersection *isect_array,
  45. const uint max_hits,
  46. const uint visibility)
  47. {
  48. /* todo:
  49. * - test if pushing distance on the stack helps (for non shadow rays)
  50. * - separate version for shadow rays
  51. * - likely and unlikely for if() statements
  52. * - test restrict attribute for pointers
  53. */
  54. /* traversal stack in CUDA thread-local memory */
  55. int traversal_stack[BVH_STACK_SIZE];
  56. traversal_stack[0] = ENTRYPOINT_SENTINEL;
  57. /* traversal variables in registers */
  58. int stack_ptr = 0;
  59. int node_addr = kernel_data.bvh.root;
  60. /* ray parameters in registers */
  61. const float tmax = ray->t;
  62. float3 P = ray->P;
  63. float3 dir = bvh_clamp_direction(ray->D);
  64. float3 idir = bvh_inverse_direction(dir);
  65. int object = OBJECT_NONE;
  66. float isect_t = tmax;
  67. #if BVH_FEATURE(BVH_MOTION)
  68. Transform ob_itfm;
  69. #endif
  70. #if BVH_FEATURE(BVH_INSTANCING)
  71. int num_hits_in_instance = 0;
  72. #endif
  73. uint num_hits = 0;
  74. isect_array->t = tmax;
  75. #if defined(__KERNEL_SSE2__)
  76. const shuffle_swap_t shuf_identity = shuffle_swap_identity();
  77. const shuffle_swap_t shuf_swap = shuffle_swap_swap();
  78. const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
  79. ssef Psplat[3], idirsplat[3];
  80. # if BVH_FEATURE(BVH_HAIR)
  81. ssef tnear(0.0f), tfar(isect_t);
  82. # endif
  83. shuffle_swap_t shufflexyz[3];
  84. Psplat[0] = ssef(P.x);
  85. Psplat[1] = ssef(P.y);
  86. Psplat[2] = ssef(P.z);
  87. ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
  88. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  89. #endif /* __KERNEL_SSE2__ */
  90. /* traversal loop */
  91. do {
  92. do {
  93. /* traverse internal nodes */
  94. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  95. int node_addr_child1, traverse_mask;
  96. float dist[2];
  97. float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  98. #if !defined(__KERNEL_SSE2__)
  99. traverse_mask = NODE_INTERSECT(kg,
  100. P,
  101. # if BVH_FEATURE(BVH_HAIR)
  102. dir,
  103. # endif
  104. idir,
  105. isect_t,
  106. node_addr,
  107. visibility,
  108. dist);
  109. #else // __KERNEL_SSE2__
  110. traverse_mask = NODE_INTERSECT(kg,
  111. P,
  112. dir,
  113. # if BVH_FEATURE(BVH_HAIR)
  114. tnear,
  115. tfar,
  116. # endif
  117. tsplat,
  118. Psplat,
  119. idirsplat,
  120. shufflexyz,
  121. node_addr,
  122. visibility,
  123. dist);
  124. #endif // __KERNEL_SSE2__
  125. node_addr = __float_as_int(cnodes.z);
  126. node_addr_child1 = __float_as_int(cnodes.w);
  127. if (traverse_mask == 3) {
  128. /* Both children were intersected, push the farther one. */
  129. bool is_closest_child1 = (dist[1] < dist[0]);
  130. if (is_closest_child1) {
  131. int tmp = node_addr;
  132. node_addr = node_addr_child1;
  133. node_addr_child1 = tmp;
  134. }
  135. ++stack_ptr;
  136. kernel_assert(stack_ptr < BVH_STACK_SIZE);
  137. traversal_stack[stack_ptr] = node_addr_child1;
  138. }
  139. else {
  140. /* One child was intersected. */
  141. if (traverse_mask == 2) {
  142. node_addr = node_addr_child1;
  143. }
  144. else if (traverse_mask == 0) {
  145. /* Neither child was intersected. */
  146. node_addr = traversal_stack[stack_ptr];
  147. --stack_ptr;
  148. }
  149. }
  150. }
  151. /* if node is leaf, fetch triangle list */
  152. if (node_addr < 0) {
  153. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  154. int prim_addr = __float_as_int(leaf.x);
  155. #if BVH_FEATURE(BVH_INSTANCING)
  156. if (prim_addr >= 0) {
  157. #endif
  158. const int prim_addr2 = __float_as_int(leaf.y);
  159. const uint type = __float_as_int(leaf.w);
  160. bool hit;
  161. /* pop */
  162. node_addr = traversal_stack[stack_ptr];
  163. --stack_ptr;
  164. /* primitive intersection */
  165. switch (type & PRIMITIVE_ALL) {
  166. case PRIMITIVE_TRIANGLE: {
  167. /* intersect ray against primitive */
  168. for (; prim_addr < prim_addr2; prim_addr++) {
  169. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  170. /* only primitives from volume object */
  171. uint tri_object = (object == OBJECT_NONE) ?
  172. kernel_tex_fetch(__prim_object, prim_addr) :
  173. object;
  174. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  175. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  176. continue;
  177. }
  178. hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
  179. if (hit) {
  180. /* Move on to next entry in intersections array. */
  181. isect_array++;
  182. num_hits++;
  183. #if BVH_FEATURE(BVH_INSTANCING)
  184. num_hits_in_instance++;
  185. #endif
  186. isect_array->t = isect_t;
  187. if (num_hits == max_hits) {
  188. #if BVH_FEATURE(BVH_INSTANCING)
  189. if (object != OBJECT_NONE) {
  190. # if BVH_FEATURE(BVH_MOTION)
  191. float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
  192. # else
  193. Transform itfm = object_fetch_transform(
  194. kg, object, OBJECT_INVERSE_TRANSFORM);
  195. float t_fac = 1.0f / len(transform_direction(&itfm, dir));
  196. # endif
  197. for (int i = 0; i < num_hits_in_instance; i++) {
  198. (isect_array - i - 1)->t *= t_fac;
  199. }
  200. }
  201. #endif /* BVH_FEATURE(BVH_INSTANCING) */
  202. return num_hits;
  203. }
  204. }
  205. }
  206. break;
  207. }
  208. #if BVH_FEATURE(BVH_MOTION)
  209. case PRIMITIVE_MOTION_TRIANGLE: {
  210. /* intersect ray against primitive */
  211. for (; prim_addr < prim_addr2; prim_addr++) {
  212. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  213. /* only primitives from volume object */
  214. uint tri_object = (object == OBJECT_NONE) ?
  215. kernel_tex_fetch(__prim_object, prim_addr) :
  216. object;
  217. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  218. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  219. continue;
  220. }
  221. hit = motion_triangle_intersect(
  222. kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
  223. if (hit) {
  224. /* Move on to next entry in intersections array. */
  225. isect_array++;
  226. num_hits++;
  227. # if BVH_FEATURE(BVH_INSTANCING)
  228. num_hits_in_instance++;
  229. # endif
  230. isect_array->t = isect_t;
  231. if (num_hits == max_hits) {
  232. # if BVH_FEATURE(BVH_INSTANCING)
  233. if (object != OBJECT_NONE) {
  234. # if BVH_FEATURE(BVH_MOTION)
  235. float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
  236. # else
  237. Transform itfm = object_fetch_transform(
  238. kg, object, OBJECT_INVERSE_TRANSFORM);
  239. float t_fac = 1.0f / len(transform_direction(&itfm, dir));
  240. # endif
  241. for (int i = 0; i < num_hits_in_instance; i++) {
  242. (isect_array - i - 1)->t *= t_fac;
  243. }
  244. }
  245. # endif /* BVH_FEATURE(BVH_INSTANCING) */
  246. return num_hits;
  247. }
  248. }
  249. }
  250. break;
  251. }
  252. #endif /* BVH_MOTION */
  253. default: {
  254. break;
  255. }
  256. }
  257. }
  258. #if BVH_FEATURE(BVH_INSTANCING)
  259. else {
  260. /* instance push */
  261. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  262. int object_flag = kernel_tex_fetch(__object_flag, object);
  263. if (object_flag & SD_OBJECT_HAS_VOLUME) {
  264. # if BVH_FEATURE(BVH_MOTION)
  265. isect_t = bvh_instance_motion_push(
  266. kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
  267. # else
  268. isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
  269. # endif
  270. num_hits_in_instance = 0;
  271. isect_array->t = isect_t;
  272. # if defined(__KERNEL_SSE2__)
  273. Psplat[0] = ssef(P.x);
  274. Psplat[1] = ssef(P.y);
  275. Psplat[2] = ssef(P.z);
  276. tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
  277. # if BVH_FEATURE(BVH_HAIR)
  278. tfar = ssef(isect_t);
  279. # endif
  280. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  281. # endif
  282. ++stack_ptr;
  283. kernel_assert(stack_ptr < BVH_STACK_SIZE);
  284. traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
  285. node_addr = kernel_tex_fetch(__object_node, object);
  286. }
  287. else {
  288. /* pop */
  289. object = OBJECT_NONE;
  290. node_addr = traversal_stack[stack_ptr];
  291. --stack_ptr;
  292. }
  293. }
  294. }
  295. #endif /* FEATURE(BVH_INSTANCING) */
  296. } while (node_addr != ENTRYPOINT_SENTINEL);
  297. #if BVH_FEATURE(BVH_INSTANCING)
  298. if (stack_ptr >= 0) {
  299. kernel_assert(object != OBJECT_NONE);
  300. /* Instance pop. */
  301. if (num_hits_in_instance) {
  302. float t_fac;
  303. # if BVH_FEATURE(BVH_MOTION)
  304. bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
  305. # else
  306. bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
  307. # endif
  308. /* Scale isect->t to adjust for instancing. */
  309. for (int i = 0; i < num_hits_in_instance; i++) {
  310. (isect_array - i - 1)->t *= t_fac;
  311. }
  312. }
  313. else {
  314. # if BVH_FEATURE(BVH_MOTION)
  315. bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
  316. # else
  317. bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
  318. # endif
  319. }
  320. isect_t = tmax;
  321. isect_array->t = isect_t;
  322. # if defined(__KERNEL_SSE2__)
  323. Psplat[0] = ssef(P.x);
  324. Psplat[1] = ssef(P.y);
  325. Psplat[2] = ssef(P.z);
  326. tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
  327. # if BVH_FEATURE(BVH_HAIR)
  328. tfar = ssef(isect_t);
  329. # endif
  330. gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
  331. # endif
  332. object = OBJECT_NONE;
  333. node_addr = traversal_stack[stack_ptr];
  334. --stack_ptr;
  335. }
  336. #endif /* FEATURE(BVH_INSTANCING) */
  337. } while (node_addr != ENTRYPOINT_SENTINEL);
  338. return num_hits;
  339. }
  340. ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
  341. const Ray *ray,
  342. Intersection *isect_array,
  343. const uint max_hits,
  344. const uint visibility)
  345. {
  346. switch (kernel_data.bvh.bvh_layout) {
  347. #ifdef __KERNEL_AVX2__
  348. case BVH_LAYOUT_BVH8:
  349. return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
  350. #endif
  351. #ifdef __QBVH__
  352. case BVH_LAYOUT_BVH4:
  353. return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
  354. #endif
  355. case BVH_LAYOUT_BVH2:
  356. return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
  357. }
  358. kernel_assert(!"Should not happen");
  359. return 0;
  360. }
  /* Drop the helper macro defined earlier in this file... */
  #undef NODE_INTERSECT
  /* ...and the template parameters, which are not defined in this file
   * (presumably set by the including file before each inclusion -- confirm). */
  #undef BVH_FUNCTION_FEATURES
  #undef BVH_FUNCTION_NAME