qbvh_traversal.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /* This is a template BVH traversal function, where various features can be
  17. * enabled/disabled. This way we can compile optimized versions for each case
  18. * without new features slowing things down.
  19. *
  20. * BVH_INSTANCING: object instancing
  21. * BVH_HAIR: hair curve rendering
  22. * BVH_MOTION: motion blur rendering
  23. */
  24. #if BVH_FEATURE(BVH_HAIR)
  25. # define NODE_INTERSECT qbvh_node_intersect
  26. #else
  27. # define NODE_INTERSECT qbvh_aligned_node_intersect
  28. #endif
  29. ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
  30. const Ray *ray,
  31. Intersection *isect,
  32. const uint visibility)
  33. {
  34. /* TODO(sergey):
  35. * - Test if pushing distance on the stack helps (for non shadow rays).
  36. * - Separate version for shadow rays.
  37. * - Likely and unlikely for if() statements.
  38. * - Test restrict attribute for pointers.
  39. */
  40. /* Traversal stack in CUDA thread-local memory. */
  41. QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
  42. traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
  43. traversal_stack[0].dist = -FLT_MAX;
  44. /* Traversal variables in registers. */
  45. int stack_ptr = 0;
  46. int node_addr = kernel_data.bvh.root;
  47. float node_dist = -FLT_MAX;
  48. /* Ray parameters in registers. */
  49. float3 P = ray->P;
  50. float3 dir = bvh_clamp_direction(ray->D);
  51. float3 idir = bvh_inverse_direction(dir);
  52. int object = OBJECT_NONE;
  53. #if BVH_FEATURE(BVH_MOTION)
  54. Transform ob_itfm;
  55. #endif
  56. isect->t = ray->t;
  57. isect->u = 0.0f;
  58. isect->v = 0.0f;
  59. isect->prim = PRIM_NONE;
  60. isect->object = OBJECT_NONE;
  61. BVH_DEBUG_INIT();
  62. ssef tnear(0.0f), tfar(ray->t);
  63. #if BVH_FEATURE(BVH_HAIR)
  64. sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  65. #endif
  66. sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  67. #ifdef __KERNEL_AVX2__
  68. float3 P_idir = P * idir;
  69. sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  70. #endif
  71. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  72. sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  73. #endif
  74. /* Offsets to select the side that becomes the lower or upper bound. */
  75. int near_x, near_y, near_z;
  76. int far_x, far_y, far_z;
  77. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  78. /* Traversal loop. */
  79. do {
  80. do {
  81. /* Traverse internal nodes. */
  82. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  83. float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  84. (void)inodes;
  85. if (UNLIKELY(node_dist > isect->t)
  86. #if BVH_FEATURE(BVH_MOTION)
  87. || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
  88. #endif
  89. #ifdef __VISIBILITY_FLAG__
  90. || (__float_as_uint(inodes.x) & visibility) == 0
  91. #endif
  92. ) {
  93. /* Pop. */
  94. node_addr = traversal_stack[stack_ptr].addr;
  95. node_dist = traversal_stack[stack_ptr].dist;
  96. --stack_ptr;
  97. continue;
  98. }
  99. int child_mask;
  100. ssef dist;
  101. BVH_DEBUG_NEXT_NODE();
  102. {
  103. child_mask = NODE_INTERSECT(kg,
  104. tnear,
  105. tfar,
  106. #ifdef __KERNEL_AVX2__
  107. P_idir4,
  108. #endif
  109. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  110. org4,
  111. #endif
  112. #if BVH_FEATURE(BVH_HAIR)
  113. dir4,
  114. #endif
  115. idir4,
  116. near_x,
  117. near_y,
  118. near_z,
  119. far_x,
  120. far_y,
  121. far_z,
  122. node_addr,
  123. &dist);
  124. }
  125. if (child_mask != 0) {
  126. float4 cnodes;
  127. /* TODO(sergey): Investigate whether moving cnodes upwards
  128. * gives a speedup (will be different cache pattern but will
  129. * avoid extra check here).
  130. */
  131. #if BVH_FEATURE(BVH_HAIR)
  132. if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
  133. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
  134. }
  135. else
  136. #endif
  137. {
  138. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
  139. }
  140. /* One child is hit, continue with that child. */
  141. int r = __bscf(child_mask);
  142. float d0 = ((float *)&dist)[r];
  143. if (child_mask == 0) {
  144. node_addr = __float_as_int(cnodes[r]);
  145. node_dist = d0;
  146. continue;
  147. }
  148. /* Two children are hit, push far child, and continue with
  149. * closer child.
  150. */
  151. int c0 = __float_as_int(cnodes[r]);
  152. r = __bscf(child_mask);
  153. int c1 = __float_as_int(cnodes[r]);
  154. float d1 = ((float *)&dist)[r];
  155. if (child_mask == 0) {
  156. if (d1 < d0) {
  157. node_addr = c1;
  158. node_dist = d1;
  159. ++stack_ptr;
  160. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  161. traversal_stack[stack_ptr].addr = c0;
  162. traversal_stack[stack_ptr].dist = d0;
  163. continue;
  164. }
  165. else {
  166. node_addr = c0;
  167. node_dist = d0;
  168. ++stack_ptr;
  169. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  170. traversal_stack[stack_ptr].addr = c1;
  171. traversal_stack[stack_ptr].dist = d1;
  172. continue;
  173. }
  174. }
  175. /* Here starts the slow path for 3 or 4 hit children. We push
  176. * all nodes onto the stack to sort them there.
  177. */
  178. ++stack_ptr;
  179. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  180. traversal_stack[stack_ptr].addr = c1;
  181. traversal_stack[stack_ptr].dist = d1;
  182. ++stack_ptr;
  183. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  184. traversal_stack[stack_ptr].addr = c0;
  185. traversal_stack[stack_ptr].dist = d0;
  186. /* Three children are hit, push all onto stack and sort 3
  187. * stack items, continue with closest child.
  188. */
  189. r = __bscf(child_mask);
  190. int c2 = __float_as_int(cnodes[r]);
  191. float d2 = ((float *)&dist)[r];
  192. if (child_mask == 0) {
  193. ++stack_ptr;
  194. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  195. traversal_stack[stack_ptr].addr = c2;
  196. traversal_stack[stack_ptr].dist = d2;
  197. qbvh_stack_sort(&traversal_stack[stack_ptr],
  198. &traversal_stack[stack_ptr - 1],
  199. &traversal_stack[stack_ptr - 2]);
  200. node_addr = traversal_stack[stack_ptr].addr;
  201. node_dist = traversal_stack[stack_ptr].dist;
  202. --stack_ptr;
  203. continue;
  204. }
  205. /* Four children are hit, push all onto stack and sort 4
  206. * stack items, continue with closest child.
  207. */
  208. r = __bscf(child_mask);
  209. int c3 = __float_as_int(cnodes[r]);
  210. float d3 = ((float *)&dist)[r];
  211. ++stack_ptr;
  212. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  213. traversal_stack[stack_ptr].addr = c3;
  214. traversal_stack[stack_ptr].dist = d3;
  215. ++stack_ptr;
  216. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  217. traversal_stack[stack_ptr].addr = c2;
  218. traversal_stack[stack_ptr].dist = d2;
  219. qbvh_stack_sort(&traversal_stack[stack_ptr],
  220. &traversal_stack[stack_ptr - 1],
  221. &traversal_stack[stack_ptr - 2],
  222. &traversal_stack[stack_ptr - 3]);
  223. }
  224. node_addr = traversal_stack[stack_ptr].addr;
  225. node_dist = traversal_stack[stack_ptr].dist;
  226. --stack_ptr;
  227. }
  228. /* If node is leaf, fetch triangle list. */
  229. if (node_addr < 0) {
  230. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  231. #ifdef __VISIBILITY_FLAG__
  232. if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
  233. #else
  234. if (UNLIKELY((node_dist > isect->t)))
  235. #endif
  236. {
  237. /* Pop. */
  238. node_addr = traversal_stack[stack_ptr].addr;
  239. node_dist = traversal_stack[stack_ptr].dist;
  240. --stack_ptr;
  241. continue;
  242. }
  243. int prim_addr = __float_as_int(leaf.x);
  244. #if BVH_FEATURE(BVH_INSTANCING)
  245. if (prim_addr >= 0) {
  246. #endif
  247. int prim_addr2 = __float_as_int(leaf.y);
  248. const uint type = __float_as_int(leaf.w);
  249. /* Pop. */
  250. node_addr = traversal_stack[stack_ptr].addr;
  251. node_dist = traversal_stack[stack_ptr].dist;
  252. --stack_ptr;
  253. /* Primitive intersection. */
  254. switch (type & PRIMITIVE_ALL) {
  255. case PRIMITIVE_TRIANGLE: {
  256. for (; prim_addr < prim_addr2; prim_addr++) {
  257. BVH_DEBUG_NEXT_INTERSECTION();
  258. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  259. if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
  260. tfar = ssef(isect->t);
  261. /* Shadow ray early termination. */
  262. if (visibility & PATH_RAY_SHADOW_OPAQUE) {
  263. return true;
  264. }
  265. }
  266. }
  267. break;
  268. }
  269. #if BVH_FEATURE(BVH_MOTION)
  270. case PRIMITIVE_MOTION_TRIANGLE: {
  271. for (; prim_addr < prim_addr2; prim_addr++) {
  272. BVH_DEBUG_NEXT_INTERSECTION();
  273. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  274. if (motion_triangle_intersect(
  275. kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
  276. tfar = ssef(isect->t);
  277. /* Shadow ray early termination. */
  278. if (visibility & PATH_RAY_SHADOW_OPAQUE) {
  279. return true;
  280. }
  281. }
  282. }
  283. break;
  284. }
  285. #endif /* BVH_FEATURE(BVH_MOTION) */
  286. #if BVH_FEATURE(BVH_HAIR)
  287. case PRIMITIVE_CURVE:
  288. case PRIMITIVE_MOTION_CURVE: {
  289. for (; prim_addr < prim_addr2; prim_addr++) {
  290. BVH_DEBUG_NEXT_INTERSECTION();
  291. const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
  292. kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
  293. bool hit;
  294. if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
  295. hit = cardinal_curve_intersect(
  296. kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
  297. }
  298. else {
  299. hit = curve_intersect(
  300. kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
  301. }
  302. if (hit) {
  303. tfar = ssef(isect->t);
  304. /* Shadow ray early termination. */
  305. if (visibility & PATH_RAY_SHADOW_OPAQUE) {
  306. return true;
  307. }
  308. }
  309. }
  310. break;
  311. }
  312. #endif /* BVH_FEATURE(BVH_HAIR) */
  313. }
  314. }
  315. #if BVH_FEATURE(BVH_INSTANCING)
  316. else {
  317. /* Instance push. */
  318. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  319. # if BVH_FEATURE(BVH_MOTION)
  320. qbvh_instance_motion_push(
  321. kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
  322. # else
  323. qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
  324. # endif
  325. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  326. tfar = ssef(isect->t);
  327. # if BVH_FEATURE(BVH_HAIR)
  328. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  329. # endif
  330. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  331. # ifdef __KERNEL_AVX2__
  332. P_idir = P * idir;
  333. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  334. # endif
  335. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  336. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  337. # endif
  338. ++stack_ptr;
  339. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  340. traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
  341. traversal_stack[stack_ptr].dist = -FLT_MAX;
  342. node_addr = kernel_tex_fetch(__object_node, object);
  343. BVH_DEBUG_NEXT_INSTANCE();
  344. }
  345. }
  346. #endif /* FEATURE(BVH_INSTANCING) */
  347. } while (node_addr != ENTRYPOINT_SENTINEL);
  348. #if BVH_FEATURE(BVH_INSTANCING)
  349. if (stack_ptr >= 0) {
  350. kernel_assert(object != OBJECT_NONE);
  351. /* Instance pop. */
  352. # if BVH_FEATURE(BVH_MOTION)
  353. isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
  354. # else
  355. isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
  356. # endif
  357. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  358. tfar = ssef(isect->t);
  359. # if BVH_FEATURE(BVH_HAIR)
  360. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  361. # endif
  362. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  363. # ifdef __KERNEL_AVX2__
  364. P_idir = P * idir;
  365. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  366. # endif
  367. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  368. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  369. # endif
  370. object = OBJECT_NONE;
  371. node_addr = traversal_stack[stack_ptr].addr;
  372. node_dist = traversal_stack[stack_ptr].dist;
  373. --stack_ptr;
  374. }
  375. #endif /* FEATURE(BVH_INSTANCING) */
  376. } while (node_addr != ENTRYPOINT_SENTINEL);
  377. return (isect->prim != PRIM_NONE);
  378. }
  379. #undef NODE_INTERSECT