/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This is a template BVH traversal function for volumes, where
 * various features can be enabled/disabled. This way we can compile optimized
 * versions for each case without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_MOTION: motion blur rendering
 */
  23. #if BVH_FEATURE(BVH_HAIR)
  24. # define NODE_INTERSECT qbvh_node_intersect
  25. #else
  26. # define NODE_INTERSECT qbvh_aligned_node_intersect
  27. #endif
  28. ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
  29. const Ray *ray,
  30. Intersection *isect,
  31. const uint visibility)
  32. {
  33. /* TODO(sergey):
  34. * - Test if pushing distance on the stack helps.
  35. * - Likely and unlikely for if() statements.
  36. * - Test restrict attribute for pointers.
  37. */
  38. /* Traversal stack in CUDA thread-local memory. */
  39. QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
  40. traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
  41. /* Traversal variables in registers. */
  42. int stack_ptr = 0;
  43. int node_addr = kernel_data.bvh.root;
  44. /* Ray parameters in registers. */
  45. float3 P = ray->P;
  46. float3 dir = bvh_clamp_direction(ray->D);
  47. float3 idir = bvh_inverse_direction(dir);
  48. int object = OBJECT_NONE;
  49. #if BVH_FEATURE(BVH_MOTION)
  50. Transform ob_itfm;
  51. #endif
  52. isect->t = ray->t;
  53. isect->u = 0.0f;
  54. isect->v = 0.0f;
  55. isect->prim = PRIM_NONE;
  56. isect->object = OBJECT_NONE;
  57. ssef tnear(0.0f), tfar(ray->t);
  58. #if BVH_FEATURE(BVH_HAIR)
  59. sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  60. #endif
  61. sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  62. #ifdef __KERNEL_AVX2__
  63. float3 P_idir = P * idir;
  64. sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
  65. #endif
  66. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  67. sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
  68. #endif
  69. /* Offsets to select the side that becomes the lower or upper bound. */
  70. int near_x, near_y, near_z;
  71. int far_x, far_y, far_z;
  72. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  73. /* Traversal loop. */
  74. do {
  75. do {
  76. /* Traverse internal nodes. */
  77. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  78. float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  79. #ifdef __VISIBILITY_FLAG__
  80. if ((__float_as_uint(inodes.x) & visibility) == 0) {
  81. /* Pop. */
  82. node_addr = traversal_stack[stack_ptr].addr;
  83. --stack_ptr;
  84. continue;
  85. }
  86. #endif
  87. ssef dist;
  88. int child_mask = NODE_INTERSECT(kg,
  89. tnear,
  90. tfar,
  91. #ifdef __KERNEL_AVX2__
  92. P_idir4,
  93. #endif
  94. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  95. org4,
  96. #endif
  97. #if BVH_FEATURE(BVH_HAIR)
  98. dir4,
  99. #endif
  100. idir4,
  101. near_x,
  102. near_y,
  103. near_z,
  104. far_x,
  105. far_y,
  106. far_z,
  107. node_addr,
  108. &dist);
  109. if (child_mask != 0) {
  110. float4 cnodes;
  111. #if BVH_FEATURE(BVH_HAIR)
  112. if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
  113. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
  114. }
  115. else
  116. #endif
  117. {
  118. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
  119. }
  120. /* One child is hit, continue with that child. */
  121. int r = __bscf(child_mask);
  122. if (child_mask == 0) {
  123. node_addr = __float_as_int(cnodes[r]);
  124. continue;
  125. }
  126. /* Two children are hit, push far child, and continue with
  127. * closer child.
  128. */
  129. int c0 = __float_as_int(cnodes[r]);
  130. float d0 = ((float *)&dist)[r];
  131. r = __bscf(child_mask);
  132. int c1 = __float_as_int(cnodes[r]);
  133. float d1 = ((float *)&dist)[r];
  134. if (child_mask == 0) {
  135. if (d1 < d0) {
  136. node_addr = c1;
  137. ++stack_ptr;
  138. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  139. traversal_stack[stack_ptr].addr = c0;
  140. traversal_stack[stack_ptr].dist = d0;
  141. continue;
  142. }
  143. else {
  144. node_addr = c0;
  145. ++stack_ptr;
  146. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  147. traversal_stack[stack_ptr].addr = c1;
  148. traversal_stack[stack_ptr].dist = d1;
  149. continue;
  150. }
  151. }
  152. /* Here starts the slow path for 3 or 4 hit children. We push
  153. * all nodes onto the stack to sort them there.
  154. */
  155. ++stack_ptr;
  156. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  157. traversal_stack[stack_ptr].addr = c1;
  158. traversal_stack[stack_ptr].dist = d1;
  159. ++stack_ptr;
  160. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  161. traversal_stack[stack_ptr].addr = c0;
  162. traversal_stack[stack_ptr].dist = d0;
  163. /* Three children are hit, push all onto stack and sort 3
  164. * stack items, continue with closest child.
  165. */
  166. r = __bscf(child_mask);
  167. int c2 = __float_as_int(cnodes[r]);
  168. float d2 = ((float *)&dist)[r];
  169. if (child_mask == 0) {
  170. ++stack_ptr;
  171. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  172. traversal_stack[stack_ptr].addr = c2;
  173. traversal_stack[stack_ptr].dist = d2;
  174. qbvh_stack_sort(&traversal_stack[stack_ptr],
  175. &traversal_stack[stack_ptr - 1],
  176. &traversal_stack[stack_ptr - 2]);
  177. node_addr = traversal_stack[stack_ptr].addr;
  178. --stack_ptr;
  179. continue;
  180. }
  181. /* Four children are hit, push all onto stack and sort 4
  182. * stack items, continue with closest child.
  183. */
  184. r = __bscf(child_mask);
  185. int c3 = __float_as_int(cnodes[r]);
  186. float d3 = ((float *)&dist)[r];
  187. ++stack_ptr;
  188. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  189. traversal_stack[stack_ptr].addr = c3;
  190. traversal_stack[stack_ptr].dist = d3;
  191. ++stack_ptr;
  192. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  193. traversal_stack[stack_ptr].addr = c2;
  194. traversal_stack[stack_ptr].dist = d2;
  195. qbvh_stack_sort(&traversal_stack[stack_ptr],
  196. &traversal_stack[stack_ptr - 1],
  197. &traversal_stack[stack_ptr - 2],
  198. &traversal_stack[stack_ptr - 3]);
  199. }
  200. node_addr = traversal_stack[stack_ptr].addr;
  201. --stack_ptr;
  202. }
  203. /* If node is leaf, fetch triangle list. */
  204. if (node_addr < 0) {
  205. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  206. if ((__float_as_uint(leaf.z) & visibility) == 0) {
  207. /* Pop. */
  208. node_addr = traversal_stack[stack_ptr].addr;
  209. --stack_ptr;
  210. continue;
  211. }
  212. int prim_addr = __float_as_int(leaf.x);
  213. #if BVH_FEATURE(BVH_INSTANCING)
  214. if (prim_addr >= 0) {
  215. #endif
  216. int prim_addr2 = __float_as_int(leaf.y);
  217. const uint type = __float_as_int(leaf.w);
  218. const uint p_type = type & PRIMITIVE_ALL;
  219. /* Pop. */
  220. node_addr = traversal_stack[stack_ptr].addr;
  221. --stack_ptr;
  222. /* Primitive intersection. */
  223. switch (p_type) {
  224. case PRIMITIVE_TRIANGLE: {
  225. for (; prim_addr < prim_addr2; prim_addr++) {
  226. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  227. /* Only primitives from volume object. */
  228. uint tri_object = (object == OBJECT_NONE) ?
  229. kernel_tex_fetch(__prim_object, prim_addr) :
  230. object;
  231. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  232. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  233. continue;
  234. }
  235. /* Intersect ray against primitive. */
  236. triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
  237. }
  238. break;
  239. }
  240. #if BVH_FEATURE(BVH_MOTION)
  241. case PRIMITIVE_MOTION_TRIANGLE: {
  242. for (; prim_addr < prim_addr2; prim_addr++) {
  243. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  244. /* Only primitives from volume object. */
  245. uint tri_object = (object == OBJECT_NONE) ?
  246. kernel_tex_fetch(__prim_object, prim_addr) :
  247. object;
  248. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  249. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  250. continue;
  251. }
  252. /* Intersect ray against primitive. */
  253. motion_triangle_intersect(
  254. kg, isect, P, dir, ray->time, visibility, object, prim_addr);
  255. }
  256. break;
  257. }
  258. #endif
  259. }
  260. }
  261. #if BVH_FEATURE(BVH_INSTANCING)
  262. else {
  263. /* Instance push. */
  264. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  265. int object_flag = kernel_tex_fetch(__object_flag, object);
  266. if (object_flag & SD_OBJECT_HAS_VOLUME) {
  267. # if BVH_FEATURE(BVH_MOTION)
  268. isect->t = bvh_instance_motion_push(
  269. kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
  270. # else
  271. isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
  272. # endif
  273. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  274. tfar = ssef(isect->t);
  275. # if BVH_FEATURE(BVH_HAIR)
  276. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  277. # endif
  278. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  279. # ifdef __KERNEL_AVX2__
  280. P_idir = P * idir;
  281. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  282. # endif
  283. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  284. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  285. # endif
  286. ++stack_ptr;
  287. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  288. traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
  289. node_addr = kernel_tex_fetch(__object_node, object);
  290. }
  291. else {
  292. /* Pop. */
  293. object = OBJECT_NONE;
  294. node_addr = traversal_stack[stack_ptr].addr;
  295. --stack_ptr;
  296. }
  297. }
  298. }
  299. #endif /* FEATURE(BVH_INSTANCING) */
  300. } while (node_addr != ENTRYPOINT_SENTINEL);
  301. #if BVH_FEATURE(BVH_INSTANCING)
  302. if (stack_ptr >= 0) {
  303. kernel_assert(object != OBJECT_NONE);
  304. /* Instance pop. */
  305. # if BVH_FEATURE(BVH_MOTION)
  306. isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
  307. # else
  308. isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
  309. # endif
  310. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  311. tfar = ssef(isect->t);
  312. # if BVH_FEATURE(BVH_HAIR)
  313. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  314. # endif
  315. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  316. # ifdef __KERNEL_AVX2__
  317. P_idir = P * idir;
  318. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  319. # endif
  320. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  321. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  322. # endif
  323. object = OBJECT_NONE;
  324. node_addr = traversal_stack[stack_ptr].addr;
  325. --stack_ptr;
  326. }
  327. #endif /* FEATURE(BVH_INSTANCING) */
  328. } while (node_addr != ENTRYPOINT_SENTINEL);
  329. return (isect->prim != PRIM_NONE);
  330. }
  331. #undef NODE_INTERSECT