qbvh_volume_all.h

/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This is a template BVH traversal function for volumes, where
 * various features can be enabled/disabled. This way we can compile optimized
 * versions for each case without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_MOTION: motion blur rendering
 */
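/* With BVH_HAIR the tree can contain unaligned (oriented) nodes for curve
 * segments, so the intersector that handles both node types is selected. */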
#if BVH_FEATURE(BVH_HAIR)
#  define NODE_INTERSECT qbvh_node_intersect
#else
#  define NODE_INTERSECT qbvh_aligned_node_intersect
#endif

ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                             const Ray *ray,
                                             Intersection *isect_array,
                                             const uint max_hits,
                                             const uint visibility)
{
  /* TODO(sergey):
   * - Test if pushing distance on the stack helps.
   * - Likely and unlikely for if() statements.
   * - Test restrict attribute for pointers.
   */

  /* Traversal stack in CUDA thread-local memory. */
  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
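  /* The sentinel at the bottom of the stack terminates the traversal loops
   * once it is popped. */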
  /* Traversal variables in registers. */
  int stack_ptr = 0;
  int node_addr = kernel_data.bvh.root;

  /* Ray parameters in registers. */
  const float tmax = ray->t;
  float3 P = ray->P;
  float3 dir = bvh_clamp_direction(ray->D);
  float3 idir = bvh_inverse_direction(dir);
  int object = OBJECT_NONE;
  float isect_t = tmax;

#if BVH_FEATURE(BVH_MOTION)
  Transform ob_itfm;
#endif

  uint num_hits = 0;
  isect_array->t = tmax;

#if BVH_FEATURE(BVH_INSTANCING)
  int num_hits_in_instance = 0;
#endif

  ssef tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#endif
  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));

#ifdef __KERNEL_AVX2__
  float3 P_idir = P * idir;
  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif

  /* Offsets to select the side that becomes the lower or upper bound. */
  int near_x, near_y, near_z;
  int far_x, far_y, far_z;
  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
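  /* These indices depend only on the ray direction signs and are reused by
   * NODE_INTERSECT for every node; they are recomputed whenever idir changes. */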
  /* Traversal loop. */
  do {
    do {
      /* Traverse internal nodes. */
      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);

#ifdef __VISIBILITY_FLAG__
        if ((__float_as_uint(inodes.x) & visibility) == 0) {
          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }
#endif

        ssef dist;
        int child_mask = NODE_INTERSECT(kg,
                                        tnear,
                                        tfar,
#ifdef __KERNEL_AVX2__
                                        P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
                                        org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
                                        dir4,
#endif
                                        idir4,
                                        near_x,
                                        near_y,
                                        near_z,
                                        far_x,
                                        far_y,
                                        far_z,
                                        node_addr,
                                        &dist);
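        /* Each set bit in child_mask marks a child whose bounds the ray
         * overlaps; dist holds the corresponding entry distances. */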
        if (child_mask != 0) {
          float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
          }
          else
#endif
          {
            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
          }

          /* One child is hit, continue with that child. */
          int r = __bscf(child_mask);
          if (child_mask == 0) {
            node_addr = __float_as_int(cnodes[r]);
            continue;
          }

          /* Two children are hit, push far child, and continue with
           * closer child.
           */
          int c0 = __float_as_int(cnodes[r]);
          float d0 = ((float *)&dist)[r];
          r = __bscf(child_mask);
          int c1 = __float_as_int(cnodes[r]);
          float d1 = ((float *)&dist)[r];
          if (child_mask == 0) {
            if (d1 < d0) {
              node_addr = c1;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c0;
              traversal_stack[stack_ptr].dist = d0;
              continue;
            }
            else {
              node_addr = c0;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c1;
              traversal_stack[stack_ptr].dist = d1;
              continue;
            }
          }

          /* Here starts the slow path for 3 or 4 hit children. We push
           * all nodes onto the stack to sort them there.
           */
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c1;
          traversal_stack[stack_ptr].dist = d1;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c0;
          traversal_stack[stack_ptr].dist = d0;

          /* Three children are hit, push all onto stack and sort 3
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c2 = __float_as_int(cnodes[r]);
          float d2 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c2;
            traversal_stack[stack_ptr].dist = d2;
            qbvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Four children are hit, push all onto stack and sort 4
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c3 = __float_as_int(cnodes[r]);
          float d3 = ((float *)&dist)[r];
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c3;
          traversal_stack[stack_ptr].dist = d3;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c2;
          traversal_stack[stack_ptr].dist = d2;
          qbvh_stack_sort(&traversal_stack[stack_ptr],
                          &traversal_stack[stack_ptr - 1],
                          &traversal_stack[stack_ptr - 2],
                          &traversal_stack[stack_ptr - 3]);
        }

        node_addr = traversal_stack[stack_ptr].addr;
        --stack_ptr;
      }

      /* If node is leaf, fetch triangle list. */
      if (node_addr < 0) {
        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));

        if ((__float_as_uint(leaf.z) & visibility) == 0) {
          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }

        int prim_addr = __float_as_int(leaf.x);

#if BVH_FEATURE(BVH_INSTANCING)
        if (prim_addr >= 0) {
#endif
          int prim_addr2 = __float_as_int(leaf.y);
          const uint type = __float_as_int(leaf.w);
          const uint p_type = type & PRIMITIVE_ALL;
          bool hit;

          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
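          /* The leaf stores its primitives as the half-open range
           * [prim_addr, prim_addr2). */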
          /* Primitive intersection. */
          switch (p_type) {
            case PRIMITIVE_TRIANGLE: {
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* Only primitives from volume object. */
                uint tri_object = (object == OBJECT_NONE) ?
                                      kernel_tex_fetch(__prim_object, prim_addr) :
                                      object;
                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
                /* Intersect ray against primitive. */
                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
                if (hit) {
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
#if BVH_FEATURE(BVH_INSTANCING)
                  num_hits_in_instance++;
#endif
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
                    if (object != OBJECT_NONE) {
#  if BVH_FEATURE(BVH_MOTION)
                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
#  else
                      Transform itfm = object_fetch_transform(
                          kg, object, OBJECT_INVERSE_TRANSFORM);
                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#  endif
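                      /* Rescale the distances recorded inside this instance to
                       * adjust for the instance transform before returning. */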
                      for (int i = 0; i < num_hits_in_instance; i++) {
                        (isect_array - i - 1)->t *= t_fac;
                      }
                    }
#endif /* BVH_FEATURE(BVH_INSTANCING) */
                    return num_hits;
                  }
                }
              }
              break;
            }
#if BVH_FEATURE(BVH_MOTION)
            case PRIMITIVE_MOTION_TRIANGLE: {
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* Only primitives from volume object. */
                uint tri_object = (object == OBJECT_NONE) ?
                                      kernel_tex_fetch(__prim_object, prim_addr) :
                                      object;
                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
                /* Intersect ray against primitive. */
                hit = motion_triangle_intersect(
                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
                if (hit) {
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
#  if BVH_FEATURE(BVH_INSTANCING)
                  num_hits_in_instance++;
#  endif
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
#  if BVH_FEATURE(BVH_INSTANCING)
                    if (object != OBJECT_NONE) {
#    if BVH_FEATURE(BVH_MOTION)
                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
#    else
                      Transform itfm = object_fetch_transform(
                          kg, object, OBJECT_INVERSE_TRANSFORM);
                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
#    endif
                      for (int i = 0; i < num_hits_in_instance; i++) {
                        (isect_array - i - 1)->t *= t_fac;
                      }
                    }
#  endif /* BVH_FEATURE(BVH_INSTANCING) */
                    return num_hits;
                  }
                }
              }
              break;
            }
#endif
          }
        }
#if BVH_FEATURE(BVH_INSTANCING)
        else {
          /* Instance push. */
          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
          int object_flag = kernel_tex_fetch(__object_flag, object);
          if (object_flag & SD_OBJECT_HAS_VOLUME) {
#  if BVH_FEATURE(BVH_MOTION)
            isect_t = bvh_instance_motion_push(
                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#  else
            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
#  endif
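            /* P, dir and idir are now in object space; refresh the precomputed
             * SIMD values used by the node intersection test. */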
            qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
            tfar = ssef(isect_t);
            idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#  if BVH_FEATURE(BVH_HAIR)
            dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#  endif
#  ifdef __KERNEL_AVX2__
            P_idir = P * idir;
            P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#  endif
#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
            org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#  endif

            num_hits_in_instance = 0;
            isect_array->t = isect_t;

            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
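            /* Traversal of the instance subtree ends at this sentinel and
             * falls through to the instance pop below. */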
            node_addr = kernel_tex_fetch(__object_node, object);
          }
          else {
            /* Pop. */
            object = OBJECT_NONE;
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
          }
        }
      }
#endif /* FEATURE(BVH_INSTANCING) */
    } while (node_addr != ENTRYPOINT_SENTINEL);

#if BVH_FEATURE(BVH_INSTANCING)
    if (stack_ptr >= 0) {
      kernel_assert(object != OBJECT_NONE);

      /* Instance pop. */
      if (num_hits_in_instance) {
        float t_fac;
#  if BVH_FEATURE(BVH_MOTION)
        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
#  else
        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#  endif
        /* Scale isect->t to adjust for instancing. */
        for (int i = 0; i < num_hits_in_instance; i++) {
          (isect_array - i - 1)->t *= t_fac;
        }
      }
      else {
#  if BVH_FEATURE(BVH_MOTION)
        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
#  else
        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
#  endif
      }

      isect_t = tmax;
      isect_array->t = isect_t;

      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
      tfar = ssef(isect_t);
#  if BVH_FEATURE(BVH_HAIR)
      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
#  endif
      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#  ifdef __KERNEL_AVX2__
      P_idir = P * idir;
      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
#  endif
#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
#  endif

      object = OBJECT_NONE;
      node_addr = traversal_stack[stack_ptr].addr;
      --stack_ptr;
    }
#endif /* FEATURE(BVH_INSTANCING) */
  } while (node_addr != ENTRYPOINT_SENTINEL);

  return num_hits;
}

#undef NODE_INTERSECT