obvh_volume.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /* This is a template BVH traversal function for volumes, where
  17. * various features can be enabled/disabled. This way we can compile optimized
  18. * versions for each case without new features slowing things down.
  19. *
  20. * BVH_INSTANCING: object instancing
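  21. * BVH_MOTION: motion blur rendering
  * BVH_HAIR: selects obvh_node_intersect and the child-address offset used for nodes flagged PATH_RAY_NODE_UNALIGNED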
  22. */
  /* Pick the node intersection routine called by the traversal loop below:
   * obvh_node_intersect when BVH_HAIR is enabled, obvh_aligned_node_intersect
   * otherwise. The argument list at the call site is adjusted with the same
   * feature test.
   */
  23. #if BVH_FEATURE(BVH_HAIR)
  24. # define NODE_INTERSECT obvh_node_intersect
  25. #else
  26. # define NODE_INTERSECT obvh_aligned_node_intersect
  27. #endif
  /* OBVH traversal for volume rays.
   *
   * Starts at kernel_data.bvh.root and walks the tree with an explicit stack.
   * Triangle (and, with BVH_MOTION, motion triangle) intersection is only
   * invoked for primitives whose object has SD_OBJECT_HAS_VOLUME set; with
   * BVH_INSTANCING, instances without that flag are skipped entirely.
   *
   * kg:         kernel globals, forwarded to the helpers.
   * ray:        ray to trace; P, D and t are read here (time with BVH_MOTION).
   * isect:      output record. Reset on entry (t = ray->t, u = v = 0,
   *             prim = PRIM_NONE, object = OBJECT_NONE) and then handed to the
   *             intersection helpers, which presumably record the hit in it.
   * visibility: bit mask tested against node and leaf visibility bits and
   *             forwarded to the intersection helpers.
   *
   * Returns true when isect->prim differs from PRIM_NONE after traversal.
   */
  28. ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
  29. const Ray *ray,
  30. Intersection *isect,
  31. const uint visibility)
  32. {
  33. /* Traversal stack: fixed-size local array of BVH_OSTACK_SIZE items. */
  34. OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
  /* Slot 0 holds the sentinel, so popping it terminates the loops below. */
  35. traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
  36. /* Traversal state. */
  37. int stack_ptr = 0;
  38. int node_addr = kernel_data.bvh.root;
  39. /* Local copies of the ray parameters; they are passed by pointer to the
   * instance push/pop helpers further down. */
  40. float3 P = ray->P;
  41. float3 dir = bvh_clamp_direction(ray->D);
  42. float3 idir = bvh_inverse_direction(dir);
  /* OBJECT_NONE while traversing the top-level tree, otherwise the instance
   * currently being traversed. */
  43. int object = OBJECT_NONE;
  44. #if BVH_FEATURE(BVH_MOTION)
  45. Transform ob_itfm;
  46. #endif
  /* Reset the output record to "no hit". */
  47. isect->t = ray->t;
  48. isect->u = 0.0f;
  49. isect->v = 0.0f;
  50. isect->prim = PRIM_NONE;
  51. isect->object = OBJECT_NONE;
  /* Ray interval and direction data in the vector types expected by
   * NODE_INTERSECT. */
  52. avxf tnear(0.0f), tfar(ray->t);
  53. #if BVH_FEATURE(BVH_HAIR)
  54. avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
  55. #endif
  56. avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
  /* With __KERNEL_AVX2__ the product P * idir is precomputed and passed to
   * NODE_INTERSECT; the plain origin is only passed for the BVH_HAIR or
   * non-AVX2 configurations. */
  57. #ifdef __KERNEL_AVX2__
  58. float3 P_idir = P * idir;
  59. avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
  60. #endif
  61. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  62. avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
  63. #endif
  64. /* Offsets to select the side that becomes the lower or upper bound. */
  65. int near_x, near_y, near_z;
  66. int far_x, far_y, far_z;
  67. obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  68. /* Traversal loop. The inner do/while runs until a sentinel is popped; the
   * outer do/while then performs an instance pop (if one is pending) and
   * resumes, and ends once the sentinel from stack slot 0 is reached. */
  69. do {
  70. do {
  71. /* Traverse internal nodes (non-negative addresses other than the sentinel). */
  72. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  73. float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  74. #ifdef __VISIBILITY_FLAG__
  /* Skip the node when none of the requested visibility bits are set in
   * inodes.x. */
  75. if ((__float_as_uint(inodes.x) & visibility) == 0) {
  76. /* Pop. */
  77. node_addr = traversal_stack[stack_ptr].addr;
  78. --stack_ptr;
  79. continue;
  80. }
  81. #endif
  /* NODE_INTERSECT returns a bit mask with one bit per child to visit and
   * writes per-child values into dist; those values are used below as the
   * sort keys of the pushed stack items. */
  82. avxf dist;
  83. int child_mask = NODE_INTERSECT(kg,
  84. tnear,
  85. tfar,
  86. #ifdef __KERNEL_AVX2__
  87. P_idir4,
  88. #endif
  89. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  90. org4,
  91. #endif
  92. #if BVH_FEATURE(BVH_HAIR)
  93. dir4,
  94. #endif
  95. idir4,
  96. near_x,
  97. near_y,
  98. near_z,
  99. far_x,
  100. far_y,
  101. far_z,
  102. node_addr,
  103. &dist);
  104. if (child_mask != 0) {
  /* Child addresses are stored at offset 26 for nodes flagged
   * PATH_RAY_NODE_UNALIGNED (BVH_HAIR only) and at offset 14 otherwise. */
  105. avxf cnodes;
  106. #if BVH_FEATURE(BVH_HAIR)
  107. if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
  108. cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
  109. }
  110. else
  111. #endif
  112. {
  113. cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
  114. }
  /* NOTE(review): each __bscf() call is expected to return the index of one
   * set bit and clear it from child_mask; the "child_mask == 0" tests below
   * rely on that to detect the last hit child. Confirm against its
   * definition. */
  115. /* One child is hit, continue with that child. */
  116. int r = __bscf(child_mask);
  117. if (child_mask == 0) {
  118. node_addr = __float_as_int(cnodes[r]);
  119. continue;
  120. }
  121. /* Two children are hit, push far child, and continue with
  122. * closer child.
  123. */
  124. int c0 = __float_as_int(cnodes[r]);
  125. float d0 = ((float *)&dist)[r];
  126. r = __bscf(child_mask);
  127. int c1 = __float_as_int(cnodes[r]);
  128. float d1 = ((float *)&dist)[r];
  129. if (child_mask == 0) {
  130. if (d1 < d0) {
  131. node_addr = c1;
  132. ++stack_ptr;
  133. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  134. traversal_stack[stack_ptr].addr = c0;
  135. traversal_stack[stack_ptr].dist = d0;
  136. continue;
  137. }
  138. else {
  139. node_addr = c0;
  140. ++stack_ptr;
  141. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  142. traversal_stack[stack_ptr].addr = c1;
  143. traversal_stack[stack_ptr].dist = d1;
  144. continue;
  145. }
  146. }
  147. /* Here starts the slow path for 3 to 8 hit children. We push
  148. * all nodes onto the stack to sort them there.
  149. */
  150. ++stack_ptr;
  151. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  152. traversal_stack[stack_ptr].addr = c1;
  153. traversal_stack[stack_ptr].dist = d1;
  154. ++stack_ptr;
  155. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  156. traversal_stack[stack_ptr].addr = c0;
  157. traversal_stack[stack_ptr].dist = d0;
  158. /* Three children are hit, push all onto stack and sort 3
  159. * stack items, continue with closest child.
  160. */
  161. r = __bscf(child_mask);
  162. int c2 = __float_as_int(cnodes[r]);
  163. float d2 = ((float *)&dist)[r];
  164. if (child_mask == 0) {
  165. ++stack_ptr;
  166. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  167. traversal_stack[stack_ptr].addr = c2;
  168. traversal_stack[stack_ptr].dist = d2;
  169. obvh_stack_sort(&traversal_stack[stack_ptr],
  170. &traversal_stack[stack_ptr - 1],
  171. &traversal_stack[stack_ptr - 2]);
  /* After sorting, the top stack item is taken as the next node. */
  172. node_addr = traversal_stack[stack_ptr].addr;
  173. --stack_ptr;
  174. continue;
  175. }
  176. /* Four children are hit, push all onto stack and sort 4
  177. * stack items, continue with closest child.
  178. */
  179. r = __bscf(child_mask);
  180. int c3 = __float_as_int(cnodes[r]);
  181. float d3 = ((float *)&dist)[r];
  182. if (child_mask == 0) {
  183. ++stack_ptr;
  184. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  185. traversal_stack[stack_ptr].addr = c3;
  186. traversal_stack[stack_ptr].dist = d3;
  187. ++stack_ptr;
  188. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  189. traversal_stack[stack_ptr].addr = c2;
  190. traversal_stack[stack_ptr].dist = d2;
  191. obvh_stack_sort(&traversal_stack[stack_ptr],
  192. &traversal_stack[stack_ptr - 1],
  193. &traversal_stack[stack_ptr - 2],
  194. &traversal_stack[stack_ptr - 3]);
  195. node_addr = traversal_stack[stack_ptr].addr;
  196. --stack_ptr;
  197. continue;
  198. }
  /* More than four children are hit: children 2 and 3 are pushed now and
   * sorted together with the remaining ones below. */
  199. ++stack_ptr;
  200. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  201. traversal_stack[stack_ptr].addr = c3;
  202. traversal_stack[stack_ptr].dist = d3;
  203. ++stack_ptr;
  204. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  205. traversal_stack[stack_ptr].addr = c2;
  206. traversal_stack[stack_ptr].dist = d2;
  207. /* Five children are hit, push all onto stack and sort 5
  208. * stack items, continue with closest child.
  209. */
  210. r = __bscf(child_mask);
  211. int c4 = __float_as_int(cnodes[r]);
  212. float d4 = ((float *)&dist)[r];
  213. if (child_mask == 0) {
  214. ++stack_ptr;
  215. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  216. traversal_stack[stack_ptr].addr = c4;
  217. traversal_stack[stack_ptr].dist = d4;
  218. obvh_stack_sort(&traversal_stack[stack_ptr],
  219. &traversal_stack[stack_ptr - 1],
  220. &traversal_stack[stack_ptr - 2],
  221. &traversal_stack[stack_ptr - 3],
  222. &traversal_stack[stack_ptr - 4]);
  223. node_addr = traversal_stack[stack_ptr].addr;
  224. --stack_ptr;
  225. continue;
  226. }
  227. /* Six children are hit, push all onto stack and sort 6
  228. * stack items, continue with closest child.
  229. */
  230. r = __bscf(child_mask);
  231. int c5 = __float_as_int(cnodes[r]);
  232. float d5 = ((float *)&dist)[r];
  233. if (child_mask == 0) {
  234. ++stack_ptr;
  235. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  236. traversal_stack[stack_ptr].addr = c5;
  237. traversal_stack[stack_ptr].dist = d5;
  238. ++stack_ptr;
  239. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  240. traversal_stack[stack_ptr].addr = c4;
  241. traversal_stack[stack_ptr].dist = d4;
  242. obvh_stack_sort(&traversal_stack[stack_ptr],
  243. &traversal_stack[stack_ptr - 1],
  244. &traversal_stack[stack_ptr - 2],
  245. &traversal_stack[stack_ptr - 3],
  246. &traversal_stack[stack_ptr - 4],
  247. &traversal_stack[stack_ptr - 5]);
  248. node_addr = traversal_stack[stack_ptr].addr;
  249. --stack_ptr;
  250. continue;
  251. }
  /* More than six children are hit: children 4 and 5 are pushed now and
   * sorted together with the remaining ones below. */
  252. ++stack_ptr;
  253. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  254. traversal_stack[stack_ptr].addr = c5;
  255. traversal_stack[stack_ptr].dist = d5;
  256. ++stack_ptr;
  257. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  258. traversal_stack[stack_ptr].addr = c4;
  259. traversal_stack[stack_ptr].dist = d4;
  260. /* Seven children are hit, push all onto stack and sort 7
  261. * stack items, continue with closest child.
  262. */
  263. r = __bscf(child_mask);
  264. int c6 = __float_as_int(cnodes[r]);
  265. float d6 = ((float *)&dist)[r];
  266. if (child_mask == 0) {
  267. ++stack_ptr;
  268. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  269. traversal_stack[stack_ptr].addr = c6;
  270. traversal_stack[stack_ptr].dist = d6;
  271. obvh_stack_sort(&traversal_stack[stack_ptr],
  272. &traversal_stack[stack_ptr - 1],
  273. &traversal_stack[stack_ptr - 2],
  274. &traversal_stack[stack_ptr - 3],
  275. &traversal_stack[stack_ptr - 4],
  276. &traversal_stack[stack_ptr - 5],
  277. &traversal_stack[stack_ptr - 6]);
  278. node_addr = traversal_stack[stack_ptr].addr;
  279. --stack_ptr;
  280. continue;
  281. }
  282. /* Eight children are hit, push all onto stack and sort 8
  283. * stack items, continue with closest child.
  284. */
  285. r = __bscf(child_mask);
  286. int c7 = __float_as_int(cnodes[r]);
  287. float d7 = ((float *)&dist)[r];
  288. ++stack_ptr;
  289. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  290. traversal_stack[stack_ptr].addr = c7;
  291. traversal_stack[stack_ptr].dist = d7;
  292. ++stack_ptr;
  293. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  294. traversal_stack[stack_ptr].addr = c6;
  295. traversal_stack[stack_ptr].dist = d6;
  296. obvh_stack_sort(&traversal_stack[stack_ptr],
  297. &traversal_stack[stack_ptr - 1],
  298. &traversal_stack[stack_ptr - 2],
  299. &traversal_stack[stack_ptr - 3],
  300. &traversal_stack[stack_ptr - 4],
  301. &traversal_stack[stack_ptr - 5],
  302. &traversal_stack[stack_ptr - 6],
  303. &traversal_stack[stack_ptr - 7]);
  304. node_addr = traversal_stack[stack_ptr].addr;
  305. --stack_ptr;
  306. continue;
  307. }
  /* No child is hit: pop the next node from the stack. */
  308. node_addr = traversal_stack[stack_ptr].addr;
  309. --stack_ptr;
  310. }
  311. /* If node is leaf (negative address), fetch triangle list. The leaf record
   * is stored at index -node_addr - 1 of __bvh_leaf_nodes. */
  312. if (node_addr < 0) {
  313. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  /* Skip the leaf when none of the requested visibility bits are set in
   * leaf.z. NOTE(review): unlike the inner-node test above, this check is not
   * guarded by __VISIBILITY_FLAG__ - confirm that this is intended. */
  314. if ((__float_as_uint(leaf.z) & visibility) == 0) {
  315. /* Pop. */
  316. node_addr = traversal_stack[stack_ptr].addr;
  317. --stack_ptr;
  318. continue;
  319. }
  /* leaf.x: first primitive index when non-negative; with BVH_INSTANCING a
   * negative value encodes an instance (see the else branch below). */
  320. int prim_addr = __float_as_int(leaf.x);
  321. #if BVH_FEATURE(BVH_INSTANCING)
  322. if (prim_addr >= 0) {
  323. #endif
  /* leaf.y is the end (exclusive) of the primitive range, leaf.w the
   * primitive type shared by the primitives in this leaf. */
  324. int prim_addr2 = __float_as_int(leaf.y);
  325. const uint type = __float_as_int(leaf.w);
  326. const uint p_type = type & PRIMITIVE_ALL;
  327. /* Pop now, so node_addr already holds the next node once the primitives
   * of this leaf have been processed. */
  328. node_addr = traversal_stack[stack_ptr].addr;
  329. --stack_ptr;
  330. /* Primitive intersection. Types without a case below are ignored. */
  331. switch (p_type) {
  332. case PRIMITIVE_TRIANGLE: {
  333. for (; prim_addr < prim_addr2; prim_addr++) {
  334. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  335. /* Only primitives from volume object. Outside an instance the owning
   * object is looked up per primitive. */
  336. uint tri_object = (object == OBJECT_NONE) ?
  337. kernel_tex_fetch(__prim_object, prim_addr) :
  338. object;
  339. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  340. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  341. continue;
  342. }
  343. /* Intersect ray against primitive. */
  344. triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
  345. }
  346. break;
  347. }
  348. #if BVH_FEATURE(BVH_MOTION)
  349. case PRIMITIVE_MOTION_TRIANGLE: {
  350. for (; prim_addr < prim_addr2; prim_addr++) {
  351. kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
  352. /* Only primitives from volume object. Outside an instance the owning
   * object is looked up per primitive. */
  353. uint tri_object = (object == OBJECT_NONE) ?
  354. kernel_tex_fetch(__prim_object, prim_addr) :
  355. object;
  356. int object_flag = kernel_tex_fetch(__object_flag, tri_object);
  357. if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
  358. continue;
  359. }
  360. /* Intersect ray against primitive, using the ray time. */
  361. motion_triangle_intersect(
  362. kg, isect, P, dir, ray->time, visibility, object, prim_addr);
  363. }
  364. break;
  365. }
  366. #endif
  367. }
  /* The next brace closes "if (prim_addr >= 0)" when BVH_INSTANCING is
   * enabled and "if (node_addr < 0)" otherwise. */
  368. }
  369. #if BVH_FEATURE(BVH_INSTANCING)
  370. else {
  371. /* Instance push. The object index is stored at -prim_addr - 1. */
  372. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  373. int object_flag = kernel_tex_fetch(__object_flag, object);
  /* Only enter instances of objects that have a volume. */
  374. if (object_flag & SD_OBJECT_HAS_VOLUME) {
  /* The push helper updates P, dir and idir through the pointers and returns
   * the value stored in isect->t (presumably the ray expressed in the
   * instance's space - confirm against the helper). */
  375. # if BVH_FEATURE(BVH_MOTION)
  376. isect->t = bvh_instance_motion_push(
  377. kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
  378. # else
  379. isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
  380. # endif
  /* Recompute everything derived from P, dir, idir and isect->t. */
  381. obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  382. tfar = avxf(isect->t);
  383. # if BVH_FEATURE(BVH_HAIR)
  384. dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
  385. # endif
  386. idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
  387. # ifdef __KERNEL_AVX2__
  388. P_idir = P * idir;
  389. P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
  390. # endif
  391. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  392. org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
  393. # endif
  /* Push a sentinel so the inner loop exits when the instance's tree is
   * exhausted, then continue at the root node of the instanced object. */
  394. ++stack_ptr;
  395. kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
  396. traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
  397. node_addr = kernel_tex_fetch(__object_node, object);
  398. }
  399. else {
  400. /* Pop: the instance has no volume, so it is not entered. */
  401. object = OBJECT_NONE;
  402. node_addr = traversal_stack[stack_ptr].addr;
  403. --stack_ptr;
  404. }
  405. }
  406. }
  407. #endif /* FEATURE(BVH_INSTANCING) */
  408. } while (node_addr != ENTRYPOINT_SENTINEL);
  409. #if BVH_FEATURE(BVH_INSTANCING)
  /* A sentinel was popped. If the stack is not empty it was the one pushed by
   * an instance push, so leave the instance and resume in the parent tree. */
  410. if (stack_ptr >= 0) {
  411. kernel_assert(object != OBJECT_NONE);
  412. /* Instance pop. */
  413. # if BVH_FEATURE(BVH_MOTION)
  414. isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
  415. # else
  416. isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
  417. # endif
  /* Recompute everything derived from P, dir, idir and isect->t. */
  418. obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  419. tfar = avxf(isect->t);
  420. # if BVH_FEATURE(BVH_HAIR)
  421. dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
  422. # endif
  423. idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
  424. # ifdef __KERNEL_AVX2__
  425. P_idir = P * idir;
  426. P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
  427. # endif
  428. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  429. org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
  430. # endif
  /* Back in the top-level tree: pop the node to continue with. */
  431. object = OBJECT_NONE;
  432. node_addr = traversal_stack[stack_ptr].addr;
  433. --stack_ptr;
  434. }
  435. #endif /* FEATURE(BVH_INSTANCING) */
  436. } while (node_addr != ENTRYPOINT_SENTINEL);
  /* A hit was recorded if isect->prim no longer holds its initial value. */
  437. return (isect->prim != PRIM_NONE);
  438. }
  /* NODE_INTERSECT is only meant for the function above; undefine it again
   * (presumably so this template can be included more than once with
   * different BVH_FEATURE settings - confirm against the including file). */
  439. #undef NODE_INTERSECT