/* obvh_local.h */
/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This is a template BVH traversal function for subsurface scattering, where
 * various features can be enabled/disabled. This way we can compile optimized
 * versions for each case without new features slowing things down.
 *
 * BVH_MOTION: motion blur rendering
 */
/* Select the node-intersection routine: the hair feature requires support for
 * unaligned nodes, the plain aligned intersector is cheaper otherwise. */
#if BVH_FEATURE(BVH_HAIR)
#  define NODE_INTERSECT obvh_node_intersect
#else
#  define NODE_INTERSECT obvh_aligned_node_intersect
#endif

/* Local (single-object) intersection traversal over an 8-wide (oct) BVH.
 *
 * Traverses only the subtree of `local_object`, accumulating primitive hits
 * into `local_isect` (up to `max_hits`). Returns true when a primitive
 * intersection routine signals that traversal may stop early, false when the
 * whole subtree was traversed.
 *
 * kg          - kernel globals used by all `kernel_tex_fetch` lookups.
 * ray         - ray in world space; transformed into object space below when
 *               the object's transform is not pre-applied.
 * local_isect - output hit list; may be NULL, in which case `max_hits` must be
 *               0 (see the kernel_assert below).
 * local_object- index of the single object whose BVH subtree is traversed.
 * lcg_state   - RNG state passed through to the primitive intersectors
 *               (presumably for hit subsampling — confirm in the intersector).
 * max_hits    - maximum number of hits to record in `local_isect`.
 */
ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                             const Ray *ray,
                                             LocalIntersection *local_isect,
                                             int local_object,
                                             uint *lcg_state,
                                             int max_hits)
{
  /* Traversal stack in CUDA thread-local memory. */
  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;

  /* Traversal variables in registers. Traversal starts at the root node of
   * the given object rather than the scene root. */
  int stack_ptr = 0;
  int node_addr = kernel_tex_fetch(__object_node, local_object);

  /* Ray parameters in registers. */
  float3 P = ray->P;
  float3 dir = bvh_clamp_direction(ray->D);
  float3 idir = bvh_inverse_direction(dir);
  int object = OBJECT_NONE;
  float isect_t = ray->t;

  if (local_isect != NULL) {
    local_isect->num_hits = 0;
  }
  /* A NULL hit list and a zero hit budget must go together. */
  kernel_assert((local_isect == NULL) == (max_hits == 0));

  /* Transform the ray into object space unless the object's transform was
   * already baked into the geometry. */
  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
    Transform ob_itfm;
    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
    object = local_object;
  }

  /* Ray interval and direction broadcast into 8-wide SIMD registers for the
   * 8-children-at-once node tests. */
  avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));

#ifdef __KERNEL_AVX2__
  /* With FMA available, precompute P * idir so the slab test becomes a single
   * fused multiply-subtract per axis. */
  float3 P_idir = P * idir;
  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif

  /* Offsets to select the side that becomes the lower or upper bound. */
  int near_x, near_y, near_z;
  int far_x, far_y, far_z;
  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);

  /* Traversal loop. */
  do {
    do {
      /* Traverse internal nodes. Negative addresses are leaves, the sentinel
       * terminates traversal. */
      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
        avxf dist;
        /* Test the ray against all 8 children at once; `child_mask` has one
         * bit set per intersected child, `dist` the per-child entry distance. */
        int child_mask = NODE_INTERSECT(kg,
                                        tnear,
                                        tfar,
#ifdef __KERNEL_AVX2__
                                        P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
                                        org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
                                        dir4,
#endif
                                        idir4,
                                        near_x,
                                        near_y,
                                        near_z,
                                        far_x,
                                        far_y,
                                        far_z,
                                        node_addr,
                                        &dist);

        if (child_mask != 0) {
          /* Fetch child addresses; unaligned nodes store them at a different
           * offset within the node record. */
          float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
          avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
          }
          else
#endif
          {
            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
          }

          /* One child is hit, continue with that child.
           * (__bscf extracts and clears the lowest set bit of the mask.) */
          int r = __bscf(child_mask);
          if (child_mask == 0) {
            node_addr = __float_as_int(cnodes[r]);
            continue;
          }

          /* Two children are hit, push far child, and continue with
           * closer child.
           */
          int c0 = __float_as_int(cnodes[r]);
          float d0 = ((float *)&dist)[r];
          r = __bscf(child_mask);
          int c1 = __float_as_int(cnodes[r]);
          float d1 = ((float *)&dist)[r];
          if (child_mask == 0) {
            if (d1 < d0) {
              node_addr = c1;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c0;
              traversal_stack[stack_ptr].dist = d0;
              continue;
            }
            else {
              node_addr = c0;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c1;
              traversal_stack[stack_ptr].dist = d1;
              continue;
            }
          }

          /* Here starts the slow path for 3 or 4 hit children. We push
           * all nodes onto the stack to sort them there.
           */
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c1;
          traversal_stack[stack_ptr].dist = d1;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c0;
          traversal_stack[stack_ptr].dist = d0;

          /* Three children are hit, push all onto stack and sort 3
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c2 = __float_as_int(cnodes[r]);
          float d2 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c2;
            traversal_stack[stack_ptr].dist = d2;
            /* Sort so the nearest child ends up on top of the stack. */
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Four children are hit, push all onto stack and sort 4
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c3 = __float_as_int(cnodes[r]);
          float d3 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c3;
            traversal_stack[stack_ptr].dist = d3;
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c2;
            traversal_stack[stack_ptr].dist = d2;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c3;
          traversal_stack[stack_ptr].dist = d3;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c2;
          traversal_stack[stack_ptr].dist = d2;

          /* Five children are hit, push all onto stack and sort 5
           * stack items, continue with closest child
           */
          r = __bscf(child_mask);
          int c4 = __float_as_int(cnodes[r]);
          float d4 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c4;
            traversal_stack[stack_ptr].dist = d4;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Six children are hit, push all onto stack and sort 6
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c5 = __float_as_int(cnodes[r]);
          float d5 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c5;
            traversal_stack[stack_ptr].dist = d5;
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c4;
            traversal_stack[stack_ptr].dist = d4;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4],
                            &traversal_stack[stack_ptr - 5]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c5;
          traversal_stack[stack_ptr].dist = d5;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c4;
          traversal_stack[stack_ptr].dist = d4;

          /* Seven children are hit, push all onto stack and sort 7
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c6 = __float_as_int(cnodes[r]);
          float d6 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c6;
            traversal_stack[stack_ptr].dist = d6;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4],
                            &traversal_stack[stack_ptr - 5],
                            &traversal_stack[stack_ptr - 6]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Eight children are hit, push all onto stack and sort 8
           * stack items, continue with closest child.
           */
          r = __bscf(child_mask);
          int c7 = __float_as_int(cnodes[r]);
          float d7 = ((float *)&dist)[r];
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c7;
          traversal_stack[stack_ptr].dist = d7;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c6;
          traversal_stack[stack_ptr].dist = d6;
          obvh_stack_sort(&traversal_stack[stack_ptr],
                          &traversal_stack[stack_ptr - 1],
                          &traversal_stack[stack_ptr - 2],
                          &traversal_stack[stack_ptr - 3],
                          &traversal_stack[stack_ptr - 4],
                          &traversal_stack[stack_ptr - 5],
                          &traversal_stack[stack_ptr - 6],
                          &traversal_stack[stack_ptr - 7]);
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }

        /* No child hit: pop the next node from the stack. */
        node_addr = traversal_stack[stack_ptr].addr;
        --stack_ptr;
      }

      /* If node is leaf, fetch triangle list. */
      if (node_addr < 0) {
        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
        int prim_addr = __float_as_int(leaf.x);
        int prim_addr2 = __float_as_int(leaf.y);
        const uint type = __float_as_int(leaf.w);

        /* Pop. */
        node_addr = traversal_stack[stack_ptr].addr;
        --stack_ptr;

        /* Primitive intersection. A `true` return from the intersector means
         * traversal can terminate early. */
        switch (type & PRIMITIVE_ALL) {
          case PRIMITIVE_TRIANGLE: {
            /* Intersect ray against primitive. */
            for (; prim_addr < prim_addr2; prim_addr++) {
              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
              if (triangle_intersect_local(kg,
                                           local_isect,
                                           P,
                                           dir,
                                           object,
                                           local_object,
                                           prim_addr,
                                           isect_t,
                                           lcg_state,
                                           max_hits)) {
                return true;
              }
            }
            break;
          }
#if BVH_FEATURE(BVH_MOTION)
          case PRIMITIVE_MOTION_TRIANGLE: {
            /* Intersect ray against primitive. */
            for (; prim_addr < prim_addr2; prim_addr++) {
              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
              if (motion_triangle_intersect_local(kg,
                                                  local_isect,
                                                  P,
                                                  dir,
                                                  ray->time,
                                                  object,
                                                  local_object,
                                                  prim_addr,
                                                  isect_t,
                                                  lcg_state,
                                                  max_hits)) {
                return true;
              }
            }
            break;
          }
#endif
          default:
            break;
        }
      }
    } while (node_addr != ENTRYPOINT_SENTINEL);
  } while (node_addr != ENTRYPOINT_SENTINEL);

  return false;
}

#undef NODE_INTERSECT