/* obvh_shadow_all.h */
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
/* This is a template BVH traversal function, where various features can be
 * enabled/disabled. This way we can compile optimized versions for each case
 * without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_HAIR: hair curve rendering
 * BVH_MOTION: motion blur rendering
 */

/* Select the 8-wide node intersection routine. With hair enabled, nodes may
 * carry the PATH_RAY_NODE_UNALIGNED flag (see the cnodes fetch below), so the
 * variant that also handles unaligned nodes is required; otherwise the
 * cheaper aligned-only test suffices. */
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT obvh_node_intersect
#else
# define NODE_INTERSECT obvh_aligned_node_intersect
#endif
/* 8-wide (OBVH) shadow-ray traversal recording all transparent-shadow hits.
 *
 * Walks the 8-ary BVH for a shadow ray and records intersections into
 * isect_array (the pointer is advanced one slot per recorded hit), counting
 * them in *num_hits.
 *
 * Returns true as soon as light is known to be fully blocked: either a hit
 * on a shader without SD_HAS_TRANSPARENT_SHADOW, or *num_hits reaching
 * max_hits (the 8-wide triangle path signals the same via
 * triangle_intersect8() returning 2). Returns false when traversal finishes
 * with only transparent hits recorded.
 *
 * skip_object: with __SHADOW_TRICKS__, primitives belonging to this object
 * are skipped (test applied in the non-triangle primitive loop below).
 *
 * Compiled once per feature combination via BVH_FUNCTION_FULL_NAME /
 * BVH_FEATURE (see file header): BVH_INSTANCING, BVH_HAIR, BVH_MOTION.
 */
ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
                                             const Ray *ray,
                                             Intersection *isect_array,
                                             const int skip_object,
                                             const uint max_hits,
                                             uint *num_hits)
{
  /* TODO(sergey):
   * - Test if pushing distance on the stack helps.
   * - Likely and unlikely for if() statements.
   * - Test restrict attribute for pointers.
   */

  /* Traversal stack in CUDA thread-local memory. */
  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;

  /* Traversal variables in registers. */
  int stack_ptr = 0;
  int node_addr = kernel_data.bvh.root;

  /* Ray parameters in registers. */
  const float tmax = ray->t;
  float3 P = ray->P;
  float3 dir = bvh_clamp_direction(ray->D);
  float3 idir = bvh_inverse_direction(dir);
  int object = OBJECT_NONE;
  float isect_t = tmax;

#if BVH_FEATURE(BVH_MOTION)
  Transform ob_itfm;
#endif

  *num_hits = 0;
  isect_array->t = tmax;

#if BVH_FEATURE(BVH_INSTANCING)
  /* Hits recorded inside the current instance; used on instance pop to
   * rescale the recorded t values back to world space. */
  int num_hits_in_instance = 0;
#endif

  /* 8-wide ray interval and direction data for the node intersection test. */
  avxf tnear(0.0f), tfar(isect_t);
#if BVH_FEATURE(BVH_HAIR)
  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
#endif
  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));

#ifdef __KERNEL_AVX2__
  /* Precomputed P * idir, passed to the node test instead of the origin. */
  float3 P_idir = P * idir;
  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
#endif

  /* Offsets to select the side that becomes the lower or upper bound. */
  int near_x, near_y, near_z;
  int far_x, far_y, far_z;
  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);

  /* Traversal loop. */
  do {
    do {
      /* Traverse internal nodes. */
      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
        (void)inodes; /* Unused when both culling checks below are compiled out. */

        /* Cull the node when the shadow ray cannot see it (visibility flags),
         * or when the ray time lies outside the node's motion interval. */
        if (false
#ifdef __VISIBILITY_FLAG__
            || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
        ) {
          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }

        /* Intersect the ray against all 8 children at once: child_mask gets
         * one bit per child whose bounds were hit, dist the per-lane entry
         * distance. */
        avxf dist;
        int child_mask = NODE_INTERSECT(kg,
                                        tnear,
                                        tfar,
#ifdef __KERNEL_AVX2__
                                        P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
                                        org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
                                        dir4,
#endif
                                        idir4,
                                        near_x,
                                        near_y,
                                        near_z,
                                        far_x,
                                        far_y,
                                        far_z,
                                        node_addr,
                                        &dist);

        if (child_mask != 0) {
          avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
          /* Unaligned nodes keep their child pointers at a different offset. */
          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
          }
          else
#endif
          {
            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
          }

          /* One child is hit, continue with that child. */
          int r = __bscf(child_mask);
          if (child_mask == 0) {
            node_addr = __float_as_int(cnodes[r]);
            continue;
          }

          /* Two children are hit, push far child, and continue with
           * closer child. */
          int c0 = __float_as_int(cnodes[r]);
          float d0 = ((float *)&dist)[r]; /* Entry distance of SIMD lane r. */
          r = __bscf(child_mask);
          int c1 = __float_as_int(cnodes[r]);
          float d1 = ((float *)&dist)[r];
          if (child_mask == 0) {
            if (d1 < d0) {
              node_addr = c1;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c0;
              traversal_stack[stack_ptr].dist = d0;
              continue;
            }
            else {
              node_addr = c0;
              ++stack_ptr;
              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
              traversal_stack[stack_ptr].addr = c1;
              traversal_stack[stack_ptr].dist = d1;
              continue;
            }
          }

          /* Here starts the slow path for 3 or more hit children. We push
           * all nodes onto the stack to sort them there. */
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c1;
          traversal_stack[stack_ptr].dist = d1;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c0;
          traversal_stack[stack_ptr].dist = d0;

          /* Three children are hit, push all onto stack and sort 3
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c2 = __float_as_int(cnodes[r]);
          float d2 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c2;
            traversal_stack[stack_ptr].dist = d2;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Four children are hit, push all onto stack and sort 4
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c3 = __float_as_int(cnodes[r]);
          float d3 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c3;
            traversal_stack[stack_ptr].dist = d3;
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c2;
            traversal_stack[stack_ptr].dist = d2;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c3;
          traversal_stack[stack_ptr].dist = d3;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c2;
          traversal_stack[stack_ptr].dist = d2;

          /* Five children are hit, push all onto stack and sort 5
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c4 = __float_as_int(cnodes[r]);
          float d4 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c4;
            traversal_stack[stack_ptr].dist = d4;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Six children are hit, push all onto stack and sort 6
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c5 = __float_as_int(cnodes[r]);
          float d5 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c5;
            traversal_stack[stack_ptr].dist = d5;
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c4;
            traversal_stack[stack_ptr].dist = d4;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4],
                            &traversal_stack[stack_ptr - 5]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c5;
          traversal_stack[stack_ptr].dist = d5;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c4;
          traversal_stack[stack_ptr].dist = d4;

          /* Seven children are hit, push all onto stack and sort 7
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c6 = __float_as_int(cnodes[r]);
          float d6 = ((float *)&dist)[r];
          if (child_mask == 0) {
            ++stack_ptr;
            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
            traversal_stack[stack_ptr].addr = c6;
            traversal_stack[stack_ptr].dist = d6;
            obvh_stack_sort(&traversal_stack[stack_ptr],
                            &traversal_stack[stack_ptr - 1],
                            &traversal_stack[stack_ptr - 2],
                            &traversal_stack[stack_ptr - 3],
                            &traversal_stack[stack_ptr - 4],
                            &traversal_stack[stack_ptr - 5],
                            &traversal_stack[stack_ptr - 6]);
            node_addr = traversal_stack[stack_ptr].addr;
            --stack_ptr;
            continue;
          }

          /* Eight children are hit, push all onto stack and sort 8
           * stack items, continue with closest child. */
          r = __bscf(child_mask);
          int c7 = __float_as_int(cnodes[r]);
          float d7 = ((float *)&dist)[r];
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c7;
          traversal_stack[stack_ptr].dist = d7;
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = c6;
          traversal_stack[stack_ptr].dist = d6;
          obvh_stack_sort(&traversal_stack[stack_ptr],
                          &traversal_stack[stack_ptr - 1],
                          &traversal_stack[stack_ptr - 2],
                          &traversal_stack[stack_ptr - 3],
                          &traversal_stack[stack_ptr - 4],
                          &traversal_stack[stack_ptr - 5],
                          &traversal_stack[stack_ptr - 6],
                          &traversal_stack[stack_ptr - 7]);
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }

        /* No child hit: pop. */
        node_addr = traversal_stack[stack_ptr].addr;
        --stack_ptr;
      }

      /* If node is leaf, fetch triangle list. */
      if (node_addr < 0) {
        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
        if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;
          continue;
        }
#endif

        int prim_addr = __float_as_int(leaf.x);

#if BVH_FEATURE(BVH_INSTANCING)
        if (prim_addr >= 0) {
#endif
          int prim_addr2 = __float_as_int(leaf.y);
          const uint type = __float_as_int(leaf.w);
          const uint p_type = type & PRIMITIVE_ALL;

          /* Pop. */
          node_addr = traversal_stack[stack_ptr].addr;
          --stack_ptr;

          /* Primitive intersection. */
          if (p_type == PRIMITIVE_TRIANGLE) {
            int prim_count = prim_addr2 - prim_addr;
            /* NOTE(review): unlike the loop for other primitive types below,
             * neither triangle path applies the __SHADOW_TRICKS__ skip_object
             * test -- confirm this is intentional. */
            if (prim_count < 3) {
              /* Few triangles: intersect them one by one. */
              while (prim_addr < prim_addr2) {
                kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
                              p_type);
                int hit = triangle_intersect(
                    kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
                /* Shadow ray early termination. */
                if (hit) {
                  /* Detect if this surface has a shader with transparent shadows. */
                  /* todo: optimize so primitive visibility flag indicates if
                   * the primitive has a transparent shadow shader? */
                  int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
                  int shader = 0;
#ifdef __HAIR__
                  if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
                  {
                    shader = kernel_tex_fetch(__tri_shader, prim);
                  }
#ifdef __HAIR__
                  else {
                    float4 str = kernel_tex_fetch(__curves, prim);
                    shader = __float_as_int(str.z);
                  }
#endif
                  int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
                  /* If no transparent shadows, all light is blocked. */
                  if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
                    return true;
                  }
                  /* If maximum number of hits was reached, block all light. */
                  else if (*num_hits == max_hits) {
                    return true;
                  }
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
                  num_hits_in_instance++;
#endif
                  /* Prime the next slot with the current maximum distance. */
                  isect_array->t = isect_t;
                }
                prim_addr++;
              }
            }
            else {
              /* Enough triangles in the leaf: use the 8-wide intersector. */
              kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
                            p_type);
#if BVH_FEATURE(BVH_INSTANCING)
              int *nhiptr = &num_hits_in_instance;
#else
              int nhi = 0;
              int *nhiptr = &nhi;
#endif
              /* result == 2 means light is fully blocked. */
              int result = triangle_intersect8(kg,
                                               &isect_array,
                                               P,
                                               dir,
                                               PATH_RAY_SHADOW,
                                               object,
                                               prim_addr,
                                               prim_count,
                                               num_hits,
                                               max_hits,
                                               nhiptr,
                                               isect_t);
              if (result == 2) {
                return true;
              }
            }
          }
          else {
            /* Other primitive types (motion triangles, curves): one at a time. */
            while (prim_addr < prim_addr2) {
              kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
#ifdef __SHADOW_TRICKS__
              uint tri_object = (object == OBJECT_NONE) ?
                                    kernel_tex_fetch(__prim_object, prim_addr) :
                                    object;
              if (tri_object == skip_object) {
                ++prim_addr;
                continue;
              }
#endif
              bool hit;
              /* todo: specialized intersect functions which don't fill in
               * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
               * might give a few % performance improvement */
              switch (p_type) {
#if BVH_FEATURE(BVH_MOTION)
                case PRIMITIVE_MOTION_TRIANGLE: {
                  hit = motion_triangle_intersect(
                      kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
                  break;
                }
#endif
#if BVH_FEATURE(BVH_HAIR)
                case PRIMITIVE_CURVE:
                case PRIMITIVE_MOTION_CURVE: {
                  const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
                  if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
                    hit = cardinal_curve_intersect(kg,
                                                   isect_array,
                                                   P,
                                                   dir,
                                                   PATH_RAY_SHADOW,
                                                   object,
                                                   prim_addr,
                                                   ray->time,
                                                   curve_type);
                  }
                  else {
                    hit = curve_intersect(kg,
                                          isect_array,
                                          P,
                                          dir,
                                          PATH_RAY_SHADOW,
                                          object,
                                          prim_addr,
                                          ray->time,
                                          curve_type);
                  }
                  break;
                }
#endif
                default: {
                  hit = false;
                  break;
                }
              }

              /* Shadow ray early termination. */
              if (hit) {
                /* Detect if this surface has a shader with transparent shadows. */
                /* todo: optimize so primitive visibility flag indicates if
                 * the primitive has a transparent shadow shader? */
                int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
                int shader = 0;
#ifdef __HAIR__
                if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
                {
                  shader = kernel_tex_fetch(__tri_shader, prim);
                }
#ifdef __HAIR__
                else {
                  float4 str = kernel_tex_fetch(__curves, prim);
                  shader = __float_as_int(str.z);
                }
#endif
                int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
                /* If no transparent shadows, all light is blocked. */
                if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
                  return true;
                }
                /* If maximum number of hits was reached, block all light. */
                else if (*num_hits == max_hits) {
                  return true;
                }
                /* Move on to next entry in intersections array. */
                isect_array++;
                (*num_hits)++;
#if BVH_FEATURE(BVH_INSTANCING)
                num_hits_in_instance++;
#endif
                /* Prime the next slot with the current maximum distance. */
                isect_array->t = isect_t;
              }
              prim_addr++;
            }
          }
        }
#if BVH_FEATURE(BVH_INSTANCING)
        else {
          /* Instance push: enter the object's BVH with the ray transformed
           * into instance space, then re-derive all SIMD ray data. */
          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
          num_hits_in_instance = 0;
          isect_array->t = isect_t;

          obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
          tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
          dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
          idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
          P_idir = P * idir;
          P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
          org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif
          /* Push a sentinel so the inner loop exits back here when the
           * instance sub-tree is done. */
          ++stack_ptr;
          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
          node_addr = kernel_tex_fetch(__object_node, object);
        }
      }
#endif /* FEATURE(BVH_INSTANCING) */
    } while (node_addr != ENTRYPOINT_SENTINEL);

#if BVH_FEATURE(BVH_INSTANCING)
    if (stack_ptr >= 0) {
      kernel_assert(object != OBJECT_NONE);

      /* Instance pop: restore the world-space ray. */
      if (num_hits_in_instance) {
        float t_fac;
# if BVH_FEATURE(BVH_MOTION)
        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
# else
        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
        /* Scale isect->t to adjust for instancing. */
        for (int i = 0; i < num_hits_in_instance; i++) {
          (isect_array - i - 1)->t *= t_fac;
        }
      }
      else {
# if BVH_FEATURE(BVH_MOTION)
        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
# else
        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
# endif
      }

      isect_t = tmax;
      isect_array->t = isect_t;

      /* Re-derive the SIMD ray data for world space. */
      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
      tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
      P_idir = P * idir;
      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
# endif

      object = OBJECT_NONE;
      node_addr = traversal_stack[stack_ptr].addr;
      --stack_ptr;
    }
#endif /* FEATURE(BVH_INSTANCING) */
  } while (node_addr != ENTRYPOINT_SENTINEL);

  return false;
}

#undef NODE_INTERSECT