/* qbvh_shadow_all.h: QBVH (4-wide BVH) traversal kernel recording all
 * transparent-shadow intersections along a shadow ray. */
/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This is a template BVH traversal function, where various features can be
 * enabled/disabled. This way we can compile optimized versions for each case
 * without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_HAIR: hair curve rendering
 * BVH_MOTION: motion blur rendering
 */
  24. #if BVH_FEATURE(BVH_HAIR)
  25. # define NODE_INTERSECT qbvh_node_intersect
  26. #else
  27. # define NODE_INTERSECT qbvh_aligned_node_intersect
  28. #endif
  29. ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
  30. const Ray *ray,
  31. Intersection *isect_array,
  32. const uint visibility,
  33. const uint max_hits,
  34. uint *num_hits)
  35. {
  36. /* TODO(sergey):
  37. * - Test if pushing distance on the stack helps.
  38. * - Likely and unlikely for if() statements.
  39. * - Test restrict attribute for pointers.
  40. */
  41. /* Traversal stack in CUDA thread-local memory. */
  42. QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
  43. traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
  44. /* Traversal variables in registers. */
  45. int stack_ptr = 0;
  46. int node_addr = kernel_data.bvh.root;
  47. /* Ray parameters in registers. */
  48. const float tmax = ray->t;
  49. float3 P = ray->P;
  50. float3 dir = bvh_clamp_direction(ray->D);
  51. float3 idir = bvh_inverse_direction(dir);
  52. int object = OBJECT_NONE;
  53. float isect_t = tmax;
  54. #if BVH_FEATURE(BVH_MOTION)
  55. Transform ob_itfm;
  56. #endif
  57. *num_hits = 0;
  58. isect_array->t = tmax;
  59. #if BVH_FEATURE(BVH_INSTANCING)
  60. int num_hits_in_instance = 0;
  61. #endif
  62. ssef tnear(0.0f), tfar(isect_t);
  63. #if BVH_FEATURE(BVH_HAIR)
  64. sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  65. #endif
  66. sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  67. #ifdef __KERNEL_AVX2__
  68. float3 P_idir = P * idir;
  69. sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
  70. #endif
  71. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  72. sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
  73. #endif
  74. /* Offsets to select the side that becomes the lower or upper bound. */
  75. int near_x, near_y, near_z;
  76. int far_x, far_y, far_z;
  77. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  78. /* Traversal loop. */
  79. do {
  80. do {
  81. /* Traverse internal nodes. */
  82. while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
  83. float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
  84. (void)inodes;
  85. if (false
  86. #ifdef __VISIBILITY_FLAG__
  87. || ((__float_as_uint(inodes.x) & visibility) == 0)
  88. #endif
  89. #if BVH_FEATURE(BVH_MOTION)
  90. || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
  91. #endif
  92. ) {
  93. /* Pop. */
  94. node_addr = traversal_stack[stack_ptr].addr;
  95. --stack_ptr;
  96. continue;
  97. }
  98. ssef dist;
  99. int child_mask = NODE_INTERSECT(kg,
  100. tnear,
  101. tfar,
  102. #ifdef __KERNEL_AVX2__
  103. P_idir4,
  104. #endif
  105. #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  106. org4,
  107. #endif
  108. #if BVH_FEATURE(BVH_HAIR)
  109. dir4,
  110. #endif
  111. idir4,
  112. near_x,
  113. near_y,
  114. near_z,
  115. far_x,
  116. far_y,
  117. far_z,
  118. node_addr,
  119. &dist);
  120. if (child_mask != 0) {
  121. float4 cnodes;
  122. #if BVH_FEATURE(BVH_HAIR)
  123. if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
  124. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
  125. }
  126. else
  127. #endif
  128. {
  129. cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
  130. }
  131. /* One child is hit, continue with that child. */
  132. int r = __bscf(child_mask);
  133. if (child_mask == 0) {
  134. node_addr = __float_as_int(cnodes[r]);
  135. continue;
  136. }
  137. /* Two children are hit, push far child, and continue with
  138. * closer child.
  139. */
  140. int c0 = __float_as_int(cnodes[r]);
  141. float d0 = ((float *)&dist)[r];
  142. r = __bscf(child_mask);
  143. int c1 = __float_as_int(cnodes[r]);
  144. float d1 = ((float *)&dist)[r];
  145. if (child_mask == 0) {
  146. if (d1 < d0) {
  147. node_addr = c1;
  148. ++stack_ptr;
  149. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  150. traversal_stack[stack_ptr].addr = c0;
  151. traversal_stack[stack_ptr].dist = d0;
  152. continue;
  153. }
  154. else {
  155. node_addr = c0;
  156. ++stack_ptr;
  157. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  158. traversal_stack[stack_ptr].addr = c1;
  159. traversal_stack[stack_ptr].dist = d1;
  160. continue;
  161. }
  162. }
  163. /* Here starts the slow path for 3 or 4 hit children. We push
  164. * all nodes onto the stack to sort them there.
  165. */
  166. ++stack_ptr;
  167. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  168. traversal_stack[stack_ptr].addr = c1;
  169. traversal_stack[stack_ptr].dist = d1;
  170. ++stack_ptr;
  171. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  172. traversal_stack[stack_ptr].addr = c0;
  173. traversal_stack[stack_ptr].dist = d0;
  174. /* Three children are hit, push all onto stack and sort 3
  175. * stack items, continue with closest child.
  176. */
  177. r = __bscf(child_mask);
  178. int c2 = __float_as_int(cnodes[r]);
  179. float d2 = ((float *)&dist)[r];
  180. if (child_mask == 0) {
  181. ++stack_ptr;
  182. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  183. traversal_stack[stack_ptr].addr = c2;
  184. traversal_stack[stack_ptr].dist = d2;
  185. qbvh_stack_sort(&traversal_stack[stack_ptr],
  186. &traversal_stack[stack_ptr - 1],
  187. &traversal_stack[stack_ptr - 2]);
  188. node_addr = traversal_stack[stack_ptr].addr;
  189. --stack_ptr;
  190. continue;
  191. }
  192. /* Four children are hit, push all onto stack and sort 4
  193. * stack items, continue with closest child.
  194. */
  195. r = __bscf(child_mask);
  196. int c3 = __float_as_int(cnodes[r]);
  197. float d3 = ((float *)&dist)[r];
  198. ++stack_ptr;
  199. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  200. traversal_stack[stack_ptr].addr = c3;
  201. traversal_stack[stack_ptr].dist = d3;
  202. ++stack_ptr;
  203. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  204. traversal_stack[stack_ptr].addr = c2;
  205. traversal_stack[stack_ptr].dist = d2;
  206. qbvh_stack_sort(&traversal_stack[stack_ptr],
  207. &traversal_stack[stack_ptr - 1],
  208. &traversal_stack[stack_ptr - 2],
  209. &traversal_stack[stack_ptr - 3]);
  210. }
  211. node_addr = traversal_stack[stack_ptr].addr;
  212. --stack_ptr;
  213. }
  214. /* If node is leaf, fetch triangle list. */
  215. if (node_addr < 0) {
  216. float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
  217. #ifdef __VISIBILITY_FLAG__
  218. if ((__float_as_uint(leaf.z) & visibility) == 0) {
  219. /* Pop. */
  220. node_addr = traversal_stack[stack_ptr].addr;
  221. --stack_ptr;
  222. continue;
  223. }
  224. #endif
  225. int prim_addr = __float_as_int(leaf.x);
  226. #if BVH_FEATURE(BVH_INSTANCING)
  227. if (prim_addr >= 0) {
  228. #endif
  229. int prim_addr2 = __float_as_int(leaf.y);
  230. const uint type = __float_as_int(leaf.w);
  231. const uint p_type = type & PRIMITIVE_ALL;
  232. /* Pop. */
  233. node_addr = traversal_stack[stack_ptr].addr;
  234. --stack_ptr;
  235. /* Primitive intersection. */
  236. while (prim_addr < prim_addr2) {
  237. kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
  238. bool hit;
  239. /* todo: specialized intersect functions which don't fill in
  240. * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
  241. * might give a few % performance improvement */
  242. switch (p_type) {
  243. case PRIMITIVE_TRIANGLE: {
  244. hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
  245. break;
  246. }
  247. #if BVH_FEATURE(BVH_MOTION)
  248. case PRIMITIVE_MOTION_TRIANGLE: {
  249. hit = motion_triangle_intersect(
  250. kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
  251. break;
  252. }
  253. #endif
  254. #if BVH_FEATURE(BVH_HAIR)
  255. case PRIMITIVE_CURVE:
  256. case PRIMITIVE_MOTION_CURVE: {
  257. const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
  258. if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
  259. hit = cardinal_curve_intersect(kg,
  260. isect_array,
  261. P,
  262. dir,
  263. visibility,
  264. object,
  265. prim_addr,
  266. ray->time,
  267. curve_type);
  268. }
  269. else {
  270. hit = curve_intersect(kg,
  271. isect_array,
  272. P,
  273. dir,
  274. visibility,
  275. object,
  276. prim_addr,
  277. ray->time,
  278. curve_type);
  279. }
  280. break;
  281. }
  282. #endif
  283. default: {
  284. hit = false;
  285. break;
  286. }
  287. }
  288. /* Shadow ray early termination. */
  289. if (hit) {
  290. /* detect if this surface has a shader with transparent shadows */
  291. /* todo: optimize so primitive visibility flag indicates if
  292. * the primitive has a transparent shadow shader? */
  293. int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
  294. int shader = 0;
  295. #ifdef __HAIR__
  296. if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
  297. #endif
  298. {
  299. shader = kernel_tex_fetch(__tri_shader, prim);
  300. }
  301. #ifdef __HAIR__
  302. else {
  303. float4 str = kernel_tex_fetch(__curves, prim);
  304. shader = __float_as_int(str.z);
  305. }
  306. #endif
  307. int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
  308. /* if no transparent shadows, all light is blocked */
  309. if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
  310. return true;
  311. }
  312. /* if maximum number of hits reached, block all light */
  313. else if (*num_hits == max_hits) {
  314. return true;
  315. }
  316. /* move on to next entry in intersections array */
  317. isect_array++;
  318. (*num_hits)++;
  319. #if BVH_FEATURE(BVH_INSTANCING)
  320. num_hits_in_instance++;
  321. #endif
  322. isect_array->t = isect_t;
  323. }
  324. prim_addr++;
  325. }
  326. }
  327. #if BVH_FEATURE(BVH_INSTANCING)
  328. else {
  329. /* Instance push. */
  330. object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
  331. # if BVH_FEATURE(BVH_MOTION)
  332. isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
  333. # else
  334. isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
  335. # endif
  336. num_hits_in_instance = 0;
  337. isect_array->t = isect_t;
  338. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  339. tfar = ssef(isect_t);
  340. # if BVH_FEATURE(BVH_HAIR)
  341. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  342. # endif
  343. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  344. # ifdef __KERNEL_AVX2__
  345. P_idir = P * idir;
  346. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  347. # endif
  348. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  349. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  350. # endif
  351. ++stack_ptr;
  352. kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
  353. traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
  354. node_addr = kernel_tex_fetch(__object_node, object);
  355. }
  356. }
  357. #endif /* FEATURE(BVH_INSTANCING) */
  358. } while (node_addr != ENTRYPOINT_SENTINEL);
  359. #if BVH_FEATURE(BVH_INSTANCING)
  360. if (stack_ptr >= 0) {
  361. kernel_assert(object != OBJECT_NONE);
  362. /* Instance pop. */
  363. if (num_hits_in_instance) {
  364. float t_fac;
  365. # if BVH_FEATURE(BVH_MOTION)
  366. bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
  367. # else
  368. bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
  369. # endif
  370. /* Scale isect->t to adjust for instancing. */
  371. for (int i = 0; i < num_hits_in_instance; i++) {
  372. (isect_array - i - 1)->t *= t_fac;
  373. }
  374. }
  375. else {
  376. # if BVH_FEATURE(BVH_MOTION)
  377. bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
  378. # else
  379. bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
  380. # endif
  381. }
  382. isect_t = tmax;
  383. isect_array->t = isect_t;
  384. qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
  385. tfar = ssef(isect_t);
  386. # if BVH_FEATURE(BVH_HAIR)
  387. dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
  388. # endif
  389. idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
  390. # ifdef __KERNEL_AVX2__
  391. P_idir = P * idir;
  392. P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
  393. # endif
  394. # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
  395. org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
  396. # endif
  397. object = OBJECT_NONE;
  398. node_addr = traversal_stack[stack_ptr].addr;
  399. --stack_ptr;
  400. }
  401. #endif /* FEATURE(BVH_INSTANCING) */
  402. } while (node_addr != ENTRYPOINT_SENTINEL);
  403. return false;
  404. }
  405. #undef NODE_INTERSECT