/* kernel_shadow.h */
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. CCL_NAMESPACE_BEGIN
#ifdef __VOLUME__
/* Scratch storage for the volume-evaluation copy of PathState.
 *
 * In the split kernel the copy lives in a pre-allocated global buffer
 * (kernel_split_state.state_shadow), so this struct carries no member;
 * in the megakernel the copy is kept on the stack inside this struct. */
typedef struct VolumeState {
#  ifdef __SPLIT_KERNEL__
#  else
  PathState ps;
#  endif
} VolumeState;

/* Get PathState ready for use for volume stack evaluation.
 *
 * Copies the caller's path state into per-thread scratch storage and, when
 * the shadow ray leaves through the back side of the surface, pops/pushes
 * the volume stack so evaluation starts on the correct side.
 * Returns a pointer to the scratch copy (global memory in the split kernel,
 * &volume_state->ps otherwise); the caller's `state` is never modified. */
#  ifdef __SPLIT_KERNEL__
ccl_addr_space
#  endif
    ccl_device_inline PathState *
    shadow_blocked_volume_path_state(KernelGlobals *kg,
                                     VolumeState *volume_state,
                                     ccl_addr_space PathState *state,
                                     ShaderData *sd,
                                     Ray *ray)
{
#  ifdef __SPLIT_KERNEL__
  /* One dedicated slot per work-item in the global shadow-state buffer. */
  ccl_addr_space PathState *ps =
      &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#  else
  PathState *ps = &volume_state->ps;
#  endif
  *ps = *state;
  /* We are checking for shadow on the "other" side of the surface, so need
   * to discard volume we are currently at.
   */
  if (dot(sd->Ng, ray->D) < 0.0f) {
    kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
  }
  return ps;
}
#endif /* __VOLUME__ */
/* Attenuate throughput accordingly to the given intersection event.
 * Returns true if the throughput is zero and traversal can be aborted.
 *
 * Accounts for (a) volume attenuation over the segment up to the hit,
 * (b) the transparency of the surface itself, and (c) updates the volume
 * stack for crossing the surface. `shadow_sd` is overwritten with the
 * shader data at the intersection; `*throughput` is multiplied in place.
 */
ccl_device_forceinline bool shadow_handle_transparent_isect(
    KernelGlobals *kg,
    ShaderData *shadow_sd,
    ccl_addr_space PathState *state,
#ifdef __VOLUME__
    ccl_addr_space struct PathState *volume_state,
#endif
    Intersection *isect,
    Ray *ray,
    float3 *throughput)
{
#ifdef __VOLUME__
  /* Attenuation between last surface and next surface. */
  if (volume_state->volume_stack[0].shader != SHADER_NONE) {
    /* Only integrate the volume up to the intersection, not the full ray. */
    Ray segment_ray = *ray;
    segment_ray.t = isect->t;
    kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput);
  }
#endif
  /* Setup shader data at surface. */
  shader_setup_from_ray(kg, shadow_sd, isect, ray);
  /* Attenuation from transparent surface. */
  if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
    /* Temporarily count this as a transparent bounce while evaluating the
     * surface shader, then restore the original bounce counters. */
    path_state_modify_bounce(state, true);
    shader_eval_surface(kg, shadow_sd, state, PATH_RAY_SHADOW);
    path_state_modify_bounce(state, false);
    *throughput *= shader_bsdf_transparency(kg, shadow_sd);
  }
  /* Stop if all light is blocked. */
  if (is_zero(*throughput)) {
    return true;
  }
#ifdef __VOLUME__
  /* Exit/enter volume. */
  kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
#endif
  return false;
}
  92. /* Special version which only handles opaque shadows. */
  93. ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
  94. ShaderData *shadow_sd,
  95. ccl_addr_space PathState *state,
  96. const uint visibility,
  97. Ray *ray,
  98. Intersection *isect,
  99. float3 *shadow)
  100. {
  101. const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
  102. #ifdef __VOLUME__
  103. if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
  104. /* Apply attenuation from current volume shader. */
  105. kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
  106. }
  107. #endif
  108. return blocked;
  109. }
  110. #ifdef __TRANSPARENT_SHADOWS__
  111. # ifdef __SHADOW_RECORD_ALL__
  112. /* Shadow function to compute how much light is blocked,
  113. *
  114. * We trace a single ray. If it hits any opaque surface, or more than a given
  115. * number of transparent surfaces is hit, then we consider the geometry to be
  116. * entirely blocked. If not, all transparent surfaces will be recorded and we
  117. * will shade them one by one to determine how much light is blocked. This all
  118. * happens in one scene intersection function.
  119. *
  120. * Recording all hits works well in some cases but may be slower in others. If
  121. * we have many semi-transparent hairs, one intersection may be faster because
  122. * you'd be reinteresecting the same hairs a lot with each step otherwise. If
  123. * however there is mostly binary transparency then we may be recording many
  124. * unnecessary intersections when one of the first surfaces blocks all light.
  125. *
  126. * From tests in real scenes it seems the performance loss is either minimal,
  127. * or there is a performance increase anyway due to avoiding the need to send
  128. * two rays with transparent shadows.
  129. *
  130. * On CPU it'll handle all transparent bounces (by allocating storage for
  131. * intersections when they don't fit into the stack storage).
  132. *
  133. * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
  134. * is something to be kept an eye on.
  135. */
  136. # define SHADOW_STACK_MAX_HITS 64
/* Actual logic with traversal loop implementation which is free from device
 * specific tweaks.
 *
 * Note that hits array should be as big as max_hits+1.
 *
 * Records all transparent hits in one scene intersection call, then shades
 * them front-to-back, accumulating throughput until light is fully blocked
 * or all hits are processed. Returns true when the light is fully blocked;
 * otherwise writes the remaining transmission into *shadow.
 */
ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                                                    ShaderData *sd,
                                                    ShaderData *shadow_sd,
                                                    ccl_addr_space PathState *state,
                                                    const uint visibility,
                                                    Ray *ray,
                                                    Intersection *hits,
                                                    uint max_hits,
                                                    float3 *shadow)
{
  /* Intersect to find an opaque surface, or record all transparent
   * surface hits.
   */
  uint num_hits;
  const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits);
#  ifdef __VOLUME__
  VolumeState volume_state;
#  endif
  /* If no opaque surface found but we did find transparent hits,
   * shade them.
   */
  if (!blocked && num_hits > 0) {
    float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
    /* End point of the full shadow ray, used to re-aim the ray after each
     * hit so it keeps pointing at the light. */
    float3 Pend = ray->P + ray->D * ray->t;
    float last_t = 0.0f;
    int bounce = state->transparent_bounce;
    Intersection *isect = hits;
#  ifdef __VOLUME__
#    ifdef __SPLIT_KERNEL__
    ccl_addr_space
#    endif
        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
#  endif
    /* Hits are recorded in arbitrary order; shade them front-to-back. */
    sort_intersections(hits, num_hits);
    for (int hit = 0; hit < num_hits; hit++, isect++) {
      /* Adjust intersection distance for moving ray forward. */
      float new_t = isect->t;
      isect->t -= last_t;
      /* Skip hit if we did not move forward, step by step raytracing
       * would have skipped it as well then.
       */
      if (last_t == new_t) {
        continue;
      }
      last_t = new_t;
      /* Attenuate the throughput. */
      if (shadow_handle_transparent_isect(kg,
                                          shadow_sd,
                                          state,
#  ifdef __VOLUME__
                                          ps,
#  endif
                                          isect,
                                          ray,
                                          &throughput)) {
        return true;
      }
      /* Move ray forward. */
      ray->P = shadow_sd->P;
      if (ray->t != FLT_MAX) {
        ray->D = normalize_len(Pend - ray->P, &ray->t);
      }
      bounce++;
    }
#  ifdef __VOLUME__
    /* Attenuation for last line segment towards light. */
    if (ps->volume_stack[0].shader != SHADER_NONE) {
      kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
    }
#  endif
    *shadow = throughput;
    return is_zero(throughput);
  }
#  ifdef __VOLUME__
  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
    /* Apply attenuation from current volume shader. */
#    ifdef __SPLIT_KERNEL__
    ccl_addr_space
#    endif
        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
    kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
  }
#  endif
  return blocked;
}
  227. /* Here we do all device specific trickery before invoking actual traversal
  228. * loop to help readability of the actual logic.
  229. */
  230. ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
  231. ShaderData *sd,
  232. ShaderData *shadow_sd,
  233. ccl_addr_space PathState *state,
  234. const uint visibility,
  235. Ray *ray,
  236. uint max_hits,
  237. float3 *shadow)
  238. {
  239. # ifdef __SPLIT_KERNEL__
  240. Intersection hits_[SHADOW_STACK_MAX_HITS];
  241. Intersection *hits = &hits_[0];
  242. # elif defined(__KERNEL_CUDA__)
  243. Intersection *hits = kg->hits_stack;
  244. # else
  245. Intersection hits_stack[SHADOW_STACK_MAX_HITS];
  246. Intersection *hits = hits_stack;
  247. # endif
  248. # ifndef __KERNEL_GPU__
  249. /* Prefer to use stack but use dynamic allocation if too deep max hits
  250. * we need max_hits + 1 storage space due to the logic in
  251. * scene_intersect_shadow_all which will first store and then check if
  252. * the limit is exceeded.
  253. *
  254. * Ignore this on GPU because of slow/unavailable malloc().
  255. */
  256. if (max_hits + 1 > SHADOW_STACK_MAX_HITS) {
  257. if (kg->transparent_shadow_intersections == NULL) {
  258. const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
  259. kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) *
  260. (transparent_max_bounce + 1));
  261. }
  262. hits = kg->transparent_shadow_intersections;
  263. }
  264. # endif /* __KERNEL_GPU__ */
  265. /* Invoke actual traversal. */
  266. return shadow_blocked_transparent_all_loop(
  267. kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow);
  268. }
  269. # endif /* __SHADOW_RECORD_ALL__ */
  270. # if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
  271. /* Shadow function to compute how much light is blocked,
  272. *
  273. * Here we raytrace from one transparent surface to the next step by step.
  274. * To minimize overhead in cases where we don't need transparent shadows, we
  275. * first trace a regular shadow ray. We check if the hit primitive was
  276. * potentially transparent, and only in that case start marching. this gives
  277. * one extra ray cast for the cases were we do want transparency.
  278. */
/* This function is only implementing device-independent traversal logic
 * which requires some precalculation done.
 *
 * `blocked`/`is_transparent_isect` come from an opaque-visibility pre-trace
 * done by the caller. Step-by-step marching only happens when that trace hit
 * a potentially transparent surface. Returns true when the light is fully
 * blocked; otherwise multiplies the computed transmission into *shadow.
 */
ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
                                                        ShaderData *sd,
                                                        ShaderData *shadow_sd,
                                                        ccl_addr_space PathState *state,
                                                        const uint visibility,
                                                        Ray *ray,
                                                        Intersection *isect,
                                                        const bool blocked,
                                                        const bool is_transparent_isect,
                                                        float3 *shadow)
{
#  ifdef __VOLUME__
  VolumeState volume_state;
#  endif
  if (blocked && is_transparent_isect) {
    float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
    /* End point of the shadow ray, used to re-aim after every step. */
    float3 Pend = ray->P + ray->D * ray->t;
    int bounce = state->transparent_bounce;
#  ifdef __VOLUME__
#    ifdef __SPLIT_KERNEL__
    ccl_addr_space
#    endif
        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
#  endif
    for (;;) {
      /* Too many transparent bounces: consider the light fully blocked. */
      if (bounce >= kernel_data.integrator.transparent_max_bounce) {
        return true;
      }
      /* No more surfaces between us and the light: done marching. */
      if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
        break;
      }
      /* Opaque surface hit: fully blocked. */
      if (!shader_transparent_shadow(kg, isect)) {
        return true;
      }
      /* Attenuate the throughput. */
      if (shadow_handle_transparent_isect(kg,
                                          shadow_sd,
                                          state,
#  ifdef __VOLUME__
                                          ps,
#  endif
                                          isect,
                                          ray,
                                          &throughput)) {
        return true;
      }
      /* Move ray forward. */
      ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
      if (ray->t != FLT_MAX) {
        ray->D = normalize_len(Pend - ray->P, &ray->t);
      }
      bounce++;
    }
#  ifdef __VOLUME__
    /* Attenuation for last line segment towards light. */
    if (ps->volume_stack[0].shader != SHADER_NONE) {
      kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
    }
#  endif
    *shadow *= throughput;
    return is_zero(throughput);
  }
#  ifdef __VOLUME__
  if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
    /* Apply attenuation from current volume shader. */
#    ifdef __SPLIT_KERNEL__
    ccl_addr_space
#    endif
        PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray);
    kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
  }
#  endif
  return blocked;
}
  356. ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
  357. ShaderData *sd,
  358. ShaderData *shadow_sd,
  359. ccl_addr_space PathState *state,
  360. const uint visibility,
  361. Ray *ray,
  362. Intersection *isect,
  363. float3 *shadow)
  364. {
  365. bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
  366. bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
  367. return shadow_blocked_transparent_stepped_loop(
  368. kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
  369. }
  370. # endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
  371. #endif /* __TRANSPARENT_SHADOWS__ */
/* Main shadow-blocked entry point.
 *
 * Returns true when the light along `ray_input` is fully blocked. On a
 * false return, *shadow holds the transmission ((1,1,1) = unoccluded),
 * including transparent-surface and volume attenuation where those
 * features are compiled in. Dispatches to the opaque-only, record-all or
 * stepped implementation depending on integrator settings and device.
 */
ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                      ShaderData *sd,
                                      ShaderData *shadow_sd,
                                      ccl_addr_space PathState *state,
                                      Ray *ray_input,
                                      float3 *shadow)
{
  Ray *ray = ray_input;
  Intersection isect;
  /* Some common early checks. */
  *shadow = make_float3(1.0f, 1.0f, 1.0f);
  /* Zero-length shadow ray can never be occluded. */
  if (ray->t == 0.0f) {
    return false;
  }
#ifdef __SHADOW_TRICKS__
  /* Shadow-catcher rays use a visibility mask that excludes catchers. */
  const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER :
                                                                    PATH_RAY_SHADOW;
#else
  const uint visibility = PATH_RAY_SHADOW;
#endif
  /* Do actual shadow shading. */
  /* First of all, we check if integrator requires transparent shadows.
   * if not, we use simplest and fastest ever way to calculate occlusion.
   */
#ifdef __TRANSPARENT_SHADOWS__
  if (!kernel_data.integrator.transparent_shadows)
#endif
  {
    return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow);
  }
#ifdef __TRANSPARENT_SHADOWS__
#  ifdef __SHADOW_RECORD_ALL__
  /* For the transparent shadows we try to use record-all logic on the
   * devices which supports this.
   */
  const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
  /* Check transparent bounces here, for volume scatter which can do
   * lighting before surface path termination is checked.
   */
  if (state->transparent_bounce >= transparent_max_bounce) {
    return true;
  }
  const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
#    ifdef __KERNEL_GPU__
  /* On GPU we do tricky with tracing opaque ray first, this avoids speed
   * regressions in some files.
   *
   * TODO(sergey): Check why using record-all behavior causes slowdown in such
   * cases. Could that be caused by a higher spill pressure?
   */
  const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
  const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
  if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
    return shadow_blocked_transparent_stepped_loop(
        kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow);
  }
#    endif /* __KERNEL_GPU__ */
  return shadow_blocked_transparent_all(
      kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow);
#  else  /* __SHADOW_RECORD_ALL__ */
  /* Fallback to a slowest version which works on all devices. */
  return shadow_blocked_transparent_stepped(
      kg, sd, shadow_sd, state, visibility, ray, &isect, shadow);
#  endif /* __SHADOW_RECORD_ALL__ */
#endif   /* __TRANSPARENT_SHADOWS__ */
}
  438. #undef SHADOW_STACK_MAX_HITS
  439. CCL_NAMESPACE_END