kernel_path_branched.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. CCL_NAMESPACE_BEGIN
  17. #ifdef __BRANCHED_PATH__
  18. ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
  19. ShaderData *sd,
  20. ShaderData *emission_sd,
  21. PathRadiance *L,
  22. ccl_addr_space PathState *state,
  23. float3 throughput)
  24. {
  25. int num_samples = kernel_data.integrator.ao_samples;
  26. float num_samples_inv = 1.0f / num_samples;
  27. float ao_factor = kernel_data.background.ao_factor;
  28. float3 ao_N;
  29. float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
  30. float3 ao_alpha = shader_bsdf_alpha(kg, sd);
  31. for (int j = 0; j < num_samples; j++) {
  32. float bsdf_u, bsdf_v;
  33. path_branched_rng_2D(
  34. kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
  35. float3 ao_D;
  36. float ao_pdf;
  37. sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
  38. if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
  39. Ray light_ray;
  40. float3 ao_shadow;
  41. light_ray.P = ray_offset(sd->P, sd->Ng);
  42. light_ray.D = ao_D;
  43. light_ray.t = kernel_data.background.ao_distance;
  44. light_ray.time = sd->time;
  45. light_ray.dP = sd->dP;
  46. light_ray.dD = differential3_zero();
  47. if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
  48. path_radiance_accum_ao(
  49. L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
  50. }
  51. else {
  52. path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf);
  53. }
  54. }
  55. }
  56. }
  57. # ifndef __SPLIT_KERNEL__
  58. # ifdef __VOLUME__
  59. ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
  60. ShaderData *sd,
  61. PathState *state,
  62. Ray *ray,
  63. float3 *throughput,
  64. ccl_addr_space Intersection *isect,
  65. bool hit,
  66. ShaderData *indirect_sd,
  67. ShaderData *emission_sd,
  68. PathRadiance *L)
  69. {
  70. /* Sanitize volume stack. */
  71. if (!hit) {
  72. kernel_volume_clean_stack(kg, state->volume_stack);
  73. }
  74. if (state->volume_stack[0].shader == SHADER_NONE) {
  75. return;
  76. }
  77. /* volume attenuation, emission, scatter */
  78. Ray volume_ray = *ray;
  79. volume_ray.t = (hit) ? isect->t : FLT_MAX;
  80. bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
  81. # ifdef __VOLUME_DECOUPLED__
  82. /* decoupled ray marching only supported on CPU */
  83. if (kernel_data.integrator.volume_decoupled) {
  84. /* cache steps along volume for repeated sampling */
  85. VolumeSegment volume_segment;
  86. shader_setup_from_volume(kg, sd, &volume_ray);
  87. kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
  88. /* direct light sampling */
  89. if (volume_segment.closure_flag & SD_SCATTER) {
  90. volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
  91. int all = kernel_data.integrator.sample_all_lights_direct;
  92. kernel_branched_path_volume_connect_light(
  93. kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
  94. /* indirect light sampling */
  95. int num_samples = kernel_data.integrator.volume_samples;
  96. float num_samples_inv = 1.0f / num_samples;
  97. for (int j = 0; j < num_samples; j++) {
  98. PathState ps = *state;
  99. Ray pray = *ray;
  100. float3 tp = *throughput;
  101. /* branch RNG state */
  102. path_state_branch(&ps, j, num_samples);
  103. /* scatter sample. if we use distance sampling and take just one
  104. * sample for direct and indirect light, we could share this
  105. * computation, but makes code a bit complex */
  106. float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
  107. float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
  108. VolumeIntegrateResult result = kernel_volume_decoupled_scatter(
  109. kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
  110. if (result == VOLUME_PATH_SCATTERED &&
  111. kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
  112. kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L);
  113. /* for render passes, sum and reset indirect light pass variables
  114. * for the next samples */
  115. path_radiance_sum_indirect(L);
  116. path_radiance_reset_indirect(L);
  117. }
  118. }
  119. }
  120. /* emission and transmittance */
  121. if (volume_segment.closure_flag & SD_EMISSION)
  122. path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
  123. *throughput *= volume_segment.accum_transmittance;
  124. /* free cached steps */
  125. kernel_volume_decoupled_free(kg, &volume_segment);
  126. }
  127. else
  128. # endif /* __VOLUME_DECOUPLED__ */
  129. {
  130. /* GPU: no decoupled ray marching, scatter probalistically */
  131. int num_samples = kernel_data.integrator.volume_samples;
  132. float num_samples_inv = 1.0f / num_samples;
  133. /* todo: we should cache the shader evaluations from stepping
  134. * through the volume, for now we redo them multiple times */
  135. for (int j = 0; j < num_samples; j++) {
  136. PathState ps = *state;
  137. Ray pray = *ray;
  138. float3 tp = (*throughput) * num_samples_inv;
  139. /* branch RNG state */
  140. path_state_branch(&ps, j, num_samples);
  141. VolumeIntegrateResult result = kernel_volume_integrate(
  142. kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
  143. # ifdef __VOLUME_SCATTER__
  144. if (result == VOLUME_PATH_SCATTERED) {
  145. /* todo: support equiangular, MIS and all light sampling.
  146. * alternatively get decoupled ray marching working on the GPU */
  147. kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
  148. if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
  149. kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L);
  150. /* for render passes, sum and reset indirect light pass variables
  151. * for the next samples */
  152. path_radiance_sum_indirect(L);
  153. path_radiance_reset_indirect(L);
  154. }
  155. }
  156. # endif /* __VOLUME_SCATTER__ */
  157. }
  158. /* todo: avoid this calculation using decoupled ray marching */
  159. kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
  160. }
  161. }
  162. # endif /* __VOLUME__ */
  163. /* bounce off surface and integrate indirect light */
  164. ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
  165. ShaderData *sd,
  166. ShaderData *indirect_sd,
  167. ShaderData *emission_sd,
  168. float3 throughput,
  169. float num_samples_adjust,
  170. PathState *state,
  171. PathRadiance *L)
  172. {
  173. float sum_sample_weight = 0.0f;
  174. # ifdef __DENOISING_FEATURES__
  175. if (state->denoising_feature_weight > 0.0f) {
  176. for (int i = 0; i < sd->num_closure; i++) {
  177. const ShaderClosure *sc = &sd->closure[i];
  178. /* transparency is not handled here, but in outer loop */
  179. if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
  180. continue;
  181. }
  182. sum_sample_weight += sc->sample_weight;
  183. }
  184. }
  185. else {
  186. sum_sample_weight = 1.0f;
  187. }
  188. # endif /* __DENOISING_FEATURES__ */
  189. for (int i = 0; i < sd->num_closure; i++) {
  190. const ShaderClosure *sc = &sd->closure[i];
  191. /* transparency is not handled here, but in outer loop */
  192. if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
  193. continue;
  194. }
  195. int num_samples;
  196. if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
  197. num_samples = kernel_data.integrator.diffuse_samples;
  198. else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
  199. num_samples = 1;
  200. else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
  201. num_samples = kernel_data.integrator.glossy_samples;
  202. else
  203. num_samples = kernel_data.integrator.transmission_samples;
  204. num_samples = ceil_to_int(num_samples_adjust * num_samples);
  205. float num_samples_inv = num_samples_adjust / num_samples;
  206. for (int j = 0; j < num_samples; j++) {
  207. PathState ps = *state;
  208. float3 tp = throughput;
  209. Ray bsdf_ray;
  210. # ifdef __SHADOW_TRICKS__
  211. float shadow_transparency = L->shadow_transparency;
  212. # endif
  213. ps.rng_hash = cmj_hash(state->rng_hash, i);
  214. if (!kernel_branched_path_surface_bounce(
  215. kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) {
  216. continue;
  217. }
  218. ps.rng_hash = state->rng_hash;
  219. kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L);
  220. /* for render passes, sum and reset indirect light pass variables
  221. * for the next samples */
  222. path_radiance_sum_indirect(L);
  223. path_radiance_reset_indirect(L);
  224. # ifdef __SHADOW_TRICKS__
  225. L->shadow_transparency = shadow_transparency;
  226. # endif
  227. }
  228. }
  229. }
  230. # ifdef __SUBSURFACE__
  231. ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
  232. ShaderData *sd,
  233. ShaderData *indirect_sd,
  234. ShaderData *emission_sd,
  235. PathRadiance *L,
  236. PathState *state,
  237. Ray *ray,
  238. float3 throughput)
  239. {
  240. for (int i = 0; i < sd->num_closure; i++) {
  241. ShaderClosure *sc = &sd->closure[i];
  242. if (!CLOSURE_IS_BSSRDF(sc->type))
  243. continue;
  244. /* set up random number generator */
  245. uint lcg_state = lcg_state_init(state, 0x68bc21eb);
  246. int num_samples = kernel_data.integrator.subsurface_samples * 3;
  247. float num_samples_inv = 1.0f / num_samples;
  248. uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
  249. /* do subsurface scatter step with copy of shader data, this will
  250. * replace the BSSRDF with a diffuse BSDF closure */
  251. for (int j = 0; j < num_samples; j++) {
  252. PathState hit_state = *state;
  253. path_state_branch(&hit_state, j, num_samples);
  254. hit_state.rng_hash = bssrdf_rng_hash;
  255. LocalIntersection ss_isect;
  256. float bssrdf_u, bssrdf_v;
  257. path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
  258. int num_hits = subsurface_scatter_multi_intersect(
  259. kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
  260. hit_state.rng_offset += PRNG_BOUNCE_NUM;
  261. # ifdef __VOLUME__
  262. Ray volume_ray = *ray;
  263. bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
  264. sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
  265. # endif /* __VOLUME__ */
  266. /* compute lighting with the BSDF closure */
  267. for (int hit = 0; hit < num_hits; hit++) {
  268. ShaderData bssrdf_sd = *sd;
  269. Bssrdf *bssrdf = (Bssrdf *)sc;
  270. ClosureType bssrdf_type = sc->type;
  271. float bssrdf_roughness = bssrdf->roughness;
  272. subsurface_scatter_multi_setup(
  273. kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness);
  274. # ifdef __VOLUME__
  275. if (need_update_volume_stack) {
  276. /* Setup ray from previous surface point to the new one. */
  277. float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
  278. volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
  279. for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
  280. hit_state.volume_stack[k] = state->volume_stack[k];
  281. }
  282. kernel_volume_stack_update_for_subsurface(
  283. kg, emission_sd, &volume_ray, hit_state.volume_stack);
  284. }
  285. # endif /* __VOLUME__ */
  286. # ifdef __EMISSION__
  287. /* direct light */
  288. if (kernel_data.integrator.use_direct_light) {
  289. int all = (kernel_data.integrator.sample_all_lights_direct) ||
  290. (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
  291. kernel_branched_path_surface_connect_light(
  292. kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all);
  293. }
  294. # endif /* __EMISSION__ */
  295. /* indirect light */
  296. kernel_branched_path_surface_indirect_light(
  297. kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L);
  298. }
  299. }
  300. }
  301. }
  302. # endif /* __SUBSURFACE__ */
  303. ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
  304. uint rng_hash,
  305. int sample,
  306. Ray ray,
  307. ccl_global float *buffer,
  308. PathRadiance *L)
  309. {
  310. /* initialize */
  311. float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
  312. path_radiance_init(L, kernel_data.film.use_light_pass);
  313. /* shader data memory used for both volumes and surfaces, saves stack space */
  314. ShaderData sd;
  315. /* shader data used by emission, shadows, volume stacks, indirect path */
  316. ShaderDataTinyStorage emission_sd_storage;
  317. ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
  318. ShaderData indirect_sd;
  319. PathState state;
  320. path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
  321. /* Main Loop
  322. * Here we only handle transparency intersections from the camera ray.
  323. * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
  324. */
  325. for (;;) {
  326. /* Find intersection with objects in scene. */
  327. Intersection isect;
  328. bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
  329. # ifdef __VOLUME__
  330. /* Volume integration. */
  331. kernel_branched_path_volume(
  332. kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L);
  333. # endif /* __VOLUME__ */
  334. /* Shade background. */
  335. if (!hit) {
  336. kernel_path_background(kg, &state, &ray, throughput, &sd, L);
  337. break;
  338. }
  339. /* Setup and evaluate shader. */
  340. shader_setup_from_ray(kg, &sd, &isect, &ray);
  341. /* Skip most work for volume bounding surface. */
  342. # ifdef __VOLUME__
  343. if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
  344. # endif
  345. shader_eval_surface(kg, &sd, &state, state.flag);
  346. shader_merge_closures(&sd);
  347. /* Apply shadow catcher, holdout, emission. */
  348. if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) {
  349. break;
  350. }
  351. /* transparency termination */
  352. if (state.flag & PATH_RAY_TRANSPARENT) {
  353. /* path termination. this is a strange place to put the termination, it's
  354. * mainly due to the mixed in MIS that we use. gives too many unneeded
  355. * shader evaluations, only need emission if we are going to terminate */
  356. float probability = path_state_continuation_probability(kg, &state, throughput);
  357. if (probability == 0.0f) {
  358. break;
  359. }
  360. else if (probability != 1.0f) {
  361. float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
  362. if (terminate >= probability)
  363. break;
  364. throughput /= probability;
  365. }
  366. }
  367. kernel_update_denoising_features(kg, &sd, &state, L);
  368. # ifdef __AO__
  369. /* ambient occlusion */
  370. if (kernel_data.integrator.use_ambient_occlusion) {
  371. kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
  372. }
  373. # endif /* __AO__ */
  374. # ifdef __SUBSURFACE__
  375. /* bssrdf scatter to a different location on the same object */
  376. if (sd.flag & SD_BSSRDF) {
  377. kernel_branched_path_subsurface_scatter(
  378. kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
  379. }
  380. # endif /* __SUBSURFACE__ */
  381. PathState hit_state = state;
  382. # ifdef __EMISSION__
  383. /* direct light */
  384. if (kernel_data.integrator.use_direct_light) {
  385. int all = (kernel_data.integrator.sample_all_lights_direct) ||
  386. (state.flag & PATH_RAY_SHADOW_CATCHER);
  387. kernel_branched_path_surface_connect_light(
  388. kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
  389. }
  390. # endif /* __EMISSION__ */
  391. /* indirect light */
  392. kernel_branched_path_surface_indirect_light(
  393. kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
  394. /* continue in case of transparency */
  395. throughput *= shader_bsdf_transparency(kg, &sd);
  396. if (is_zero(throughput))
  397. break;
  398. /* Update Path State */
  399. path_state_next(kg, &state, LABEL_TRANSPARENT);
  400. # ifdef __VOLUME__
  401. }
  402. else {
  403. if (!path_state_volume_next(kg, &state)) {
  404. break;
  405. }
  406. }
  407. # endif
  408. ray.P = ray_offset(sd.P, -sd.Ng);
  409. ray.t -= sd.ray_length; /* clipping works through transparent */
  410. # ifdef __RAY_DIFFERENTIALS__
  411. ray.dP = sd.dP;
  412. ray.dD.dx = -sd.dI.dx;
  413. ray.dD.dy = -sd.dI.dy;
  414. # endif /* __RAY_DIFFERENTIALS__ */
  415. # ifdef __VOLUME__
  416. /* enter/exit volume */
  417. kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
  418. # endif /* __VOLUME__ */
  419. }
  420. }
  421. ccl_device void kernel_branched_path_trace(
  422. KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
  423. {
  424. /* buffer offset */
  425. int index = offset + x + y * stride;
  426. int pass_stride = kernel_data.film.pass_stride;
  427. buffer += index * pass_stride;
  428. /* initialize random numbers and ray */
  429. uint rng_hash;
  430. Ray ray;
  431. kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
  432. /* integrate */
  433. PathRadiance L;
  434. if (ray.t != 0.0f) {
  435. kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
  436. kernel_write_result(kg, buffer, sample, &L);
  437. }
  438. }
  439. # endif /* __SPLIT_KERNEL__ */
  440. #endif /* __BRANCHED_PATH__ */
  441. CCL_NAMESPACE_END