kernel_subsurface.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. CCL_NAMESPACE_BEGIN
  17. /* BSSRDF using disk based importance sampling.
  18. *
  19. * BSSRDF Importance Sampling, SIGGRAPH 2013
  20. * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
  21. */
  22. ccl_device_inline float3
  23. subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all)
  24. {
  25. /* this is the veach one-sample model with balance heuristic, some pdf
  26. * factors drop out when using balance heuristic weighting */
  27. float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
  28. float pdf_sum = 0.0f;
  29. float sample_weight_inv = 0.0f;
  30. if (!all) {
  31. float sample_weight_sum = 0.0f;
  32. for (int i = 0; i < sd->num_closure; i++) {
  33. sc = &sd->closure[i];
  34. if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
  35. sample_weight_sum += sc->sample_weight;
  36. }
  37. }
  38. sample_weight_inv = 1.0f / sample_weight_sum;
  39. }
  40. for (int i = 0; i < sd->num_closure; i++) {
  41. sc = &sd->closure[i];
  42. if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
  43. /* in case of branched path integrate we sample all bssrdf's once,
  44. * for path trace we pick one, so adjust pdf for that */
  45. float sample_weight = (all) ? 1.0f : sc->sample_weight * sample_weight_inv;
  46. /* compute pdf */
  47. float3 eval = bssrdf_eval(sc, r);
  48. float pdf = bssrdf_pdf(sc, disk_r);
  49. eval_sum += sc->weight * eval;
  50. pdf_sum += sample_weight * pdf;
  51. }
  52. }
  53. return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
  54. }
  55. /* replace closures with a single diffuse bsdf closure after scatter step */
  56. ccl_device void subsurface_scatter_setup_diffuse_bsdf(
  57. KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
  58. {
  59. sd->flag &= ~SD_CLOSURE_FLAGS;
  60. sd->num_closure = 0;
  61. sd->num_closure_left = kernel_data.integrator.max_closures;
  62. #ifdef __PRINCIPLED__
  63. if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
  64. PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
  65. sd, sizeof(PrincipledDiffuseBsdf), weight);
  66. if (bsdf) {
  67. bsdf->N = N;
  68. bsdf->roughness = roughness;
  69. sd->flag |= bsdf_principled_diffuse_setup(bsdf);
  70. /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
  71. * can recognize it as not being a regular Disney principled diffuse closure */
  72. bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
  73. }
  74. }
  75. else if (CLOSURE_IS_BSDF_BSSRDF(type) || CLOSURE_IS_BSSRDF(type))
  76. #endif /* __PRINCIPLED__ */
  77. {
  78. DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
  79. if (bsdf) {
  80. bsdf->N = N;
  81. sd->flag |= bsdf_diffuse_setup(bsdf);
  82. /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
  83. * can recognize it as not being a regular diffuse closure */
  84. bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
  85. }
  86. }
  87. }
  88. /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
  89. ccl_device float3 subsurface_color_pow(float3 color, float exponent)
  90. {
  91. color = max(color, make_float3(0.0f, 0.0f, 0.0f));
  92. if (exponent == 1.0f) {
  93. /* nothing to do */
  94. }
  95. else if (exponent == 0.5f) {
  96. color.x = sqrtf(color.x);
  97. color.y = sqrtf(color.y);
  98. color.z = sqrtf(color.z);
  99. }
  100. else {
  101. color.x = powf(color.x, exponent);
  102. color.y = powf(color.y, exponent);
  103. color.z = powf(color.z, exponent);
  104. }
  105. return color;
  106. }
  107. ccl_device void subsurface_color_bump_blur(
  108. KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float3 *eval, float3 *N)
  109. {
  110. /* average color and texture blur at outgoing point */
  111. float texture_blur;
  112. float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
  113. /* do we have bump mapping? */
  114. bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
  115. if (bump || texture_blur > 0.0f) {
  116. /* average color and normal at incoming point */
  117. shader_eval_surface(kg, sd, state, state->flag);
  118. float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL);
  119. /* we simply divide out the average color and multiply with the average
  120. * of the other one. we could try to do this per closure but it's quite
  121. * tricky to match closures between shader evaluations, their number and
  122. * order may change, this is simpler */
  123. if (texture_blur > 0.0f) {
  124. out_color = subsurface_color_pow(out_color, texture_blur);
  125. in_color = subsurface_color_pow(in_color, texture_blur);
  126. *eval *= safe_divide_color(in_color, out_color);
  127. }
  128. }
  129. }
  130. /* Subsurface scattering step, from a point on the surface to other
  131. * nearby points on the same object.
  132. */
  133. ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
  134. LocalIntersection *ss_isect,
  135. ShaderData *sd,
  136. const ShaderClosure *sc,
  137. uint *lcg_state,
  138. float disk_u,
  139. float disk_v,
  140. bool all)
  141. {
  142. /* pick random axis in local frame and point on disk */
  143. float3 disk_N, disk_T, disk_B;
  144. float pick_pdf_N, pick_pdf_T, pick_pdf_B;
  145. disk_N = sd->Ng;
  146. make_orthonormals(disk_N, &disk_T, &disk_B);
  147. if (disk_v < 0.5f) {
  148. pick_pdf_N = 0.5f;
  149. pick_pdf_T = 0.25f;
  150. pick_pdf_B = 0.25f;
  151. disk_v *= 2.0f;
  152. }
  153. else if (disk_v < 0.75f) {
  154. float3 tmp = disk_N;
  155. disk_N = disk_T;
  156. disk_T = tmp;
  157. pick_pdf_N = 0.25f;
  158. pick_pdf_T = 0.5f;
  159. pick_pdf_B = 0.25f;
  160. disk_v = (disk_v - 0.5f) * 4.0f;
  161. }
  162. else {
  163. float3 tmp = disk_N;
  164. disk_N = disk_B;
  165. disk_B = tmp;
  166. pick_pdf_N = 0.25f;
  167. pick_pdf_T = 0.25f;
  168. pick_pdf_B = 0.5f;
  169. disk_v = (disk_v - 0.75f) * 4.0f;
  170. }
  171. /* sample point on disk */
  172. float phi = M_2PI_F * disk_v;
  173. float disk_height, disk_r;
  174. bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
  175. float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
  176. /* create ray */
  177. #ifdef __SPLIT_KERNEL__
  178. Ray ray_object = ss_isect->ray;
  179. Ray *ray = &ray_object;
  180. #else
  181. Ray *ray = &ss_isect->ray;
  182. #endif
  183. ray->P = sd->P + disk_N * disk_height + disk_P;
  184. ray->D = -disk_N;
  185. ray->t = 2.0f * disk_height;
  186. ray->dP = sd->dP;
  187. ray->dD = differential3_zero();
  188. ray->time = sd->time;
  189. /* intersect with the same object. if multiple intersections are found it
  190. * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
  191. scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
  192. int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
  193. for (int hit = 0; hit < num_eval_hits; hit++) {
  194. /* Quickly retrieve P and Ng without setting up ShaderData. */
  195. float3 hit_P;
  196. if (sd->type & PRIMITIVE_TRIANGLE) {
  197. hit_P = triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray);
  198. }
  199. #ifdef __OBJECT_MOTION__
  200. else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
  201. float3 verts[3];
  202. motion_triangle_vertices(kg,
  203. sd->object,
  204. kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
  205. sd->time,
  206. verts);
  207. hit_P = motion_triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray, verts);
  208. }
  209. #endif /* __OBJECT_MOTION__ */
  210. else {
  211. ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
  212. continue;
  213. }
  214. float3 hit_Ng = ss_isect->Ng[hit];
  215. if (ss_isect->hits[hit].object != OBJECT_NONE) {
  216. object_normal_transform(kg, sd, &hit_Ng);
  217. }
  218. /* Probability densities for local frame axes. */
  219. float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
  220. float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
  221. float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
  222. /* Multiple importance sample between 3 axes, power heuristic
  223. * found to be slightly better than balance heuristic. pdf_N
  224. * in the MIS weight and denominator cancelled out. */
  225. float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
  226. if (ss_isect->num_hits > BSSRDF_MAX_HITS) {
  227. w *= ss_isect->num_hits / (float)BSSRDF_MAX_HITS;
  228. }
  229. /* Real distance to sampled point. */
  230. float r = len(hit_P - sd->P);
  231. /* Evaluate profiles. */
  232. float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
  233. ss_isect->weight[hit] = eval;
  234. }
  235. #ifdef __SPLIT_KERNEL__
  236. ss_isect->ray = *ray;
  237. #endif
  238. return num_eval_hits;
  239. }
  240. ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg,
  241. LocalIntersection *ss_isect,
  242. int hit,
  243. ShaderData *sd,
  244. ccl_addr_space PathState *state,
  245. ClosureType type,
  246. float roughness)
  247. {
  248. #ifdef __SPLIT_KERNEL__
  249. Ray ray_object = ss_isect->ray;
  250. Ray *ray = &ray_object;
  251. #else
  252. Ray *ray = &ss_isect->ray;
  253. #endif
  254. /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
  255. #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
  256. kernel_split_params.dummy_sd_flag = sd->flag;
  257. #endif
  258. /* Setup new shading point. */
  259. shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
  260. /* Optionally blur colors and bump mapping. */
  261. float3 weight = ss_isect->weight[hit];
  262. float3 N = sd->N;
  263. subsurface_color_bump_blur(kg, sd, state, &weight, &N);
  264. /* Setup diffuse BSDF. */
  265. subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
  266. }
  267. /* Random walk subsurface scattering.
  268. *
  269. * "Practical and Controllable Subsurface Scattering for Production Path
  270. * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
  271. ccl_device void subsurface_random_walk_remap(const float A,
  272. const float d,
  273. float *sigma_t,
  274. float *sigma_s)
  275. {
  276. /* Compute attenuation and scattering coefficients from albedo. */
  277. const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
  278. const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
  279. *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
  280. *sigma_s = *sigma_t * a;
  281. }
  282. ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
  283. float3 *sigma_t,
  284. float3 *sigma_s,
  285. float3 *weight)
  286. {
  287. const Bssrdf *bssrdf = (const Bssrdf *)sc;
  288. const float3 A = bssrdf->albedo;
  289. const float3 d = bssrdf->radius;
  290. float sigma_t_x, sigma_t_y, sigma_t_z;
  291. float sigma_s_x, sigma_s_y, sigma_s_z;
  292. subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
  293. subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
  294. subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
  295. *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
  296. *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
  297. /* Closure mixing and Fresnel weights separate from albedo. */
  298. *weight = safe_divide_color(bssrdf->weight, A);
  299. }
  300. ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
  301. LocalIntersection *ss_isect,
  302. ShaderData *sd,
  303. ccl_addr_space PathState *state,
  304. const ShaderClosure *sc,
  305. const float bssrdf_u,
  306. const float bssrdf_v)
  307. {
  308. /* Sample diffuse surface scatter into the object. */
  309. float3 D;
  310. float pdf;
  311. sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
  312. if (dot(-sd->Ng, D) <= 0.0f) {
  313. return 0;
  314. }
  315. /* Convert subsurface to volume coefficients. */
  316. float3 sigma_t, sigma_s;
  317. float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
  318. subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
  319. /* Setup ray. */
  320. #ifdef __SPLIT_KERNEL__
  321. Ray ray_object = ss_isect->ray;
  322. Ray *ray = &ray_object;
  323. #else
  324. Ray *ray = &ss_isect->ray;
  325. #endif
  326. ray->P = ray_offset(sd->P, -sd->Ng);
  327. ray->D = D;
  328. ray->t = FLT_MAX;
  329. ray->time = sd->time;
  330. /* Modify state for RNGs, decorrelated from other paths. */
  331. uint prev_rng_offset = state->rng_offset;
  332. uint prev_rng_hash = state->rng_hash;
  333. state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
  334. /* Random walk until we hit the surface again. */
  335. bool hit = false;
  336. for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
  337. /* Advance random number offset. */
  338. state->rng_offset += PRNG_BOUNCE_NUM;
  339. if (bounce > 0) {
  340. /* Sample scattering direction. */
  341. const float anisotropy = 0.0f;
  342. float scatter_u, scatter_v;
  343. path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
  344. ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
  345. }
  346. /* Sample color channel, use MIS with balance heuristic. */
  347. float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
  348. float3 albedo = safe_divide_color(sigma_s, sigma_t);
  349. float3 channel_pdf;
  350. int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
  351. /* Distance sampling. */
  352. float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
  353. float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
  354. float t = -logf(1.0f - rdist) / sample_sigma_t;
  355. ray->t = t;
  356. scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
  357. hit = (ss_isect->num_hits > 0);
  358. if (hit) {
  359. /* Compute world space distance to surface hit. */
  360. float3 D = ray->D;
  361. object_inverse_dir_transform(kg, sd, &D);
  362. D = normalize(D) * ss_isect->hits[0].t;
  363. object_dir_transform(kg, sd, &D);
  364. t = len(D);
  365. }
  366. /* Advance to new scatter location. */
  367. ray->P += t * ray->D;
  368. /* Update throughput. */
  369. float3 transmittance = volume_color_transmittance(sigma_t, t);
  370. float pdf = dot(channel_pdf, (hit) ? transmittance : sigma_t * transmittance);
  371. throughput *= ((hit) ? transmittance : sigma_s * transmittance) / pdf;
  372. if (hit) {
  373. /* If we hit the surface, we are done. */
  374. break;
  375. }
  376. /* Russian roulette. */
  377. float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
  378. float probability = min(max3(fabs(throughput)), 1.0f);
  379. if (terminate >= probability) {
  380. break;
  381. }
  382. throughput /= probability;
  383. }
  384. kernel_assert(isfinite_safe(throughput.x) && isfinite_safe(throughput.y) &&
  385. isfinite_safe(throughput.z));
  386. state->rng_offset = prev_rng_offset;
  387. state->rng_hash = prev_rng_hash;
  388. /* Return number of hits in ss_isect. */
  389. if (!hit) {
  390. return 0;
  391. }
  392. /* TODO: gain back performance lost from merging with disk BSSRDF. We
  393. * only need to return on hit so this indirect ray push/pop overhead
  394. * is not actually needed, but it does keep the code simpler. */
  395. ss_isect->weight[0] = throughput;
  396. #ifdef __SPLIT_KERNEL__
  397. ss_isect->ray = *ray;
  398. #endif
  399. return 1;
  400. }
  401. ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg,
  402. LocalIntersection *ss_isect,
  403. ShaderData *sd,
  404. ccl_addr_space PathState *state,
  405. const ShaderClosure *sc,
  406. uint *lcg_state,
  407. float bssrdf_u,
  408. float bssrdf_v,
  409. bool all)
  410. {
  411. if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
  412. return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all);
  413. }
  414. else {
  415. return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v);
  416. }
  417. }
  418. CCL_NAMESPACE_END