bssrdf.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __KERNEL_BSSRDF_H__
  17. #define __KERNEL_BSSRDF_H__
  18. CCL_NAMESPACE_BEGIN
  19. typedef ccl_addr_space struct Bssrdf {
  20. SHADER_CLOSURE_BASE;
  21. float3 radius;
  22. float3 albedo;
  23. float sharpness;
  24. float texture_blur;
  25. float roughness;
  26. float channels;
  27. } Bssrdf;
  28. static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!");
  29. /* Planar Truncated Gaussian
  30. *
  31. * Note how this is different from the typical gaussian, this one integrates
  32. * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
  33. * that integrating x*exp(-x) gives a nice closed form solution. */
  34. /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
  35. #define GAUSS_TRUNCATE 12.46f
  36. ccl_device float bssrdf_gaussian_eval(const float radius, float r)
  37. {
  38. /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
  39. * = 1 - exp(-Rm*Rm/(2*v)) */
  40. const float v = radius * radius * (0.25f * 0.25f);
  41. const float Rm = sqrtf(v * GAUSS_TRUNCATE);
  42. if (r >= Rm)
  43. return 0.0f;
  44. return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
  45. }
  46. ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
  47. {
  48. /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
  49. const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
  50. return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
  51. }
  52. ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
  53. {
  54. /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
  55. * r = sqrt(-2*v*logf(xi)) */
  56. const float v = radius * radius * (0.25f * 0.25f);
  57. const float Rm = sqrtf(v * GAUSS_TRUNCATE);
  58. /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
  59. const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
  60. /* r(xi) */
  61. const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
  62. *r = sqrtf(r_squared);
  63. /* h^2 + r^2 = Rm^2 */
  64. *h = safe_sqrtf(Rm * Rm - r_squared);
  65. }
  66. /* Planar Cubic BSSRDF falloff
  67. *
  68. * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
  69. * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
  70. * far as I can tell has no closed form solution. So we get an iterative solution
  71. * instead with newton-raphson. */
  72. ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
  73. {
  74. if (sharpness == 0.0f) {
  75. const float Rm = radius;
  76. if (r >= Rm)
  77. return 0.0f;
  78. /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
  79. const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
  80. const float f = Rm - r;
  81. const float num = f * f * f;
  82. return (10.0f * num) / (Rm5 * M_PI_F);
  83. }
  84. else {
  85. float Rm = radius * (1.0f + sharpness);
  86. if (r >= Rm)
  87. return 0.0f;
  88. /* custom variation with extra sharpness, to match the previous code */
  89. const float y = 1.0f / (1.0f + sharpness);
  90. float Rmy, ry, ryinv;
  91. if (sharpness == 1.0f) {
  92. Rmy = sqrtf(Rm);
  93. ry = sqrtf(r);
  94. ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
  95. }
  96. else {
  97. Rmy = powf(Rm, y);
  98. ry = powf(r, y);
  99. ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
  100. }
  101. const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
  102. const float f = Rmy - ry;
  103. const float num = f * (f * f) * (y * ryinv);
  104. return (10.0f * num) / (Rmy5 * M_PI_F);
  105. }
  106. }
  107. ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
  108. {
  109. return bssrdf_cubic_eval(radius, sharpness, r);
  110. }
  111. /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
  112. ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
  113. {
  114. /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
  115. * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
  116. * should not be too bad */
  117. const float tolerance = 1e-6f;
  118. const int max_iteration_count = 10;
  119. float x = 0.25f;
  120. int i;
  121. for (i = 0; i < max_iteration_count; i++) {
  122. float x2 = x * x;
  123. float x3 = x2 * x;
  124. float nx = (1.0f - x);
  125. float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
  126. float f_ = 20.0f * (x * nx) * (nx * nx);
  127. if (fabsf(f) < tolerance || f_ == 0.0f)
  128. break;
  129. x = saturate(x - f / f_);
  130. }
  131. return x;
  132. }
  133. ccl_device void bssrdf_cubic_sample(
  134. const float radius, const float sharpness, float xi, float *r, float *h)
  135. {
  136. float Rm = radius;
  137. float r_ = bssrdf_cubic_quintic_root_find(xi);
  138. if (sharpness != 0.0f) {
  139. r_ = powf(r_, 1.0f + sharpness);
  140. Rm *= (1.0f + sharpness);
  141. }
  142. r_ *= Rm;
  143. *r = r_;
  144. /* h^2 + r^2 = Rm^2 */
  145. *h = safe_sqrtf(Rm * Rm - r_ * r_);
  146. }
  147. /* Approximate Reflectance Profiles
  148. * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
  149. */
  150. /* This is a bit arbitrary, just need big enough radius so it matches
  151. * the mean free length, but still not too big so sampling is still
  152. * effective. Might need some further tweaks.
  153. */
  154. #define BURLEY_TRUNCATE 16.0f
  155. #define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
  156. ccl_device_inline float bssrdf_burley_fitting(float A)
  157. {
  158. /* Diffuse surface transmission, equation (6). */
  159. return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
  160. }
  161. /* Scale mean free path length so it gives similar looking result
  162. * to Cubic and Gaussian models.
  163. */
  164. ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
  165. {
  166. return 0.25f * M_1_PI_F * r;
  167. }
  168. ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
  169. {
  170. /* Mean free path length. */
  171. const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
  172. /* Surface albedo. */
  173. const float3 A = bssrdf->albedo;
  174. const float3 s = make_float3(
  175. bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
  176. bssrdf->radius = l / s;
  177. }
  178. ccl_device float bssrdf_burley_eval(const float d, float r)
  179. {
  180. const float Rm = BURLEY_TRUNCATE * d;
  181. if (r >= Rm)
  182. return 0.0f;
  183. /* Burley reflectance profile, equation (3).
  184. *
  185. * NOTES:
  186. * - Surface albedo is already included into sc->weight, no need to
  187. * multiply by this term here.
  188. * - This is normalized diffuse model, so the equation is mutliplied
  189. * by 2*pi, which also matches cdf().
  190. */
  191. float exp_r_3_d = expf(-r / (3.0f * d));
  192. float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
  193. return (exp_r_d + exp_r_3_d) / (4.0f * d);
  194. }
  195. ccl_device float bssrdf_burley_pdf(const float d, float r)
  196. {
  197. return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
  198. }
  199. /* Find the radius for desired CDF value.
  200. * Returns scaled radius, meaning the result is to be scaled up by d.
  201. * Since there's no closed form solution we do Newton-Raphson method to find it.
  202. */
  203. ccl_device_forceinline float bssrdf_burley_root_find(float xi)
  204. {
  205. const float tolerance = 1e-6f;
  206. const int max_iteration_count = 10;
  207. /* Do initial guess based on manual curve fitting, this allows us to reduce
  208. * number of iterations to maximum 4 across the [0..1] range. We keep maximum
  209. * number of iteration higher just to be sure we didn't miss root in some
  210. * corner case.
  211. */
  212. float r;
  213. if (xi <= 0.9f) {
  214. r = expf(xi * xi * 2.4f) - 1.0f;
  215. }
  216. else {
  217. /* TODO(sergey): Some nicer curve fit is possible here. */
  218. r = 15.0f;
  219. }
  220. /* Solve against scaled radius. */
  221. for (int i = 0; i < max_iteration_count; i++) {
  222. float exp_r_3 = expf(-r / 3.0f);
  223. float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
  224. float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
  225. float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
  226. if (fabsf(f) < tolerance || f_ == 0.0f) {
  227. break;
  228. }
  229. r = r - f / f_;
  230. if (r < 0.0f) {
  231. r = 0.0f;
  232. }
  233. }
  234. return r;
  235. }
  236. ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
  237. {
  238. const float Rm = BURLEY_TRUNCATE * d;
  239. const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
  240. *r = r_;
  241. /* h^2 + r^2 = Rm^2 */
  242. *h = safe_sqrtf(Rm * Rm - r_ * r_);
  243. }
  244. /* None BSSRDF falloff
  245. *
  246. * Samples distributed over disk with no falloff, for reference. */
  247. ccl_device float bssrdf_none_eval(const float radius, float r)
  248. {
  249. const float Rm = radius;
  250. return (r < Rm) ? 1.0f : 0.0f;
  251. }
  252. ccl_device float bssrdf_none_pdf(const float radius, float r)
  253. {
  254. /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
  255. const float Rm = radius;
  256. const float area = (M_PI_F * Rm * Rm);
  257. return bssrdf_none_eval(radius, r) / area;
  258. }
  259. ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
  260. {
  261. /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
  262. * r = sqrt(xi)*Rm */
  263. const float Rm = radius;
  264. const float r_ = sqrtf(xi) * Rm;
  265. *r = r_;
  266. /* h^2 + r^2 = Rm^2 */
  267. *h = safe_sqrtf(Rm * Rm - r_ * r_);
  268. }
  269. /* Generic */
  270. ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
  271. {
  272. Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
  273. if (bssrdf == NULL) {
  274. return NULL;
  275. }
  276. float sample_weight = fabsf(average(weight));
  277. bssrdf->sample_weight = sample_weight;
  278. return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
  279. }
  280. ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
  281. {
  282. int flag = 0;
  283. int bssrdf_channels = 3;
  284. float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
  285. /* Verify if the radii are large enough to sample without precision issues. */
  286. if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
  287. diffuse_weight.x = bssrdf->weight.x;
  288. bssrdf->weight.x = 0.0f;
  289. bssrdf->radius.x = 0.0f;
  290. bssrdf_channels--;
  291. }
  292. if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
  293. diffuse_weight.y = bssrdf->weight.y;
  294. bssrdf->weight.y = 0.0f;
  295. bssrdf->radius.y = 0.0f;
  296. bssrdf_channels--;
  297. }
  298. if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
  299. diffuse_weight.z = bssrdf->weight.z;
  300. bssrdf->weight.z = 0.0f;
  301. bssrdf->radius.z = 0.0f;
  302. bssrdf_channels--;
  303. }
  304. if (bssrdf_channels < 3) {
  305. /* Add diffuse BSDF if any radius too small. */
  306. #ifdef __PRINCIPLED__
  307. if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
  308. float roughness = bssrdf->roughness;
  309. float3 N = bssrdf->N;
  310. PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
  311. sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
  312. if (bsdf) {
  313. bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
  314. bsdf->N = N;
  315. bsdf->roughness = roughness;
  316. flag |= bsdf_principled_diffuse_setup(bsdf);
  317. }
  318. }
  319. else
  320. #endif /* __PRINCIPLED__ */
  321. {
  322. DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
  323. if (bsdf) {
  324. bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
  325. bsdf->N = bssrdf->N;
  326. flag |= bsdf_diffuse_setup(bsdf);
  327. }
  328. }
  329. }
  330. /* Setup BSSRDF if radius is large enough. */
  331. if (bssrdf_channels > 0) {
  332. bssrdf->type = type;
  333. bssrdf->channels = bssrdf_channels;
  334. bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
  335. bssrdf->texture_blur = saturate(bssrdf->texture_blur);
  336. bssrdf->sharpness = saturate(bssrdf->sharpness);
  337. if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
  338. type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
  339. type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
  340. bssrdf_burley_setup(bssrdf);
  341. }
  342. flag |= SD_BSSRDF;
  343. }
  344. else {
  345. bssrdf->type = type;
  346. bssrdf->sample_weight = 0.0f;
  347. }
  348. return flag;
  349. }
  350. ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
  351. {
  352. const Bssrdf *bssrdf = (const Bssrdf *)sc;
  353. float radius;
  354. /* Sample color channel and reuse random number. Only a subset of channels
  355. * may be used if their radius was too small to handle as BSSRDF. */
  356. xi *= bssrdf->channels;
  357. if (xi < 1.0f) {
  358. radius = (bssrdf->radius.x > 0.0f) ?
  359. bssrdf->radius.x :
  360. (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
  361. }
  362. else if (xi < 2.0f) {
  363. xi -= 1.0f;
  364. radius = (bssrdf->radius.x > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
  365. }
  366. else {
  367. xi -= 2.0f;
  368. radius = bssrdf->radius.z;
  369. }
  370. /* Sample BSSRDF. */
  371. if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
  372. bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
  373. }
  374. else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
  375. bssrdf_gaussian_sample(radius, xi, r, h);
  376. }
  377. else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
  378. * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
  379. bssrdf_burley_sample(radius, xi, r, h);
  380. }
  381. }
  382. ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
  383. {
  384. if (radius == 0.0f) {
  385. return 0.0f;
  386. }
  387. else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
  388. return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
  389. }
  390. else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
  391. return bssrdf_gaussian_pdf(radius, r);
  392. }
  393. else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
  394. * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
  395. return bssrdf_burley_pdf(radius, r);
  396. }
  397. }
  398. ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
  399. {
  400. const Bssrdf *bssrdf = (const Bssrdf *)sc;
  401. return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
  402. bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
  403. bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
  404. }
  405. ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
  406. {
  407. const Bssrdf *bssrdf = (const Bssrdf *)sc;
  408. float3 pdf = bssrdf_eval(sc, r);
  409. return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
  410. }
  411. CCL_NAMESPACE_END
  412. #endif /* __KERNEL_BSSRDF_H__ */