lm_compute.glsl 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. #[versions]
  2. primary = "#define MODE_DIRECT_LIGHT";
  3. secondary = "#define MODE_BOUNCE_LIGHT";
  4. dilate = "#define MODE_DILATE";
  5. unocclude = "#define MODE_UNOCCLUDE";
  6. light_probes = "#define MODE_LIGHT_PROBES";
  7. denoise = "#define MODE_DENOISE";
  8. #[compute]
  9. #version 450
  10. #VERSION_DEFINES
  11. #extension GL_EXT_samplerless_texture_functions : enable
  12. // One 2D local group focusing in one layer at a time, though all
  13. // in parallel (no barriers) makes more sense than a 3D local group
  14. // as this can take more advantage of the cache for each group.
  15. #ifdef MODE_LIGHT_PROBES
  16. layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  17. #else
  18. layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  19. #endif
  20. #include "lm_common_inc.glsl"
  #ifdef MODE_LIGHT_PROBES

  // Storage buffer holding the probe results. main() addresses it as nine
  // consecutive vec4 entries per probe (probe_index * 9 + j).
  layout(set = 1, binding = 0, std430) restrict buffer LightProbeData {
      vec4 data[];
  } light_probes;

  // Lightmap array sampled by trace_indirect_light() when USE_LIGHT_TEXTURE_FOR_BOUNCES is defined.
  layout(set = 1, binding = 1) uniform texture2DArray source_light;

  // Environment texture sampled by trace_environment_color().
  layout(set = 1, binding = 2) uniform texture2D environment;

  #endif
  #ifdef MODE_UNOCCLUDE

  // Texel positions in xyz; main() skips texels whose alpha is below 0.5 and
  // rewrites xyz in place for the others.
  layout(rgba32f, set = 1, binding = 0) uniform restrict image2DArray position;

  // Read-only input: face normal in xyz and texel size in w.
  layout(rgba32f, set = 1, binding = 1) uniform restrict readonly image2DArray unocclude;

  #endif
  #if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT)

  // Write-only target; the direct light pass stores the light used for bounces here.
  layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;

  // Inputs: light texture (sampled for bounces when USE_LIGHT_TEXTURE_FOR_BOUNCES
  // is defined), plus per-texel position and normal fetched by main().
  layout(set = 1, binding = 1) uniform texture2DArray source_light;
  layout(set = 1, binding = 2) uniform texture2DArray source_position;
  layout(set = 1, binding = 3) uniform texture2DArray source_normal;

  // Accumulation target: written by the direct pass, read-modify-written by the bounce pass.
  layout(rgba16f, set = 1, binding = 4) uniform restrict image2DArray accum_light;

  #endif

  #ifdef MODE_BOUNCE_LIGHT

  // Environment texture sampled by trace_environment_color().
  layout(set = 1, binding = 5) uniform texture2D environment;

  #endif
  #if defined(MODE_DILATE) || defined(MODE_DENOISE)

  // Write-only output and sampled input of the dilate / denoise passes.
  layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
  layout(set = 1, binding = 1) uniform texture2DArray source_light;

  #endif

  #ifdef MODE_DENOISE

  layout(set = 1, binding = 2) uniform texture2DArray source_normal;

  // Denoiser settings.
  // NOTE(review): the code consuming these fields is not part of this excerpt;
  // confirm the meaning of each field against the denoise pass.
  layout(set = 1, binding = 3) uniform DenoiseParams {
      float spatial_bandwidth;
      float light_bandwidth;
      float albedo_bandwidth;
      float normal_bandwidth;

      int half_search_window;
      float filter_strength;
  } denoise_params;

  #endif
  // Per-dispatch parameters supplied as push constants.
  layout(push_constant, std430) uniform Params {
      uint atlas_slice; // Array layer of the atlas addressed by this dispatch.
      uint ray_count; // Total ray count; accumulated results are divided by it, and it is also the soft shadow ray count.
      uint ray_from; // First ray index traced by this dispatch.
      uint ray_to; // End of the ray range traced by this dispatch (exclusive).
      ivec2 region_ofs; // Added to the invocation ID to obtain the atlas texel.
      uint probe_count; // Invocations with an index at or past this value exit early in probe mode.
  } params;
  // Ray/triangle intersection test.
  //
  // Besides the boolean result, this reports the distance of the intersection along
  // `dir` and its barycentric coordinates (callers use them for UV lookups).
  // Both outputs are written whenever the ray is not (nearly) parallel to the triangle
  // plane, even if the function then returns false; they are left untouched otherwise.
  //
  // A hit is only accepted when its distance lies strictly between bake_params.bias
  // and max_dist, and all barycentric coordinates are non-negative.
  bool ray_hits_triangle(vec3 from, vec3 dir, float max_dist, vec3 p0, vec3 p1, vec3 p2, out float r_distance, out vec3 r_barycentric) {
      const float PARALLEL_THRESHOLD = 0.00001;

      vec3 edge_01 = p1 - p0;
      vec3 edge_20 = p0 - p2;
      vec3 plane_normal = cross(edge_20, edge_01);

      // A ray running (almost) parallel to the plane can never produce a usable hit.
      float facing = dot(plane_normal, dir);
      if (abs(facing) < PARALLEL_THRESHOLD) {
          return false;
      }

      vec3 scaled_origin = (p0 - from) / facing;
      vec3 q = cross(dir, scaled_origin);

      float bary_y = dot(q, edge_20);
      float bary_z = dot(q, edge_01);
      r_barycentric = vec3(1.0 - (bary_z + bary_y), bary_y, bary_z);
      r_distance = dot(plane_normal, scaled_origin);

      bool within_range = (r_distance > bake_params.bias) && (r_distance < max_dist);
      bool inside_triangle = all(greaterThanEqual(r_barycentric, vec3(0.0)));
      return within_range && inside_triangle;
  }
  // Result codes returned by trace_ray() and its wrappers.
  const uint RAY_MISS = 0u; // Nothing was hit.
  const uint RAY_FRONT = 1u; // The closest hit is a front face.
  const uint RAY_BACK = 2u; // The closest hit is a back face.
  const uint RAY_ANY = 3u; // An any-hit query found an intersection.
  // Slab test between a ray and an axis-aligned box.
  //
  // p_inv_dir is the component-wise reciprocal of the ray direction. The test only
  // checks that the per-axis intervals overlap; it does not reject boxes that lie
  // behind the origin or beyond the end of the traced segment.
  bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) {
      vec3 to_min = (p_box_min - p_from) * p_inv_dir;
      vec3 to_max = (p_box_max - p_from) * p_inv_dir;

      vec3 t_near = min(to_min, to_max);
      vec3 t_far = max(to_min, to_max);

      float t_enter = max(t_near.x, max(t_near.y, t_near.z));
      float t_exit = min(t_far.x, min(t_far.y, t_far.z));
      return t_enter <= t_exit;
  }
  // Triangle hits of one batch are stored in a 32-bit mask, so clusters holding more
  // than 32 triangles have to be walked in several batches.
  #if CLUSTER_SIZE > 32
  #define CLUSTER_TRIANGLE_ITERATION
  #endif

  // Traces the segment p_from -> p_to through the acceleration grid with a DDA walk.
  //
  // p_any_hit: when true, returns RAY_ANY as soon as any triangle is intersected.
  // Returns RAY_MISS, RAY_FRONT or RAY_BACK for closest-hit queries, RAY_MISS or RAY_ANY
  // for any-hit queries.
  // r_distance, r_normal, r_triangle and r_barycentric are only written when the result
  // is RAY_FRONT or RAY_BACK. The search stops at the first cell that yields a hit.
  // The walk is capped at 1000 cell steps.
  uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) {
      // World coordinates.
      vec3 rel = p_to - p_from;
      float rel_len = length(rel);
      vec3 dir = normalize(rel);
      vec3 inv_dir = 1.0 / dir;

      // Cell coordinates.
      vec3 from_cell = (p_from - bake_params.to_cell_offset) * bake_params.to_cell_size;
      vec3 to_cell = (p_to - bake_params.to_cell_offset) * bake_params.to_cell_size;

      // Prepare DDA.
      vec3 rel_cell = to_cell - from_cell;
      ivec3 icell = ivec3(from_cell);
      ivec3 iendcell = ivec3(to_cell);
      vec3 dir_cell = normalize(rel_cell);
      vec3 delta = min(abs(1.0 / dir_cell), bake_params.grid_size); // Use bake_params.grid_size as max to prevent infinity values.
      ivec3 step = ivec3(sign(rel_cell));
      vec3 side = (sign(rel_cell) * (vec3(icell) - from_cell) + (sign(rel_cell) * 0.5) + 0.5) * delta;

      uint iters = 0;
      while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) {
          uvec2 cell_data = texelFetch(grid, icell, 0).xy;
          uint triangle_count = cell_data.x;
          if (triangle_count > 0) {
              uint hit = RAY_MISS;
              float best_distance = 1e20;
              uint cluster_start = cluster_indices.data[cell_data.y * 2];
              uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1];
              uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE;
              uint cluster_base_index = 0;
              while (cluster_base_index < cluster_count) {
                  // To minimize divergence, all Ray-AABB tests on the clusters contained in the cell are performed
                  // before checking against the triangles. We do this 32 clusters at a time and store the intersected
                  // clusters on each bit of the 32-bit integer.
                  uint cluster_test_count = min(32u, cluster_count - cluster_base_index);
                  uint cluster_hits = 0;
                  for (uint i = 0; i < cluster_test_count; i++) {
                      uint cluster_index = cluster_start + cluster_base_index + i;
                      ClusterAABB cluster_aabb = cluster_aabbs.data[cluster_index];
                      if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) {
                          cluster_hits |= (1u << i);
                      }
                  }

                  // Check the triangles in any of the clusters that were intersected by toggling off the bits in the
                  // 32-bit integer counter until no bits are left.
                  while (cluster_hits > 0) {
                      uint cluster_index = findLSB(cluster_hits);
                      cluster_hits &= ~(1u << cluster_index);
                      cluster_index += cluster_base_index;

                      // Only the triangles that belong to this cluster are tested. The last cluster of a
                      // cell can be partially filled, so its size is derived from what is left in the cell
                      // after the preceding clusters. cluster_index < cluster_count guarantees that the
                      // subtraction cannot underflow.
                      uint cluster_triangle_offset = cluster_index * CLUSTER_SIZE;
                      uint cluster_triangle_count = min(uint(CLUSTER_SIZE), triangle_count - cluster_triangle_offset);

                      // Do the same divergence execution trick with triangles as well, 32 at a time so that
                      // every tested triangle maps to one bit of the mask.
                      uint triangle_base_index = 0;
  #ifdef CLUSTER_TRIANGLE_ITERATION
                      while (triangle_base_index < cluster_triangle_count)
  #endif
                      {
                          uint triangle_start_index = cell_triangle_start + cluster_triangle_offset + triangle_base_index;
                          uint triangle_test_count = min(32u, cluster_triangle_count - triangle_base_index);
                          uint triangle_hits = 0;
                          for (uint i = 0; i < triangle_test_count; i++) {
                              uint triangle_index = triangle_indices.data[triangle_start_index + i];
                              if (ray_box_test(p_from, inv_dir, triangles.data[triangle_index].min_bounds, triangles.data[triangle_index].max_bounds)) {
                                  triangle_hits |= (1u << i);
                              }
                          }

                          while (triangle_hits > 0) {
                              uint cluster_triangle_index = findLSB(triangle_hits);
                              triangle_hits &= ~(1u << cluster_triangle_index);
                              cluster_triangle_index += triangle_start_index;

                              uint triangle_index = triangle_indices.data[cluster_triangle_index];
                              Triangle triangle = triangles.data[triangle_index];

                              // Gather the triangle vertex positions.
                              vec3 vtx0 = vertices.data[triangle.indices.x].position;
                              vec3 vtx1 = vertices.data[triangle.indices.y].position;
                              vec3 vtx2 = vertices.data[triangle.indices.z].position;
                              vec3 normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2)));
                              bool backface = dot(normal, dir) >= 0.0;
                              float distance;
                              vec3 barycentric;
                              if (ray_hits_triangle(p_from, dir, rel_len, vtx0, vtx1, vtx2, distance, barycentric)) {
                                  if (p_any_hit) {
                                      // Return early if any hit was requested.
                                      return RAY_ANY;
                                  }

                                  vec3 position = p_from + dir * distance;
                                  vec3 hit_cell = (position - bake_params.to_cell_offset) * bake_params.to_cell_size;
                                  if (icell != ivec3(hit_cell)) {
                                      // It's possible for the ray to hit a triangle in a position outside the bounds of the cell
                                      // if it's large enough to cover multiple ones. The hit must be ignored if this is the case.
                                      continue;
                                  }

                                  if (!backface) {
                                      // The case of meshes having both a front and back face in the same plane is more common than
                                      // expected, so if this is a front-face, bias it closer to the ray origin, so it always wins
                                      // over the back-face.
                                      distance = max(bake_params.bias, distance - bake_params.bias);
                                  }

                                  if (distance < best_distance) {
                                      hit = backface ? RAY_BACK : RAY_FRONT;
                                      best_distance = distance;
                                      r_distance = distance;
                                      r_normal = normal;
                                      r_triangle = triangle_index;
                                      r_barycentric = barycentric;
                                  }
                              }
                          }

  #ifdef CLUSTER_TRIANGLE_ITERATION
                          triangle_base_index += 32u;
  #endif
                      }
                  }

                  cluster_base_index += 32;
              }

              if (hit != RAY_MISS) {
                  return hit;
              }
          }

          if (icell == iendcell) {
              break;
          }

          // There should be only one axis updated at a time for DDA to work properly.
          bvec3 mask = bvec3(true, false, false);
          float m = side.x;
          if (side.y < m) {
              m = side.y;
              mask = bvec3(false, true, false);
          }

          if (side.z < m) {
              mask = bvec3(false, false, true);
          }

          side += vec3(mask) * delta;
          icell += ivec3(vec3(mask)) * step;
          iters++;
      }

      return RAY_MISS;
  }
  // Closest-hit query that reports which triangle was hit and the barycentric
  // coordinates of the hit. Returns the trace_ray() result code.
  uint trace_ray_closest_hit_triangle(vec3 p_from, vec3 p_to, out uint r_triangle, out vec3 r_barycentric) {
      // Outputs of trace_ray() this wrapper does not expose.
      float unused_distance;
      vec3 unused_normal;
      return trace_ray(p_from, p_to, false, unused_distance, unused_normal, r_triangle, r_barycentric);
  }
  // Closest-hit query that reports the hit distance and the geometric normal of
  // the triangle that was hit. Returns the trace_ray() result code.
  uint trace_ray_closest_hit_distance(vec3 p_from, vec3 p_to, out float r_distance, out vec3 r_normal) {
      // Outputs of trace_ray() this wrapper does not expose.
      uint unused_triangle;
      vec3 unused_barycentric;
      return trace_ray(p_from, p_to, false, r_distance, r_normal, unused_triangle, unused_barycentric);
  }
  // Occlusion query: returns RAY_ANY as soon as anything intersects the segment
  // p_from -> p_to, RAY_MISS otherwise. No hit details are reported.
  uint trace_ray_any_hit(vec3 p_from, vec3 p_to) {
      float unused_distance;
      vec3 unused_normal;
      uint unused_triangle;
      vec3 unused_barycentric;
      return trace_ray(p_from, p_to, true, unused_distance, unused_normal, unused_triangle, unused_barycentric);
  }
  // Integer hash used to drive the random number generation.
  // https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
  uint hash(uint value) {
      uint state = value * 747796405u + 2891336453u;

      // The shift amount is taken from the top four bits of the state itself.
      uint shift = (state >> 28u) + 4u;
      uint word = ((state >> shift) ^ state) * 277803737u;

      return word ^ (word >> 22u);
  }
  // Folds the three seed components into a single hashed value, starting with z
  // and finishing with x.
  uint random_seed(ivec3 seed) {
      uint mixed = hash(uint(seed.z));
      mixed = hash(uint(seed.y) ^ mixed);
      return hash(uint(seed.x) ^ mixed);
  }
  // Advances the random state stored in `value` and returns a value in range [0.0, 1.0).
  float randomize(inout uint value) {
      value = hash(value);
      // Only the top 24 bits are used: they convert to float exactly, so the result
      // can never reach 1.0. Converting the full 32-bit value rounds numbers close to
      // 2^32 upwards, which would yield exactly 1.0.
      return float(value >> 8u) * (1.0 / 16777216.0);
  }
  // Single-precision approximation of pi shared by the sampling and lighting code.
  const float PI = 3.14159265;
  // Returns a direction on the +Z hemisphere from two random numbers drawn from `noise`.
  // http://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.4.pdf (chapter 15)
  vec3 generate_hemisphere_cosine_weighted_direction(inout uint noise) {
      float u = randomize(noise);
      float angle = randomize(noise) * 2.0 * PI;

      // Point on the unit disk, lifted onto the hemisphere.
      float radius = sqrt(u);
      return vec3(radius * cos(angle), radius * sin(angle), sqrt(1.0 - u));
  }
  // Returns a direction on the unit sphere from two random numbers drawn from `noise`.
  // Distribution generation adapted from "Generating uniformly distributed numbers on a sphere"
  // <http://corysimon.github.io/articles/uniformdistn-on-sphere/>
  vec3 generate_sphere_uniform_direction(inout uint noise) {
      float azimuth = 2.0 * PI * randomize(noise);
      float polar = acos(1.0 - 2.0 * randomize(noise));

      float ring_radius = sin(polar);
      return vec3(ring_radius * cos(azimuth), ring_radius * sin(azimuth), cos(polar));
  }
  // Generates a random direction around `normal` by building a tangent frame and
  // transforming a hemisphere sample (whose pole is +Z) into it.
  vec3 generate_ray_dir_from_normal(vec3 normal, inout uint noise) {
      // Pick a helper axis that is not (nearly) parallel to the normal.
      vec3 helper_axis;
      if (abs(normal.z) < 0.999) {
          helper_axis = vec3(0.0, 0.0, 1.0);
      } else {
          helper_axis = vec3(0.0, 1.0, 0.0);
      }

      vec3 tangent = normalize(cross(helper_axis, normal));
      vec3 bitangent = normalize(cross(tangent, normal));

      vec3 local_dir = generate_hemisphere_cosine_weighted_direction(noise);
      return mat3(tangent, bitangent, normal) * local_dir;
  }
  285. #if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  // Distance attenuation for positional lights.
  //
  // The result is max(1 - (distance * inv_range)^4, 0)^2, which reaches zero at the
  // light range, multiplied by distance^(-decay). The distance used for the power
  // is clamped to 0.0001 to avoid a division by zero.
  float get_omni_attenuation(float distance, float inv_range, float decay) {
      float ratio = distance * inv_range;
      float ratio_2 = ratio * ratio;
      float ratio_4 = ratio_2 * ratio_2;

      float window = max(1.0 - ratio_4, 0.0);
      float window_2 = window * window;

      return window_2 * pow(max(distance, 0.0001), -decay);
  }
  // Computes the light that lights.data[p_light_index] contributes to a surface point.
  //
  // p_position, p_normal: surface point and normal being lit.
  // p_soft_shadowing: when true and the light has a size above zero, up to params.ray_count
  //     shadow rays are jittered over the light disk; otherwise a single shadow ray is traced.
  // r_light: color * energy * attenuation * shadowing term. Zero when the point is out of
  //     range, outside the spot cone, facing away from the light or fully shadowed.
  // r_light_dir: unit direction from the point towards the light.
  // r_noise: random state, advanced once per random number consumed.
  void trace_direct_light(vec3 p_position, vec3 p_normal, uint p_light_index, bool p_soft_shadowing, out vec3 r_light, out vec3 r_light_dir, inout uint r_noise) {
      r_light = vec3(0.0f);

      vec3 light_pos;
      float dist;
      float attenuation;
      float soft_shadowing_disk_size;
      Light light_data = lights.data[p_light_index];
      if (light_data.type == LIGHT_TYPE_DIRECTIONAL) {
          // Place the light at a distance of the world size, against the light direction.
          vec3 light_vec = light_data.direction;
          light_pos = p_position - light_vec * length(bake_params.world_size);
          r_light_dir = normalize(light_pos - p_position);
          dist = length(bake_params.world_size);
          attenuation = 1.0;
          soft_shadowing_disk_size = light_data.size;
      } else {
          light_pos = light_data.position;
          r_light_dir = normalize(light_pos - p_position);
          dist = distance(p_position, light_pos);
          if (dist > light_data.range) {
              return;
          }

          soft_shadowing_disk_size = light_data.size / dist;

          attenuation = get_omni_attenuation(dist, 1.0 / light_data.range, light_data.attenuation);

          if (light_data.type == LIGHT_TYPE_SPOT) {
              vec3 rel = normalize(p_position - light_pos);
              float cos_spot_angle = light_data.cos_spot_angle;
              float cos_angle = dot(rel, light_data.direction);

              if (cos_angle < cos_spot_angle) {
                  return;
              }

              float scos = max(cos_angle, cos_spot_angle);
              float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle));
              attenuation *= 1.0 - pow(spot_rim, light_data.inv_spot_attenuation);
          }
      }

      attenuation *= max(0.0, dot(p_normal, r_light_dir));
      if (attenuation <= 0.0001) {
          return;
      }

      float penumbra = 0.0;
      if ((light_data.size > 0.0) && p_soft_shadowing) {
          vec3 light_to_point = -r_light_dir;
          vec3 aux = light_to_point.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
          vec3 light_to_point_tan = normalize(cross(light_to_point, aux));
          vec3 light_to_point_bitan = normalize(cross(light_to_point, light_to_point_tan));

          const uint shadowing_rays_check_penumbra_denom = 2;
          // Always trace at least one ray so the average below never divides by zero.
          uint shadowing_ray_count = max(params.ray_count, 1u);
          uint penumbra_check_index = shadowing_ray_count / shadowing_rays_check_penumbra_denom;

          // Counts the rays that reached the light without being blocked.
          uint hits = 0;
          for (uint j = 0; j < shadowing_ray_count; j++) {
              // Optimization:
              // Once already traced an important proportion of rays, if all are hits or misses,
              // assume we're not in the penumbra so we can infer the rest would have the same result.
              // The check requires at least one traced ray (j > 0); otherwise a ray count of one
              // would be reported as fully lit without tracing anything.
              if ((j > 0) && (j == penumbra_check_index)) {
                  if (hits == j) {
                      // Assume totally lit
                      hits = shadowing_ray_count;
                      break;
                  } else if (hits == 0) {
                      // Assume totally dark
                      break;
                  }
              }

              float r = randomize(r_noise);
              float a = randomize(r_noise) * 2.0 * PI;
              vec2 disk_sample = (r * vec2(cos(a), sin(a))) * soft_shadowing_disk_size * light_data.shadow_blur;
              vec3 light_disk_to_point = normalize(light_to_point + disk_sample.x * light_to_point_tan + disk_sample.y * light_to_point_bitan);

              if (trace_ray_any_hit(p_position - light_disk_to_point * bake_params.bias, p_position - light_disk_to_point * dist) == RAY_MISS) {
                  hits++;
              }
          }
          penumbra = float(hits) / float(shadowing_ray_count);
      } else {
          // Hard shadow: a single ray towards the light position.
          if (trace_ray_any_hit(p_position + r_light_dir * bake_params.bias, light_pos) == RAY_MISS) {
              penumbra = 1.0;
          }
      }

      r_light = light_data.color * light_data.energy * attenuation * penumbra;
  }
  376. #endif
  377. #if defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
  // Samples the environment texture in the direction of `ray_dir`.
  //
  // The direction is rotated by bake_params.env_transform and converted to
  // longitude/latitude texture coordinates in the [0, 1] range.
  vec3 trace_environment_color(vec3 ray_dir) {
      vec3 sky_dir = normalize(mat3(bake_params.env_transform) * ray_dir);

      // Rounding in normalize() can leave the component marginally outside [-1, 1],
      // where acos() is undefined, so clamp it first.
      vec2 st = vec2(atan(sky_dir.x, sky_dir.z), acos(clamp(sky_dir.y, -1.0, 1.0)));
      if (st.x < 0.0) {
          // Move the longitude from [-PI, PI] to [0, 2 * PI].
          st.x += PI * 2.0;
      }

      return textureLod(sampler2D(environment, linear_sampler), st / vec2(PI * 2.0, PI), 0.0).rgb;
  }
  // Follows a light path that starts at p_position towards p_ray_dir and returns the
  // light gathered along it.
  //
  // At every front-face hit the surface emission and its direct lighting are added,
  // weighted by the albedo accumulated so far, and a new direction is generated around
  // the surface normal. The path ends when it misses all geometry (the environment
  // color is added), hits a back face, is terminated by Russian roulette, or reaches
  // the maximum depth.
  // r_noise is the random state; it is advanced once per random number consumed.
  vec3 trace_indirect_light(vec3 p_position, vec3 p_ray_dir, inout uint r_noise) {
      // The lower limit considers the case where the lightmapper might have bounces disabled but light probes are requested.
      vec3 position = p_position;
      vec3 ray_dir = p_ray_dir;
      uint max_depth = max(bake_params.bounces, 1);
      vec3 throughput = vec3(1.0);
      vec3 light = vec3(0.0);
      for (uint depth = 0; depth < max_depth; depth++) {
          uint tidx;
          vec3 barycentric;
          uint trace_result = trace_ray_closest_hit_triangle(position + ray_dir * bake_params.bias, position + ray_dir * length(bake_params.world_size), tidx, barycentric);
          if (trace_result == RAY_FRONT) {
              Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
              Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
              Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];
              vec3 uvw = vec3(barycentric.x * vert0.uv + barycentric.y * vert1.uv + barycentric.z * vert2.uv, float(triangles.data[tidx].slice));
              position = barycentric.x * vert0.position + barycentric.y * vert1.position + barycentric.z * vert2.position;

              vec3 norm0 = vec3(vert0.normal_xy, vert0.normal_z);
              vec3 norm1 = vec3(vert1.normal_xy, vert1.normal_z);
              vec3 norm2 = vec3(vert2.normal_xy, vert2.normal_z);
              vec3 normal = barycentric.x * norm0 + barycentric.y * norm1 + barycentric.z * norm2;

              // A blend of vertex normals is shorter than unit length. Renormalize it so the cosine
              // term of the direct lighting and the basis of the next bounce are not scaled down.
              // A degenerate (zero) blend is left as is instead of producing NaNs.
              float normal_length = length(normal);
              if (normal_length > 0.0) {
                  normal /= normal_length;
              }

              vec3 direct_light = vec3(0.0f);
  #ifdef USE_LIGHT_TEXTURE_FOR_BOUNCES
              direct_light += textureLod(sampler2DArray(source_light, linear_sampler), uvw, 0.0).rgb;
  #else
              // Trace the lights directly. Significantly more expensive but more accurate in scenarios
              // where the lightmap texture isn't reliable.
              for (uint i = 0; i < bake_params.light_count; i++) {
                  vec3 light_contribution;
                  vec3 light_dir;
                  trace_direct_light(position, normal, i, false, light_contribution, light_dir, r_noise);
                  direct_light += light_contribution * lights.data[i].indirect_energy;
              }

              direct_light *= bake_params.exposure_normalization;
  #endif

              vec3 albedo = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0).rgb;
              vec3 emissive = textureLod(sampler2DArray(emission_tex, linear_sampler), uvw, 0).rgb;
              emissive *= bake_params.exposure_normalization;

              light += throughput * emissive;
              throughput *= albedo;
              light += throughput * direct_light * bake_params.bounce_indirect_energy;

              // Use Russian Roulette to determine a probability to terminate the bounce earlier as an optimization.
              // <https://computergraphics.stackexchange.com/questions/2316/is-russian-roulette-really-the-answer>
              // The comparison is inclusive so a throughput of zero always terminates the path and the
              // division below can never divide by zero.
              float p = max(max(throughput.x, throughput.y), throughput.z);
              if (randomize(r_noise) >= p) {
                  break;
              }

              // Boost the throughput from the probability of the ray being terminated early.
              throughput *= 1.0 / p;

              // Generate a new ray direction for the next bounce from this surface's normal.
              ray_dir = generate_ray_dir_from_normal(normal, r_noise);
          } else if (trace_result == RAY_MISS) {
              // Look for the environment color and stop bouncing.
              light += throughput * trace_environment_color(ray_dir);
              break;
          } else {
              // Ignore any other trace results.
              break;
          }
      }

      return light;
  }
  448. #endif
  449. void main() {
  450. // Check if invocation is out of bounds.
  451. #ifdef MODE_LIGHT_PROBES
  452. int probe_index = int(gl_GlobalInvocationID.x);
  453. if (probe_index >= params.probe_count) {
  454. return;
  455. }
  456. #else
  457. ivec2 atlas_pos = ivec2(gl_GlobalInvocationID.xy) + params.region_ofs;
  458. if (any(greaterThanEqual(atlas_pos, bake_params.atlas_size))) {
  459. return;
  460. }
  461. #endif
  462. #ifdef MODE_DIRECT_LIGHT
  463. vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  464. if (length(normal) < 0.5) {
  465. return; //empty texel, no process
  466. }
  467. vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  468. vec3 light_for_texture = vec3(0.0);
  469. vec3 light_for_bounces = vec3(0.0);
  470. #ifdef USE_SH_LIGHTMAPS
  471. vec4 sh_accum[4] = vec4[](
  472. vec4(0.0, 0.0, 0.0, 1.0),
  473. vec4(0.0, 0.0, 0.0, 1.0),
  474. vec4(0.0, 0.0, 0.0, 1.0),
  475. vec4(0.0, 0.0, 0.0, 1.0));
  476. #endif
  477. // Use atlas position and a prime number as the seed.
  478. uint noise = random_seed(ivec3(atlas_pos, 43573547));
  479. for (uint i = 0; i < bake_params.light_count; i++) {
  480. vec3 light;
  481. vec3 light_dir;
  482. trace_direct_light(position, normal, i, true, light, light_dir, noise);
  483. if (lights.data[i].static_bake) {
  484. light_for_texture += light;
  485. #ifdef USE_SH_LIGHTMAPS
  486. float c[4] = float[](
  487. 0.282095, //l0
  488. 0.488603 * light_dir.y, //l1n1
  489. 0.488603 * light_dir.z, //l1n0
  490. 0.488603 * light_dir.x //l1p1
  491. );
  492. for (uint j = 0; j < 4; j++) {
  493. sh_accum[j].rgb += light * c[j] * 8.0;
  494. }
  495. #endif
  496. }
  497. light_for_bounces += light * lights.data[i].indirect_energy;
  498. }
  499. light_for_bounces *= bake_params.exposure_normalization;
  500. imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_bounces, 1.0));
  501. #ifdef USE_SH_LIGHTMAPS
  502. // Keep for adding at the end.
  503. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 0), sh_accum[0]);
  504. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 1), sh_accum[1]);
  505. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 2), sh_accum[2]);
  506. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 3), sh_accum[3]);
  507. #else
  508. light_for_texture *= bake_params.exposure_normalization;
  509. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_texture, 1.0));
  510. #endif
  511. #endif
  512. #ifdef MODE_BOUNCE_LIGHT
  513. #ifdef USE_SH_LIGHTMAPS
  514. vec4 sh_accum[4] = vec4[](
  515. vec4(0.0, 0.0, 0.0, 1.0),
  516. vec4(0.0, 0.0, 0.0, 1.0),
  517. vec4(0.0, 0.0, 0.0, 1.0),
  518. vec4(0.0, 0.0, 0.0, 1.0));
  519. #else
  520. vec3 light_accum = vec3(0.0);
  521. #endif
  522. // Retrieve starting normal and position.
  523. vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  524. if (length(normal) < 0.5) {
  525. // The pixel is empty, skip processing it.
  526. return;
  527. }
  528. vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  529. uint noise = random_seed(ivec3(params.ray_from, atlas_pos));
  530. for (uint i = params.ray_from; i < params.ray_to; i++) {
  531. vec3 ray_dir = generate_ray_dir_from_normal(normal, noise);
  532. vec3 light = trace_indirect_light(position, ray_dir, noise);
  533. #ifdef USE_SH_LIGHTMAPS
  534. float c[4] = float[](
  535. 0.282095, //l0
  536. 0.488603 * ray_dir.y, //l1n1
  537. 0.488603 * ray_dir.z, //l1n0
  538. 0.488603 * ray_dir.x //l1p1
  539. );
  540. for (uint j = 0; j < 4; j++) {
  541. sh_accum[j].rgb += light * c[j] * 8.0;
  542. }
  543. #else
  544. light_accum += light;
  545. #endif
  546. }
  547. // Add the averaged result to the accumulated light texture.
  548. #ifdef USE_SH_LIGHTMAPS
  549. for (int i = 0; i < 4; i++) {
  550. vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i));
  551. accum.rgb += sh_accum[i].rgb / float(params.ray_count);
  552. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), accum);
  553. }
  554. #else
  555. vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice));
  556. accum.rgb += light_accum / float(params.ray_count);
  557. imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), accum);
  558. #endif
  559. #endif
  560. #ifdef MODE_UNOCCLUDE
  561. //texel_size = 0.5;
  562. //compute tangents
  563. vec4 position_alpha = imageLoad(position, ivec3(atlas_pos, params.atlas_slice));
  564. if (position_alpha.a < 0.5) {
  565. return;
  566. }
  567. vec3 vertex_pos = position_alpha.xyz;
  568. vec4 normal_tsize = imageLoad(unocclude, ivec3(atlas_pos, params.atlas_slice));
  569. vec3 face_normal = normal_tsize.xyz;
  570. float texel_size = normal_tsize.w;
  571. vec3 v0 = abs(face_normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
  572. vec3 tangent = normalize(cross(v0, face_normal));
  573. vec3 bitangent = normalize(cross(tangent, face_normal));
  574. vec3 base_pos = vertex_pos + face_normal * bake_params.bias; // Raise a bit.
  575. vec3 rays[4] = vec3[](tangent, bitangent, -tangent, -bitangent);
  576. float min_d = 1e20;
  577. for (int i = 0; i < 4; i++) {
  578. vec3 ray_to = base_pos + rays[i] * texel_size;
  579. float d;
  580. vec3 norm;
  581. if (trace_ray_closest_hit_distance(base_pos, ray_to, d, norm) == RAY_BACK) {
  582. if (d < min_d) {
  583. // This bias needs to be greater than the regular bias, because otherwise later, rays will go the other side when pointing back.
  584. vertex_pos = base_pos + rays[i] * d + norm * bake_params.bias * 10.0;
  585. min_d = d;
  586. }
  587. }
  588. }
  589. position_alpha.xyz = vertex_pos;
  590. imageStore(position, ivec3(atlas_pos, params.atlas_slice), position_alpha);
  591. #endif
  592. #ifdef MODE_LIGHT_PROBES
  593. vec3 position = probe_positions.data[probe_index].xyz;
  594. vec4 probe_sh_accum[9] = vec4[](
  595. vec4(0.0),
  596. vec4(0.0),
  597. vec4(0.0),
  598. vec4(0.0),
  599. vec4(0.0),
  600. vec4(0.0),
  601. vec4(0.0),
  602. vec4(0.0),
  603. vec4(0.0));
  604. uint noise = random_seed(ivec3(params.ray_from, probe_index, 49502741 /* some prime */));
  605. for (uint i = params.ray_from; i < params.ray_to; i++) {
  606. vec3 ray_dir = generate_sphere_uniform_direction(noise);
  607. vec3 light = trace_indirect_light(position, ray_dir, noise);
  608. float c[9] = float[](
  609. 0.282095, //l0
  610. 0.488603 * ray_dir.y, //l1n1
  611. 0.488603 * ray_dir.z, //l1n0
  612. 0.488603 * ray_dir.x, //l1p1
  613. 1.092548 * ray_dir.x * ray_dir.y, //l2n2
  614. 1.092548 * ray_dir.y * ray_dir.z, //l2n1
  615. //0.315392 * (ray_dir.x * ray_dir.x + ray_dir.y * ray_dir.y + 2.0 * ray_dir.z * ray_dir.z), //l20
  616. 0.315392 * (3.0 * ray_dir.z * ray_dir.z - 1.0), //l20
  617. 1.092548 * ray_dir.x * ray_dir.z, //l2p1
  618. 0.546274 * (ray_dir.x * ray_dir.x - ray_dir.y * ray_dir.y) //l2p2
  619. );
  620. for (uint j = 0; j < 9; j++) {
  621. probe_sh_accum[j].rgb += light * c[j];
  622. }
  623. }
  624. if (params.ray_from > 0) {
  625. for (uint j = 0; j < 9; j++) { //accum from existing
  626. probe_sh_accum[j] += light_probes.data[probe_index * 9 + j];
  627. }
  628. }
  629. if (params.ray_to == params.ray_count) {
  630. for (uint j = 0; j < 9; j++) { //accum from existing
  631. probe_sh_accum[j] *= 4.0 / float(params.ray_count);
  632. }
  633. }
  634. for (uint j = 0; j < 9; j++) { //accum from existing
  635. light_probes.data[probe_index * 9 + j] = probe_sh_accum[j];
  636. }
  637. #endif
  638. #ifdef MODE_DILATE
  639. vec4 c = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0);
  640. //sides first, as they are closer
  641. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 0), params.atlas_slice), 0);
  642. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 1), params.atlas_slice), 0);
  643. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 0), params.atlas_slice), 0);
  644. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -1), params.atlas_slice), 0);
  645. //endpoints second
  646. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -1), params.atlas_slice), 0);
  647. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 1), params.atlas_slice), 0);
  648. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -1), params.atlas_slice), 0);
  649. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 1), params.atlas_slice), 0);
  650. //far sides third
  651. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 0), params.atlas_slice), 0);
  652. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 2), params.atlas_slice), 0);
  653. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 0), params.atlas_slice), 0);
  654. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -2), params.atlas_slice), 0);
  655. //far-mid endpoints
  656. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -1), params.atlas_slice), 0);
  657. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 1), params.atlas_slice), 0);
  658. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -1), params.atlas_slice), 0);
  659. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 1), params.atlas_slice), 0);
  660. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -2), params.atlas_slice), 0);
  661. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 2), params.atlas_slice), 0);
  662. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -2), params.atlas_slice), 0);
  663. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 2), params.atlas_slice), 0);
  664. //far endpoints
  665. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -2), params.atlas_slice), 0);
  666. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 2), params.atlas_slice), 0);
  667. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -2), params.atlas_slice), 0);
  668. c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 2), params.atlas_slice), 0);
  669. imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), c);
  670. #endif
  671. #ifdef MODE_DENOISE
  672. // Joint Non-local means (JNLM) denoiser.
  673. //
  674. // Based on YoctoImageDenoiser's JNLM implementation with corrections from "Nonlinearly Weighted First-order Regression for Denoising Monte Carlo Renderings".
  675. //
  676. // <https://github.com/ManuelPrandini/YoctoImageDenoiser/blob/06e19489dd64e47792acffde536393802ba48607/libs/yocto_extension/yocto_extension.cpp#L207>
  677. // <https://benedikt-bitterli.me/nfor/nfor.pdf>
  678. //
  679. // MIT License
  680. //
  681. // Copyright (c) 2020 ManuelPrandini
  682. //
  683. // Permission is hereby granted, free of charge, to any person obtaining a copy
  684. // of this software and associated documentation files (the "Software"), to deal
  685. // in the Software without restriction, including without limitation the rights
  686. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  687. // copies of the Software, and to permit persons to whom the Software is
  688. // furnished to do so, subject to the following conditions:
  689. //
  690. // The above copyright notice and this permission notice shall be included in all
  691. // copies or substantial portions of the Software.
  692. //
  693. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  694. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  695. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  696. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  697. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  698. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  699. // SOFTWARE.
  700. //
  701. // Most of the constants below have been hand-picked to fit the common scenarios lightmaps
  702. // are generated with, but they can be altered freely to experiment and achieve better results.
  703. // Half the size of the patch window around each pixel that is weighted to compute the denoised pixel.
  704. // A value of 1 represents a 3x3 window, a value of 2 a 5x5 window, etc.
  705. const int HALF_PATCH_WINDOW = 3;
  706. // Half the size of the search window around each pixel that is denoised and weighted to compute the denoised pixel.
  707. const int HALF_SEARCH_WINDOW = denoise_params.half_search_window;
  708. // For all of the following sigma values, smaller values will give less weight to pixels that have a bigger distance
  709. // in the feature being evaluated. Therefore, smaller values are likely to cause more noise to appear, but will also
  710. // cause less features to be erased in the process.
  711. // Controls how much the spatial distance of the pixels influences the denoising weight.
  712. const float SIGMA_SPATIAL = denoise_params.spatial_bandwidth;
  713. // Controls how much the light color distance of the pixels influences the denoising weight.
  714. const float SIGMA_LIGHT = denoise_params.light_bandwidth;
  715. // Controls how much the albedo color distance of the pixels influences the denoising weight.
  716. const float SIGMA_ALBEDO = denoise_params.albedo_bandwidth;
  717. // Controls how much the normal vector distance of the pixels influences the denoising weight.
  718. const float SIGMA_NORMAL = denoise_params.normal_bandwidth;
  719. // Strength of the filter. The original paper recommends values around 10 to 15 times the Sigma parameter.
  720. const float FILTER_VALUE = denoise_params.filter_strength * SIGMA_LIGHT;
  721. // Formula constants.
  722. const int PATCH_WINDOW_DIMENSION = (HALF_PATCH_WINDOW * 2 + 1);
  723. const int PATCH_WINDOW_DIMENSION_SQUARE = (PATCH_WINDOW_DIMENSION * PATCH_WINDOW_DIMENSION);
  724. const float TWO_SIGMA_SPATIAL_SQUARE = 2.0f * SIGMA_SPATIAL * SIGMA_SPATIAL;
  725. const float TWO_SIGMA_LIGHT_SQUARE = 2.0f * SIGMA_LIGHT * SIGMA_LIGHT;
  726. const float TWO_SIGMA_ALBEDO_SQUARE = 2.0f * SIGMA_ALBEDO * SIGMA_ALBEDO;
  727. const float TWO_SIGMA_NORMAL_SQUARE = 2.0f * SIGMA_NORMAL * SIGMA_NORMAL;
  728. const float FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE = FILTER_VALUE * FILTER_VALUE * TWO_SIGMA_LIGHT_SQUARE;
  729. const float EPSILON = 1e-6f;
  730. #ifdef USE_SH_LIGHTMAPS
  731. const uint slice_count = 4;
  732. const uint slice_base = params.atlas_slice * slice_count;
  733. #else
  734. const uint slice_count = 1;
  735. const uint slice_base = params.atlas_slice;
  736. #endif
  737. for (uint i = 0; i < slice_count; i++) {
  738. uint lightmap_slice = slice_base + i;
  739. vec3 denoised_rgb = vec3(0.0f);
  740. vec4 input_light = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, lightmap_slice), 0);
  741. vec3 input_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).rgb;
  742. vec3 input_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
  743. if (length(input_normal) > EPSILON) {
  744. // Compute the denoised pixel if the normal is valid.
  745. float sum_weights = 0.0f;
  746. vec3 input_rgb = input_light.rgb;
  747. for (int search_y = -HALF_SEARCH_WINDOW; search_y <= HALF_SEARCH_WINDOW; search_y++) {
  748. for (int search_x = -HALF_SEARCH_WINDOW; search_x <= HALF_SEARCH_WINDOW; search_x++) {
  749. ivec2 search_pos = atlas_pos + ivec2(search_x, search_y);
  750. vec3 search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(search_pos, lightmap_slice), 0).rgb;
  751. vec3 search_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).rgb;
  752. vec3 search_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).xyz;
  753. float patch_square_dist = 0.0f;
  754. for (int offset_y = -HALF_PATCH_WINDOW; offset_y <= HALF_PATCH_WINDOW; offset_y++) {
  755. for (int offset_x = -HALF_PATCH_WINDOW; offset_x <= HALF_PATCH_WINDOW; offset_x++) {
  756. ivec2 offset_input_pos = atlas_pos + ivec2(offset_x, offset_y);
  757. ivec2 offset_search_pos = search_pos + ivec2(offset_x, offset_y);
  758. vec3 offset_input_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_input_pos, lightmap_slice), 0).rgb;
  759. vec3 offset_search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_search_pos, lightmap_slice), 0).rgb;
  760. vec3 offset_delta_rgb = offset_input_rgb - offset_search_rgb;
  761. patch_square_dist += dot(offset_delta_rgb, offset_delta_rgb) - TWO_SIGMA_LIGHT_SQUARE;
  762. }
  763. }
  764. patch_square_dist = max(0.0f, patch_square_dist / (3.0f * PATCH_WINDOW_DIMENSION_SQUARE));
  765. float weight = 1.0f;
  766. // Ignore weight if search position is out of bounds.
  767. weight *= step(0, search_pos.x) * step(search_pos.x, bake_params.atlas_size.x - 1);
  768. weight *= step(0, search_pos.y) * step(search_pos.y, bake_params.atlas_size.y - 1);
  769. // Ignore weight if normal is zero length.
  770. weight *= step(EPSILON, length(search_normal));
  771. // Weight with pixel distance.
  772. vec2 pixel_delta = vec2(search_x, search_y);
  773. float pixel_square_dist = dot(pixel_delta, pixel_delta);
  774. weight *= exp(-pixel_square_dist / TWO_SIGMA_SPATIAL_SQUARE);
  775. // Weight with patch.
  776. weight *= exp(-patch_square_dist / FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE);
  777. // Weight with albedo.
  778. vec3 albedo_delta = input_albedo - search_albedo;
  779. float albedo_square_dist = dot(albedo_delta, albedo_delta);
  780. weight *= exp(-albedo_square_dist / TWO_SIGMA_ALBEDO_SQUARE);
  781. // Weight with normal.
  782. vec3 normal_delta = input_normal - search_normal;
  783. float normal_square_dist = dot(normal_delta, normal_delta);
  784. weight *= exp(-normal_square_dist / TWO_SIGMA_NORMAL_SQUARE);
  785. denoised_rgb += weight * search_rgb;
  786. sum_weights += weight;
  787. }
  788. }
  789. denoised_rgb /= sum_weights;
  790. } else {
  791. // Ignore pixels where the normal is empty, just copy the light color.
  792. denoised_rgb = input_light.rgb;
  793. }
  794. imageStore(dest_light, ivec3(atlas_pos, lightmap_slice), vec4(denoised_rgb, input_light.a));
  795. }
  796. #endif
  797. }