kernel_id_passes.h 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
/*
 * Copyright 2018 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  16. CCL_NAMESPACE_BEGIN
  17. ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
  18. int num_slots,
  19. float id,
  20. float weight)
  21. {
  22. kernel_assert(id != ID_NONE);
  23. if (weight == 0.0f) {
  24. return;
  25. }
  26. for (int slot = 0; slot < num_slots; slot++) {
  27. ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
  28. #ifdef __ATOMIC_PASS_WRITE__
  29. /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
  30. if (id_buffer[slot].x == ID_NONE) {
  31. /* Use an atomic to claim this slot.
  32. * If a different thread got here first, try again from this slot on. */
  33. float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
  34. if (old_id != ID_NONE && old_id != id) {
  35. continue;
  36. }
  37. atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
  38. break;
  39. }
  40. /* If there already is a slot for that ID, add the weight.
  41. * If no slot was found, add it to the last. */
  42. else if (id_buffer[slot].x == id || slot == num_slots - 1) {
  43. atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
  44. break;
  45. }
  46. #else /* __ATOMIC_PASS_WRITE__ */
  47. /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
  48. if (id_buffer[slot].x == ID_NONE) {
  49. id_buffer[slot].x = id;
  50. id_buffer[slot].y = weight;
  51. break;
  52. }
  53. /* If there already is a slot for that ID, add the weight.
  54. * If no slot was found, add it to the last. */
  55. else if (id_buffer[slot].x == id || slot == num_slots - 1) {
  56. id_buffer[slot].y += weight;
  57. break;
  58. }
  59. #endif /* __ATOMIC_PASS_WRITE__ */
  60. }
  61. }
  62. ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
  63. {
  64. ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
  65. for (int slot = 1; slot < num_slots; ++slot) {
  66. if (id_buffer[slot].x == ID_NONE) {
  67. return;
  68. }
  69. /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
  70. int i = slot;
  71. while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
  72. float2 swap = id_buffer[i];
  73. id_buffer[i] = id_buffer[i - 1];
  74. id_buffer[i - 1] = swap;
  75. --i;
  76. }
  77. }
  78. }
  79. #ifdef __KERNEL_GPU__
  80. /* post-sorting for Cryptomatte */
  81. ccl_device void kernel_cryptomatte_post(
  82. KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
  83. {
  84. if (sample - 1 == kernel_data.integrator.aa_samples) {
  85. int index = offset + x + y * stride;
  86. int pass_stride = kernel_data.film.pass_stride;
  87. ccl_global float *cryptomatte_buffer = buffer + index * pass_stride +
  88. kernel_data.film.pass_cryptomatte;
  89. kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
  90. }
  91. }
  92. #endif
  93. CCL_NAMESPACE_END