kernel_jitter.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /* TODO(sergey): Consider moving portable ctz/clz stuff to util. */
  17. CCL_NAMESPACE_BEGIN
  18. /* "Correlated Multi-Jittered Sampling"
  19. * Andrew Kensler, Pixar Technical Memo 13-01, 2013 */
  20. /* todo: find good value, suggested 64 gives pattern on cornell box ceiling */
  21. #define CMJ_RANDOM_OFFSET_LIMIT 4096
  22. ccl_device_inline bool cmj_is_pow2(int i)
  23. {
  24. return (i > 1) && ((i & (i - 1)) == 0);
  25. }
  26. ccl_device_inline int cmj_fast_mod_pow2(int a, int b)
  27. {
  28. return (a & (b - 1));
  29. }
  30. /* b must be > 1 */
  31. ccl_device_inline int cmj_fast_div_pow2(int a, int b)
  32. {
  33. kernel_assert(b > 1);
  34. #if defined(__KERNEL_SSE2__)
  35. # ifdef _MSC_VER
  36. unsigned long ctz;
  37. _BitScanForward(&ctz, b);
  38. return a >> ctz;
  39. # else
  40. return a >> __builtin_ctz(b);
  41. # endif
  42. #elif defined(__KERNEL_CUDA__)
  43. return a >> (__ffs(b) - 1);
  44. #else
  45. return a / b;
  46. #endif
  47. }
  48. ccl_device_inline uint cmj_w_mask(uint w)
  49. {
  50. kernel_assert(w > 1);
  51. #if defined(__KERNEL_SSE2__)
  52. # ifdef _MSC_VER
  53. unsigned long leading_zero;
  54. _BitScanReverse(&leading_zero, w);
  55. return ((1 << (1 + leading_zero)) - 1);
  56. # else
  57. return ((1 << (32 - __builtin_clz(w))) - 1);
  58. # endif
  59. #elif defined(__KERNEL_CUDA__)
  60. return ((1 << (32 - __clz(w))) - 1);
  61. #else
  62. w |= w >> 1;
  63. w |= w >> 2;
  64. w |= w >> 4;
  65. w |= w >> 8;
  66. w |= w >> 16;
  67. return w;
  68. #endif
  69. }
  70. ccl_device_inline uint cmj_permute(uint i, uint l, uint p)
  71. {
  72. uint w = l - 1;
  73. if ((l & w) == 0) {
  74. /* l is a power of two (fast) */
  75. i ^= p;
  76. i *= 0xe170893d;
  77. i ^= p >> 16;
  78. i ^= (i & w) >> 4;
  79. i ^= p >> 8;
  80. i *= 0x0929eb3f;
  81. i ^= p >> 23;
  82. i ^= (i & w) >> 1;
  83. i *= 1 | p >> 27;
  84. i *= 0x6935fa69;
  85. i ^= (i & w) >> 11;
  86. i *= 0x74dcb303;
  87. i ^= (i & w) >> 2;
  88. i *= 0x9e501cc3;
  89. i ^= (i & w) >> 2;
  90. i *= 0xc860a3df;
  91. i &= w;
  92. i ^= i >> 5;
  93. return (i + p) & w;
  94. }
  95. else {
  96. /* l is not a power of two (slow) */
  97. w = cmj_w_mask(w);
  98. do {
  99. i ^= p;
  100. i *= 0xe170893d;
  101. i ^= p >> 16;
  102. i ^= (i & w) >> 4;
  103. i ^= p >> 8;
  104. i *= 0x0929eb3f;
  105. i ^= p >> 23;
  106. i ^= (i & w) >> 1;
  107. i *= 1 | p >> 27;
  108. i *= 0x6935fa69;
  109. i ^= (i & w) >> 11;
  110. i *= 0x74dcb303;
  111. i ^= (i & w) >> 2;
  112. i *= 0x9e501cc3;
  113. i ^= (i & w) >> 2;
  114. i *= 0xc860a3df;
  115. i &= w;
  116. i ^= i >> 5;
  117. } while (i >= l);
  118. return (i + p) % l;
  119. }
  120. }
  121. ccl_device_inline uint cmj_hash(uint i, uint p)
  122. {
  123. i ^= p;
  124. i ^= i >> 17;
  125. i ^= i >> 10;
  126. i *= 0xb36534e5;
  127. i ^= i >> 12;
  128. i ^= i >> 21;
  129. i *= 0x93fc4795;
  130. i ^= 0xdf6e307f;
  131. i ^= i >> 17;
  132. i *= 1 | p >> 18;
  133. return i;
  134. }
  135. ccl_device_inline uint cmj_hash_simple(uint i, uint p)
  136. {
  137. i = (i ^ 61) ^ p;
  138. i += i << 3;
  139. i ^= i >> 4;
  140. i *= 0x27d4eb2d;
  141. return i;
  142. }
  143. ccl_device_inline float cmj_randfloat(uint i, uint p)
  144. {
  145. return cmj_hash(i, p) * (1.0f / 4294967808.0f);
  146. }
  147. #ifdef __CMJ__
  148. ccl_device float cmj_sample_1D(int s, int N, int p)
  149. {
  150. kernel_assert(s < N);
  151. uint x = cmj_permute(s, N, p * 0x68bc21eb);
  152. float jx = cmj_randfloat(s, p * 0x967a889b);
  153. float invN = 1.0f / N;
  154. return (x + jx) * invN;
  155. }
  156. /* TODO(sergey): Do some extra tests and consider moving to util_math.h. */
  157. ccl_device_inline int cmj_isqrt(int value)
  158. {
  159. # if defined(__KERNEL_CUDA__)
  160. return float_to_int(__fsqrt_ru(value));
  161. # elif defined(__KERNEL_GPU__)
  162. return float_to_int(sqrtf(value));
  163. # else
  164. /* This is a work around for fast-math on CPU which might replace sqrtf()
  165. * with am approximated version.
  166. */
  167. return float_to_int(sqrtf(value) + 1e-6f);
  168. # endif
  169. }
  170. ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
  171. {
  172. kernel_assert(s < N);
  173. int m = cmj_isqrt(N);
  174. int n = (N - 1) / m + 1;
  175. float invN = 1.0f / N;
  176. float invm = 1.0f / m;
  177. float invn = 1.0f / n;
  178. s = cmj_permute(s, N, p * 0x51633e2d);
  179. int sdivm, smodm;
  180. if (cmj_is_pow2(m)) {
  181. sdivm = cmj_fast_div_pow2(s, m);
  182. smodm = cmj_fast_mod_pow2(s, m);
  183. }
  184. else {
  185. /* Doing s*inmv gives precision issues here. */
  186. sdivm = s / m;
  187. smodm = s - sdivm * m;
  188. }
  189. uint sx = cmj_permute(smodm, m, p * 0x68bc21eb);
  190. uint sy = cmj_permute(sdivm, n, p * 0x02e5be93);
  191. float jx = cmj_randfloat(s, p * 0x967a889b);
  192. float jy = cmj_randfloat(s, p * 0x368cc8b7);
  193. *fx = (sx + (sy + jx) * invn) * invm;
  194. *fy = (s + jy) * invN;
  195. }
  196. #endif
  197. CCL_NAMESPACE_END