/* svm_noise.h */
/*
 * Adapted from Open Shading Language with this license:
 *
 * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
 * All Rights Reserved.
 *
 * Modifications Copyright 2011, Blender Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * * Neither the name of Sony Pictures Imageworks nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
  32. CCL_NAMESPACE_BEGIN
  33. #ifdef __KERNEL_SSE2__
  34. ccl_device_inline ssei quick_floor_sse(const ssef &x)
  35. {
  36. ssei b = truncatei(x);
  37. ssei isneg = cast((x < ssef(0.0f)).m128);
  38. return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
  39. }
  40. #endif
  41. ccl_device uint hash(uint kx, uint ky, uint kz)
  42. {
  43. // define some handy macros
  44. #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
  45. #define final(a, b, c) \
  46. { \
  47. c ^= b; \
  48. c -= rot(b, 14); \
  49. a ^= c; \
  50. a -= rot(c, 11); \
  51. b ^= a; \
  52. b -= rot(a, 25); \
  53. c ^= b; \
  54. c -= rot(b, 16); \
  55. a ^= c; \
  56. a -= rot(c, 4); \
  57. b ^= a; \
  58. b -= rot(a, 14); \
  59. c ^= b; \
  60. c -= rot(b, 24); \
  61. }
  62. // now hash the data!
  63. uint a, b, c, len = 3;
  64. a = b = c = 0xdeadbeef + (len << 2) + 13;
  65. c += kz;
  66. b += ky;
  67. a += kx;
  68. final(a, b, c);
  69. return c;
  70. // macros not needed anymore
  71. #undef rot
  72. #undef final
  73. }
  74. #ifdef __KERNEL_SSE2__
  75. ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
  76. {
  77. # define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
  78. # define xor_rot(a, b, c) \
  79. do { \
  80. a = a ^ b; \
  81. a = a - rot(b, c); \
  82. } while (0)
  83. uint len = 3;
  84. ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
  85. ssei a = magic + kx;
  86. ssei b = magic + ky;
  87. ssei c = magic + kz;
  88. xor_rot(c, b, 14);
  89. xor_rot(a, c, 11);
  90. xor_rot(b, a, 25);
  91. xor_rot(c, b, 16);
  92. xor_rot(a, c, 4);
  93. xor_rot(b, a, 14);
  94. xor_rot(c, b, 24);
  95. return c;
  96. # undef rot
  97. # undef xor_rot
  98. }
  99. #endif
#if 0 // unused
/* Positive modulo: like a % b, but the result is always in [0, b) for b > 0. */
ccl_device int imod(int a, int b)
{
a %= b;
return a < 0 ? a + b : a;
}
/* Periodic hash: wraps the lattice coordinates to period p before hashing,
 * so noise built on it tiles with period (p.x, p.y, p.z). */
ccl_device uint phash(int kx, int ky, int kz, int3 p)
{
return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
}
#endif
  111. #ifndef __KERNEL_SSE2__
  112. ccl_device float floorfrac(float x, int *i)
  113. {
  114. *i = quick_floor_to_int(x);
  115. return x - *i;
  116. }
  117. #else
  118. ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
  119. {
  120. *i = quick_floor_sse(x);
  121. return x - ssef(*i);
  122. }
  123. #endif
  124. #ifndef __KERNEL_SSE2__
  125. ccl_device float fade(float t)
  126. {
  127. return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
  128. }
  129. #else
  130. ccl_device_inline ssef fade_sse(const ssef *t)
  131. {
  132. ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
  133. ssef b = madd(*t, a, ssef(10.0f));
  134. return ((*t) * (*t)) * ((*t) * b);
  135. }
  136. #endif
  137. #ifndef __KERNEL_SSE2__
  138. ccl_device float nerp(float t, float a, float b)
  139. {
  140. return (1.0f - t) * a + t * b;
  141. }
  142. #else
  143. ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
  144. {
  145. ssef x1 = (ssef(1.0f) - t) * a;
  146. return madd(t, b, x1);
  147. }
  148. #endif
  149. #ifndef __KERNEL_SSE2__
  150. ccl_device float grad(int hash, float x, float y, float z)
  151. {
  152. // use vectors pointing to the edges of the cube
  153. int h = hash & 15;
  154. float u = h < 8 ? x : y;
  155. float vt = ((h == 12) | (h == 14)) ? x : z;
  156. float v = h < 4 ? y : vt;
  157. return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
  158. }
  159. #else
  160. ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z)
  161. {
  162. ssei c1 = ssei(1);
  163. ssei c2 = ssei(2);
  164. ssei h = hash & ssei(15); // h = hash & 15
  165. sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0
  166. ssef u = select(case_ux, x, y); // u = h<8 ? x : y
  167. sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0
  168. sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0
  169. sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0
  170. sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0
  171. ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z
  172. ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0
  173. ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0
  174. ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign)
  175. ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0
  176. ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0
  177. ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign)
  178. ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v)
  179. return r;
  180. }
  181. #endif
  182. #ifndef __KERNEL_SSE2__
  183. ccl_device float scale3(float result)
  184. {
  185. return 0.9820f * result;
  186. }
  187. #else
  188. ccl_device_inline ssef scale3_sse(const ssef &result)
  189. {
  190. return ssef(0.9820f) * result;
  191. }
  192. #endif
#ifndef __KERNEL_SSE2__
/* Classic Perlin noise at (x, y, z); result is roughly in [-1, 1].
 * Hashes the 8 corners of the surrounding lattice cell, evaluates a gradient
 * at each, and trilinearly interpolates with the fade() ease curve. */
ccl_device_noinline float perlin(float x, float y, float z)
{
/* Lattice cell coordinates and fractional position within the cell. */
int X;
float fx = floorfrac(x, &X);
int Y;
float fy = floorfrac(y, &Y);
int Z;
float fz = floorfrac(z, &Z);
/* Eased interpolation weights along each axis. */
float u = fade(fx);
float v = fade(fy);
float w = fade(fz);
float result;
/* Trilinear interpolation: innermost nerp along x, then y, then z. */
result = nerp(
    w,
    nerp(v,
         nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)),
         nerp(u,
              grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz),
              grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
    nerp(v,
         nerp(u,
              grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f),
              grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
         nerp(u,
              grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
              grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
float r = scale3(result);
/* can happen for big coordinates, things even out to 0.0 then anyway */
return (isfinite(r)) ? r : 0.0f;
}
#else
/* SSE2 Perlin noise: evaluates 4 of the 8 cell corners per vector, then
 * reduces with shuffles so the three nerp stages collapse 8 -> 4 -> 2 -> 1. */
ccl_device_noinline float perlin(float x, float y, float z)
{
ssef xyz = ssef(x, y, z, 0.0f);
ssei XYZ;
/* Packed lattice coordinates and in-cell fractions, as in the scalar path. */
ssef fxyz = floorfrac_sse(xyz, &XYZ);
ssef uvw = fade_sse(&fxyz);
/* Broadcast each eased weight to a full vector. */
ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw);
ssei XYZ_ofc = XYZ + ssei(1);
/* Corner offset patterns for the y and z lattice coordinates. */
ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc);                      // +0, +0, +1, +1
ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc));  // +0, +1, +0, +1
ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz);      // hash directions 000, 001, 010, 011
ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz);  // hash directions 100, 101, 110, 111
/* Matching fractional offsets for the gradient evaluations. */
ssef fxyz_ofc = fxyz - ssef(1.0f);
ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc);
ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc));
ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz);
ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz);
/* First reduction: interpolate along x, 8 corners -> 4 values. */
ssef n1 = nerp_sse(u, g1, g2);
ssef n1_half = shuffle<2, 3, 2, 3>(n1);  // extract 2 floats to a separate vector
/* Second reduction: interpolate along y, 4 -> 2. */
ssef n2 = nerp_sse(
    v, n1, n1_half);  // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _]
ssef n2_second = shuffle<1>(n2);  // extract b to a separate vector
/* Third reduction: interpolate along z, 2 -> 1. */
ssef result = nerp_sse(
    w, n2, n2_second);  // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _]
ssef r = scale3_sse(result);
/* Replace non-finite results with 0.0, as in the scalar path: a value is
 * inf/-inf/nan exactly when all exponent bits are set. */
ssef infmask = cast(ssei(0x7f800000));
ssef rinfmask = ((r & infmask) == infmask).m128;  // 0xffffffff if r is inf/-inf/nan else 0
ssef rfinite = andnot(rinfmask, r);               // 0 if r is inf/-inf/nan else r
return extract<0>(rfinite);
}
#endif
  256. /* perlin noise in range 0..1 */
  257. ccl_device float noise(float3 p)
  258. {
  259. float r = perlin(p.x, p.y, p.z);
  260. return 0.5f * r + 0.5f;
  261. }
/* perlin noise in range -1..1 (signed noise, straight Perlin output) */
ccl_device float snoise(float3 p)
{
return perlin(p.x, p.y, p.z);
}
  267. /* cell noise */
  268. ccl_device float cellnoise(float3 p)
  269. {
  270. int3 ip = quick_floor_to_int3(p);
  271. return bits_to_01(hash(ip.x, ip.y, ip.z));
  272. }
  273. ccl_device float3 cellnoise3(float3 p)
  274. {
  275. int3 ip = quick_floor_to_int3(p);
  276. #ifndef __KERNEL_SSE__
  277. float r = bits_to_01(hash(ip.x, ip.y, ip.z));
  278. float g = bits_to_01(hash(ip.y, ip.x, ip.z));
  279. float b = bits_to_01(hash(ip.y, ip.z, ip.x));
  280. return make_float3(r, g, b);
  281. #else
  282. ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
  283. ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
  284. ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
  285. ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
  286. return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF));
  287. #endif
  288. }
  289. CCL_NAMESPACE_END