util_math.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. /*
  2. * Copyright 2011-2013 Blender Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __UTIL_MATH_H__
  17. #define __UTIL_MATH_H__
  18. /* Math
  19. *
  20. * Basic math functions on scalar and vector types. This header is used by
  21. * both the kernel code when compiled as C++, and other C++ non-kernel code. */
  22. #ifndef __KERNEL_GPU__
  23. # include <cmath>
  24. #endif
  25. #ifndef __KERNEL_OPENCL__
  26. # include <float.h>
  27. # include <math.h>
  28. # include <stdio.h>
  29. #endif /* __KERNEL_OPENCL__ */
  30. #include "util/util_types.h"
  31. CCL_NAMESPACE_BEGIN
  32. /* Float Pi variations */
  33. /* Division */
  34. #ifndef M_PI_F
  35. # define M_PI_F (3.1415926535897932f) /* pi */
  36. #endif
  37. #ifndef M_PI_2_F
  38. # define M_PI_2_F (1.5707963267948966f) /* pi/2 */
  39. #endif
  40. #ifndef M_PI_4_F
  41. # define M_PI_4_F (0.7853981633974830f) /* pi/4 */
  42. #endif
  43. #ifndef M_1_PI_F
  44. # define M_1_PI_F (0.3183098861837067f) /* 1/pi */
  45. #endif
  46. #ifndef M_2_PI_F
  47. # define M_2_PI_F (0.6366197723675813f) /* 2/pi */
  48. #endif
  49. #ifndef M_1_2PI_F
  50. # define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */
  51. #endif
  52. #ifndef M_SQRT_PI_8_F
  53. # define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */
  54. #endif
  55. #ifndef M_LN_2PI_F
  56. # define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */
  57. #endif
  58. /* Multiplication */
  59. #ifndef M_2PI_F
  60. # define M_2PI_F (6.2831853071795864f) /* 2*pi */
  61. #endif
  62. #ifndef M_4PI_F
  63. # define M_4PI_F (12.566370614359172f) /* 4*pi */
  64. #endif
  65. /* Float sqrt variations */
  66. #ifndef M_SQRT2_F
  67. # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
  68. #endif
  69. #ifndef M_LN2_F
  70. # define M_LN2_F (0.6931471805599453f) /* ln(2) */
  71. #endif
  72. #ifndef M_LN10_F
  73. # define M_LN10_F (2.3025850929940457f) /* ln(10) */
  74. #endif
  75. /* Scalar */
  76. #ifdef _WIN32
  77. # ifndef __KERNEL_OPENCL__
  78. ccl_device_inline float fmaxf(float a, float b)
  79. {
  80. return (a > b) ? a : b;
  81. }
  82. ccl_device_inline float fminf(float a, float b)
  83. {
  84. return (a < b) ? a : b;
  85. }
  86. # endif /* !__KERNEL_OPENCL__ */
  87. #endif /* _WIN32 */
  88. #ifndef __KERNEL_GPU__
  89. using std::isfinite;
  90. using std::isnan;
  91. using std::sqrt;
  92. ccl_device_inline int abs(int x)
  93. {
  94. return (x > 0) ? x : -x;
  95. }
  96. ccl_device_inline int max(int a, int b)
  97. {
  98. return (a > b) ? a : b;
  99. }
  100. ccl_device_inline int min(int a, int b)
  101. {
  102. return (a < b) ? a : b;
  103. }
  104. ccl_device_inline float max(float a, float b)
  105. {
  106. return (a > b) ? a : b;
  107. }
  108. ccl_device_inline float min(float a, float b)
  109. {
  110. return (a < b) ? a : b;
  111. }
  112. ccl_device_inline double max(double a, double b)
  113. {
  114. return (a > b) ? a : b;
  115. }
  116. ccl_device_inline double min(double a, double b)
  117. {
  118. return (a < b) ? a : b;
  119. }
/* The following two functions are templated so they can be used with register data types.
 *
 * NOTE: Since these are CPU-only functions it is fine to take the arguments by reference.
 * For other devices we would need to be careful about this.
 */
  125. template<typename T> ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d)
  126. {
  127. return min(min(a, b), min(c, d));
  128. }
  129. template<typename T> ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d)
  130. {
  131. return max(max(a, b), max(c, d));
  132. }
  133. #endif /* __KERNEL_GPU__ */
  134. ccl_device_inline float min4(float a, float b, float c, float d)
  135. {
  136. return min(min(a, b), min(c, d));
  137. }
  138. ccl_device_inline float max4(float a, float b, float c, float d)
  139. {
  140. return max(max(a, b), max(c, d));
  141. }
  142. #ifndef __KERNEL_OPENCL__
  143. /* Int/Float conversion */
  144. ccl_device_inline int as_int(uint i)
  145. {
  146. union {
  147. uint ui;
  148. int i;
  149. } u;
  150. u.ui = i;
  151. return u.i;
  152. }
  153. ccl_device_inline uint as_uint(int i)
  154. {
  155. union {
  156. uint ui;
  157. int i;
  158. } u;
  159. u.i = i;
  160. return u.ui;
  161. }
  162. ccl_device_inline uint as_uint(float f)
  163. {
  164. union {
  165. uint i;
  166. float f;
  167. } u;
  168. u.f = f;
  169. return u.i;
  170. }
  171. ccl_device_inline int __float_as_int(float f)
  172. {
  173. union {
  174. int i;
  175. float f;
  176. } u;
  177. u.f = f;
  178. return u.i;
  179. }
  180. ccl_device_inline float __int_as_float(int i)
  181. {
  182. union {
  183. int i;
  184. float f;
  185. } u;
  186. u.i = i;
  187. return u.f;
  188. }
  189. ccl_device_inline uint __float_as_uint(float f)
  190. {
  191. union {
  192. uint i;
  193. float f;
  194. } u;
  195. u.f = f;
  196. return u.i;
  197. }
  198. ccl_device_inline float __uint_as_float(uint i)
  199. {
  200. union {
  201. uint i;
  202. float f;
  203. } u;
  204. u.i = i;
  205. return u.f;
  206. }
  207. ccl_device_inline int4 __float4_as_int4(float4 f)
  208. {
  209. # ifdef __KERNEL_SSE__
  210. return int4(_mm_castps_si128(f.m128));
  211. # else
  212. return make_int4(
  213. __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w));
  214. # endif
  215. }
  216. ccl_device_inline float4 __int4_as_float4(int4 i)
  217. {
  218. # ifdef __KERNEL_SSE__
  219. return float4(_mm_castsi128_ps(i.m128));
  220. # else
  221. return make_float4(
  222. __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w));
  223. # endif
  224. }
  225. #endif /* __KERNEL_OPENCL__ */
  226. /* Versions of functions which are safe for fast math. */
  227. ccl_device_inline bool isnan_safe(float f)
  228. {
  229. unsigned int x = __float_as_uint(f);
  230. return (x << 1) > 0xff000000u;
  231. }
  232. ccl_device_inline bool isfinite_safe(float f)
  233. {
  234. /* By IEEE 754 rule, 2*Inf equals Inf */
  235. unsigned int x = __float_as_uint(f);
  236. return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u);
  237. }
  238. ccl_device_inline float ensure_finite(float v)
  239. {
  240. return isfinite_safe(v) ? v : 0.0f;
  241. }
  242. #ifndef __KERNEL_OPENCL__
  243. ccl_device_inline int clamp(int a, int mn, int mx)
  244. {
  245. return min(max(a, mn), mx);
  246. }
  247. ccl_device_inline float clamp(float a, float mn, float mx)
  248. {
  249. return min(max(a, mn), mx);
  250. }
  251. ccl_device_inline float mix(float a, float b, float t)
  252. {
  253. return a + t * (b - a);
  254. }
  255. #endif /* __KERNEL_OPENCL__ */
  256. #ifndef __KERNEL_CUDA__
  257. ccl_device_inline float saturate(float a)
  258. {
  259. return clamp(a, 0.0f, 1.0f);
  260. }
  261. #endif /* __KERNEL_CUDA__ */
  262. ccl_device_inline int float_to_int(float f)
  263. {
  264. return (int)f;
  265. }
  266. ccl_device_inline int floor_to_int(float f)
  267. {
  268. return float_to_int(floorf(f));
  269. }
  270. ccl_device_inline int quick_floor_to_int(float x)
  271. {
  272. return float_to_int(x) - ((x < 0) ? 1 : 0);
  273. }
  274. ccl_device_inline int ceil_to_int(float f)
  275. {
  276. return float_to_int(ceilf(f));
  277. }
  278. ccl_device_inline float signf(float f)
  279. {
  280. return (f < 0.0f) ? -1.0f : 1.0f;
  281. }
  282. ccl_device_inline float nonzerof(float f, float eps)
  283. {
  284. if (fabsf(f) < eps)
  285. return signf(f) * eps;
  286. else
  287. return f;
  288. }
  289. ccl_device_inline float smoothstepf(float f)
  290. {
  291. float ff = f * f;
  292. return (3.0f * ff - 2.0f * ff * f);
  293. }
  294. ccl_device_inline int mod(int x, int m)
  295. {
  296. return (x % m + m) % m;
  297. }
  298. ccl_device_inline float3 float2_to_float3(const float2 a)
  299. {
  300. return make_float3(a.x, a.y, 0.0f);
  301. }
  302. ccl_device_inline float3 float4_to_float3(const float4 a)
  303. {
  304. return make_float3(a.x, a.y, a.z);
  305. }
  306. ccl_device_inline float4 float3_to_float4(const float3 a)
  307. {
  308. return make_float4(a.x, a.y, a.z, 1.0f);
  309. }
  310. ccl_device_inline float inverse_lerp(float a, float b, float x)
  311. {
  312. return (x - a) / (b - a);
  313. }
  314. /* Cubic interpolation between b and c, a and d are the previous and next point. */
  315. ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x)
  316. {
  317. return 0.5f *
  318. (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x +
  319. (c - a)) *
  320. x +
  321. b;
  322. }
  323. CCL_NAMESPACE_END
  324. #include "util/util_math_int2.h"
  325. #include "util/util_math_int3.h"
  326. #include "util/util_math_int4.h"
  327. #include "util/util_math_float2.h"
  328. #include "util/util_math_float3.h"
  329. #include "util/util_math_float4.h"
  330. #include "util/util_rect.h"
  331. CCL_NAMESPACE_BEGIN
  332. #ifndef __KERNEL_OPENCL__
  333. /* Interpolation */
  334. template<class A, class B> A lerp(const A &a, const A &b, const B &t)
  335. {
  336. return (A)(a * ((B)1 - t) + b * t);
  337. }
  338. #endif /* __KERNEL_OPENCL__ */
  339. /* Triangle */
  340. #ifndef __KERNEL_OPENCL__
  341. ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
  342. #else
  343. ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
  344. #endif
  345. {
  346. return len(cross(v3 - v2, v1 - v2)) * 0.5f;
  347. }
  348. /* Orthonormal vectors */
  349. ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
  350. {
  351. #if 0
  352. if (fabsf(N.y) >= 0.999f) {
  353. *a = make_float3(1, 0, 0);
  354. *b = make_float3(0, 0, 1);
  355. return;
  356. }
  357. if (fabsf(N.z) >= 0.999f) {
  358. *a = make_float3(1, 0, 0);
  359. *b = make_float3(0, 1, 0);
  360. return;
  361. }
  362. #endif
  363. if (N.x != N.y || N.x != N.z)
  364. *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N
  365. else
  366. *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N
  367. *a = normalize(*a);
  368. *b = cross(N, *a);
  369. }
  370. /* Color division */
  371. ccl_device_inline float3 safe_invert_color(float3 a)
  372. {
  373. float x, y, z;
  374. x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f;
  375. y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f;
  376. z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f;
  377. return make_float3(x, y, z);
  378. }
  379. ccl_device_inline float3 safe_divide_color(float3 a, float3 b)
  380. {
  381. float x, y, z;
  382. x = (b.x != 0.0f) ? a.x / b.x : 0.0f;
  383. y = (b.y != 0.0f) ? a.y / b.y : 0.0f;
  384. z = (b.z != 0.0f) ? a.z / b.z : 0.0f;
  385. return make_float3(x, y, z);
  386. }
  387. ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b)
  388. {
  389. float x, y, z;
  390. x = (b.x != 0.0f) ? a.x / b.x : 0.0f;
  391. y = (b.y != 0.0f) ? a.y / b.y : 0.0f;
  392. z = (b.z != 0.0f) ? a.z / b.z : 0.0f;
  393. /* try to get gray even if b is zero */
  394. if (b.x == 0.0f) {
  395. if (b.y == 0.0f) {
  396. x = z;
  397. y = z;
  398. }
  399. else if (b.z == 0.0f) {
  400. x = y;
  401. z = y;
  402. }
  403. else
  404. x = 0.5f * (y + z);
  405. }
  406. else if (b.y == 0.0f) {
  407. if (b.z == 0.0f) {
  408. y = x;
  409. z = x;
  410. }
  411. else
  412. y = 0.5f * (x + z);
  413. }
  414. else if (b.z == 0.0f) {
  415. z = 0.5f * (x + y);
  416. }
  417. return make_float3(x, y, z);
  418. }
  419. /* Rotation of point around axis and angle */
  420. ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
  421. {
  422. float costheta = cosf(angle);
  423. float sintheta = sinf(angle);
  424. float3 r;
  425. r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) +
  426. (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) +
  427. (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z);
  428. r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) +
  429. ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) +
  430. (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z);
  431. r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) +
  432. (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) +
  433. ((costheta + (1 - costheta) * axis.z * axis.z) * p.z);
  434. return r;
  435. }
  436. /* NaN-safe math ops */
  437. ccl_device_inline float safe_sqrtf(float f)
  438. {
  439. return sqrtf(max(f, 0.0f));
  440. }
  441. ccl_device float safe_asinf(float a)
  442. {
  443. return asinf(clamp(a, -1.0f, 1.0f));
  444. }
  445. ccl_device float safe_acosf(float a)
  446. {
  447. return acosf(clamp(a, -1.0f, 1.0f));
  448. }
  449. ccl_device float compatible_powf(float x, float y)
  450. {
  451. #ifdef __KERNEL_GPU__
  452. if (y == 0.0f) /* x^0 -> 1, including 0^0 */
  453. return 1.0f;
  454. /* GPU pow doesn't accept negative x, do manual checks here */
  455. if (x < 0.0f) {
  456. if (fmodf(-y, 2.0f) == 0.0f)
  457. return powf(-x, y);
  458. else
  459. return -powf(-x, y);
  460. }
  461. else if (x == 0.0f)
  462. return 0.0f;
  463. #endif
  464. return powf(x, y);
  465. }
  466. ccl_device float safe_powf(float a, float b)
  467. {
  468. if (UNLIKELY(a < 0.0f && b != float_to_int(b)))
  469. return 0.0f;
  470. return compatible_powf(a, b);
  471. }
  472. ccl_device float safe_divide(float a, float b)
  473. {
  474. return (b != 0.0f) ? a / b : 0.0f;
  475. }
  476. ccl_device float safe_logf(float a, float b)
  477. {
  478. if (UNLIKELY(a <= 0.0f || b <= 0.0f))
  479. return 0.0f;
  480. return safe_divide(logf(a), logf(b));
  481. }
  482. ccl_device float safe_modulo(float a, float b)
  483. {
  484. return (b != 0.0f) ? fmodf(a, b) : 0.0f;
  485. }
  486. ccl_device_inline float sqr(float a)
  487. {
  488. return a * a;
  489. }
  490. ccl_device_inline float pow20(float a)
  491. {
  492. return sqr(sqr(sqr(sqr(a)) * a));
  493. }
  494. ccl_device_inline float pow22(float a)
  495. {
  496. return sqr(a * sqr(sqr(sqr(a)) * a));
  497. }
  498. ccl_device_inline float beta(float x, float y)
  499. {
  500. #ifndef __KERNEL_OPENCL__
  501. return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
  502. #else
  503. return expf(lgamma(x) + lgamma(y) - lgamma(x + y));
  504. #endif
  505. }
  506. ccl_device_inline float xor_signmask(float x, int y)
  507. {
  508. return __int_as_float(__float_as_int(x) ^ y);
  509. }
  510. ccl_device float bits_to_01(uint bits)
  511. {
  512. return bits * (1.0f / (float)0xFFFFFFFF);
  513. }
  514. /* projections */
  515. ccl_device_inline float2 map_to_tube(const float3 co)
  516. {
  517. float len, u, v;
  518. len = sqrtf(co.x * co.x + co.y * co.y);
  519. if (len > 0.0f) {
  520. u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f;
  521. v = (co.z + 1.0f) * 0.5f;
  522. }
  523. else {
  524. u = v = 0.0f;
  525. }
  526. return make_float2(u, v);
  527. }
  528. ccl_device_inline float2 map_to_sphere(const float3 co)
  529. {
  530. float l = len(co);
  531. float u, v;
  532. if (l > 0.0f) {
  533. if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) {
  534. u = 0.0f; /* othwise domain error */
  535. }
  536. else {
  537. u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f;
  538. }
  539. v = 1.0f - safe_acosf(co.z / l) / M_PI_F;
  540. }
  541. else {
  542. u = v = 0.0f;
  543. }
  544. return make_float2(u, v);
  545. }
  546. /* Compares two floats.
  547. * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero)
  548. * or their relative difference is less than ulp_diff ULPs.
  549. * Based on
  550. * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
  551. */
  552. ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff)
  553. {
  554. if (fabsf(a - b) < abs_diff) {
  555. return true;
  556. }
  557. if ((a < 0.0f) != (b < 0.0f)) {
  558. return false;
  559. }
  560. return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff);
  561. }
  562. CCL_NAMESPACE_END
  563. #endif /* __UTIL_MATH_H__ */