Cry_Math.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. // Description : Common math class
  9. #pragma once
  10. //========================================================================================
  11. #include <platform.h>
  12. #include "Cry_ValidNumber.h"
  13. #include <CryEndian.h> // eLittleEndian
  14. #include <CryHalf.inl>
  15. #include <float.h>
  16. #include <limits>
  17. ///////////////////////////////////////////////////////////////////////////////
  18. // Forward declarations //
  19. ///////////////////////////////////////////////////////////////////////////////
  20. template <typename F>
  21. struct Vec2_tpl;
  22. template <typename F>
  23. struct Vec3_tpl;
  24. struct Vec4;
  25. template <typename F>
  26. struct Ang3_tpl;
  27. template <typename F>
  28. struct Plane_tpl;
  29. template <typename F>
  30. struct AngleAxis_tpl;
  31. template <typename F>
  32. struct Quat_tpl;
  33. template <typename F>
  34. struct Diag33_tpl;
  35. template <typename F>
  36. struct Matrix33_tpl;
  37. template <typename F>
  38. struct Matrix34_tpl;
  39. template <typename F>
  40. struct Matrix44_tpl;
  41. ///////////////////////////////////////////////////////////////////////////////
  42. // Definitions //
  43. ///////////////////////////////////////////////////////////////////////////////
  44. const f32 gf_PI = f32(3.14159265358979323846264338327950288419716939937510);
  45. const f64 g_PI = 3.14159265358979323846264338327950288419716939937510; // pi
  46. const f32 gf_PI2 = f32(3.14159265358979323846264338327950288419716939937510 * 2.0);
  47. const f64 g_PI2 = 3.14159265358979323846264338327950288419716939937510 * 2.0; // 2*pi
  48. const f64 sqrt2 = 1.4142135623730950488016887242097;
  49. const f64 sqrt3 = 1.7320508075688772935274463415059;
  50. const f32 gf_halfPI = f32(1.57079632679489661923132169163975144209858469968755);
  51. #ifndef MAX
  52. #define MAX(a, b) (((a) > (b)) ? (a) : (b))
  53. #endif
  54. #ifndef MIN
  55. #define MIN(a, b) (((a) < (b)) ? (a) : (b))
  56. #endif
  57. #define VEC_EPSILON (0.05f)
  58. #define RAD_EPSILON (0.01f)
  59. #define DEG2RAD(a) ((a) * (gf_PI / 180.0f))
  60. #define RAD2DEG(a) ((a) * (180.0f / gf_PI))
  61. #define DEG2COS(a) (cos_tpl((a) * (gf_PI / 180.0f)))
  62. #define COS2DEG(a) (acos_tpl(a) * (180.0f / gf_PI))
  63. #define RAD2HCOS(a) (cos_tpl((a * 0.5f)))
  64. #define HCOS2RAD(a) (acos_tpl(a) * 2.0f)
  65. #define DEG2HCOS(a) (cos_tpl((a * 0.5f) * (gf_PI / 180.0f)))
  66. #define DEG2HSIN(a) (sin_tpl((a * 0.5f) * (gf_PI / 180.0f)))
  67. #define HCOS2DEG(a) (acos_tpl(a) * 2.0f * (180.0f / gf_PI))
  68. #define SIGN_MASK(x) ((intptr_t)(x) >> ((sizeof(size_t) * 8) - 1))
  69. #define TANGENT30 0.57735026918962576450914878050196f // tan(30)
  70. #define TANGENT30_2 0.57735026918962576450914878050196f * 2 // 2*tan(30)
  71. #define LN2 0.69314718055994530941723212145818f // ln(2)
  72. ILINE f32 fsel(const f32 _a, const f32 _b, const f32 _c) { return (_a < 0.0f) ? _c : _b; }
  73. ILINE f64 fsel(const f64 _a, const f64 _b, const f64 _c) { return (_a < 0.0f) ? _c : _b; }
  74. ILINE f32 fres(const f32 _a) { return 1.f / _a; }
  75. template<class T>
  76. ILINE T isel(int c, T a, T b) { return (c < 0) ? b : a; }
  77. template<class T>
  78. ILINE T isel(int64 c, T a, T b) { return (c < 0) ? b : a; }
  79. template<class T>
  80. ILINE T iselnz(int c, T a, T b) { return c ? a : b; }
  81. template<class T>
  82. ILINE T iselnz(uint32 c, T a, T b) { return c ? a : b; }
  83. template<class T>
  84. ILINE T iselnz(int64 c, T a, T b) { return c ? a : b; }
  85. template<class T>
  86. ILINE T iselnz(uint64 c, T a, T b) { return c ? a : b; }
  87. //provides fast way of checking against 0 (saves fcmp)
  88. ILINE bool fzero(const float& val) { return val == 0.0f; }
  89. ILINE bool fzero(float* pVal) { return *pVal == 0.0f; }
  90. //////////////////////////////////////////////////////////////////////////
  91. // Define min/max
  92. //////////////////////////////////////////////////////////////////////////
  93. #ifdef min
  94. #undef min
  95. #endif //min
  96. #ifdef max
  97. #undef max
  98. #endif //max
  99. // Bring min and max from std namespace to global scope.
  100. template <class T>
  101. ILINE T min(const T& a, const T& b) { return b < a ? b : a; }
  102. template <class T>
  103. ILINE T max(const T& a, const T& b) { return a < b ? b : a; }
  104. template <class T, class _Compare>
  105. ILINE const T& min(const T& a, const T& b, _Compare comp) { return comp(b, a) ? b : a; }
  106. template <class T, class _Compare>
  107. ILINE const T& max(const T& a, const T& b, _Compare comp) { return comp(a, b) ? b : a; }
  108. ILINE int min_branchless(int a, int b) { int diff = a - b; int mask = diff >> 31; return (b & (~mask)) | (a & mask); }
  109. template<class T>
  110. ILINE T clamp_tpl(T X, T Min, T Max) { return X < Min ? Min : X < Max ? X : Max; }
  111. template<class T>
  112. ILINE void Limit(T& val, const T& min, const T& max)
  113. {
  114. if (val < min)
  115. {
  116. val = min;
  117. }
  118. else if (val > max)
  119. {
  120. val = max;
  121. }
  122. }
  123. template<class T>
  124. ILINE T Lerp(const T& a, const T& b, float s) { return T(a + (b - a) * s); }
  125. //-------------------------------------------
  126. //-- the portability functions for CPU_X86
  127. //-------------------------------------------
  128. #if defined(_CPU_SSE) && !defined(__ARM_ARCH)
  129. #include <xmmintrin.h>
  130. #endif
  131. ILINE f32 fabs_tpl(f32 op) { return op < 0.0f ? -op : op; }
  132. ILINE f64 fabs_tpl(f64 op) { return fabs(op); }
  133. ILINE int32 fabs_tpl(int32 op) { int32 mask = op >> 31; return op + mask ^ mask; }
  134. ILINE f32 floor_tpl(f32 op) {return floorf(op); }
  135. ILINE f64 floor_tpl(f64 op) {return floor(op); }
  136. ILINE int32 floor_tpl(int32 op) {return op; }
  137. ILINE f32 ceil_tpl(f32 op) {return ceilf(op); }
  138. ILINE f64 ceil_tpl(f64 op) {return ceil(op); }
  139. ILINE int32 ceil_tpl(int32 op) {return op; }
  140. ILINE f32 fmod_tpl(f32 x, f32 y) {return (f32)fmodf(x, y); }
  141. ILINE f64 fmod_tpl(f64 x, f64 y) {return (f32)fmod(x, y); }
  142. ILINE void sincos_tpl (f32 angle, f32* pSin, f32* pCos) { *pSin = f32(sin(angle)); *pCos = f32(cos(angle)); }
  143. ILINE void sincos_tpl (f64 angle, f64* pSin, f64* pCos) { *pSin = f64(sin(angle)); *pCos = f64(cos(angle)); }
  144. ILINE f32 cos_tpl(f32 op) { return cosf(op); }
  145. ILINE f64 cos_tpl(f64 op) { return cos(op); }
  146. ILINE f32 sin_tpl(f32 op) { return sinf(op); }
  147. ILINE f64 sin_tpl(f64 op) { return sin(op); }
  148. ILINE f32 acos_tpl(f32 op) { return acosf(clamp_tpl(op, -1.0f, +1.0f)); }
  149. ILINE f64 acos_tpl(f64 op) { return acos(clamp_tpl(op, -1.0, +1.0)); }
  150. ILINE f32 asin_tpl(f32 op) { return asinf(clamp_tpl(op, -1.0f, +1.0f)); }
  151. ILINE f64 asin_tpl(f64 op) { return asin(clamp_tpl(op, -1.0, +1.0)); }
  152. ILINE f32 atan_tpl(f32 op) { return atanf(op); }
  153. ILINE f64 atan_tpl(f64 op) { return atan(op); }
  154. ILINE f32 atan2_tpl(f32 op1, f32 op2) { return atan2f(op1, op2); }
  155. ILINE f64 atan2_tpl(f64 op1, f64 op2) { return atan2(op1, op2); }
  156. ILINE f32 tan_tpl(f32 op) {return tanf(op); }
  157. ILINE f64 tan_tpl(f64 op) {return tan(op); }
  158. ILINE f32 exp_tpl(f32 op) { return expf(op); }
  159. ILINE f64 exp_tpl(f64 op) { return exp(op); }
  160. ILINE f32 log_tpl(f32 op) { return logf(op); }
  161. ILINE f64 log_tpl(f64 op) { return log(op); }
  162. ILINE f32 pow_tpl(f32 x, f32 y) {return (f32) pow((f64)x, (f64)y); }
  163. ILINE f64 pow_tpl(f64 x, f64 y) {return pow(x, y); }
  164. #if defined(_CPU_SSE)
  165. ILINE f32 sqrt_tpl(f32 op)
  166. {
  167. __m128 s = _mm_sqrt_ss(_mm_set_ss(op));
  168. float r;
  169. _mm_store_ss(&r, s);
  170. return r;
  171. }
  172. ILINE f64 sqrt_tpl(f64 op)
  173. {
  174. return sqrt(op);
  175. }
  176. ILINE f32 sqrt_fast_tpl(f32 op)
  177. {
  178. return sqrt_tpl(op);
  179. }
  180. ILINE f64 sqrt_fast_tpl(f64 op)
  181. {
  182. return sqrt_tpl(op);
  183. }
  184. ILINE f32 isqrt_tpl(f32 op)
  185. {
  186. __m128 value = _mm_set_ss(op);
  187. __m128 oneHalf = _mm_set_ss(0.5f);
  188. __m128 threeHalfs = _mm_set_ss(1.5f);
  189. __m128 simdRecipSqrt = _mm_rsqrt_ss(value);
  190. __m128 inverseMult = _mm_mul_ps(_mm_mul_ss(_mm_mul_ss(value, simdRecipSqrt), simdRecipSqrt), oneHalf);
  191. __m128 inverseInner = _mm_sub_ps(threeHalfs, inverseMult);
  192. __m128 newtonIteration1 = _mm_mul_ss(simdRecipSqrt, inverseInner);
  193. float r;
  194. _mm_store_ss(&r, newtonIteration1);
  195. return r;
  196. }
  197. ILINE f64 isqrt_tpl(f64 op)
  198. {
  199. return 1.0 / sqrt(op);
  200. }
  201. ILINE f32 isqrt_fast_tpl(f32 op)
  202. {
  203. return isqrt_tpl(op);
  204. }
  205. ILINE f64 isqrt_fast_tpl(f64 op)
  206. {
  207. return isqrt_tpl(op);
  208. }
  209. ILINE f32 isqrt_safe_tpl(f32 value)
  210. {
  211. return isqrt_tpl(value + (std::numeric_limits<f32>::min)());
  212. }
  213. ILINE f64 isqrt_safe_tpl(f64 value)
  214. {
  215. return isqrt_tpl(value + (std::numeric_limits<f64>::min)());
  216. }
  217. #elif defined(__ARM_NEON__) || defined(__ARM_NEON)
  218. #include "arm_neon.h"
  219. template <int n>
  220. float isqrt_helper(float f)
  221. {
  222. float32x2_t v = vdup_n_f32(f);
  223. float32x2_t r = vrsqrte_f32(v);
  224. // n+1 newton iterations because initial approximation is crude
  225. for (int i = 0; i <= n; ++i)
  226. {
  227. r = vrsqrts_f32(v * r, r) * r;
  228. }
  229. return vget_lane_f32(r, 0);
  230. }
  231. ILINE f32 sqrt_tpl(f32 op) { return op != 0.0f ? op* isqrt_helper<1>(op) : op; }
  232. ILINE f64 sqrt_tpl(f64 op) { return sqrt(op); }
  233. ILINE f32 sqrt_fast_tpl(f32 op) { return op != 0.0f ? op* isqrt_helper<0>(op) : op; }
  234. ILINE f64 sqrt_fast_tpl(f64 op) { return sqrt(op); }
  235. ILINE f32 isqrt_tpl(f32 op) { return isqrt_helper<1>(op); }
  236. ILINE f64 isqrt_tpl(f64 op) { return 1.0 / sqrt(op); }
  237. ILINE f32 isqrt_fast_tpl(f32 op) { return isqrt_helper<0>(op); }
  238. ILINE f64 isqrt_fast_tpl(f64 op) { return 1.0 / sqrt(op); }
  239. ILINE f32 isqrt_safe_tpl(f32 op) { return isqrt_helper<1>(op + FLT_MIN); }
  240. ILINE f64 isqrt_safe_tpl(f64 op) { return 1.0 / sqrt(op + DBL_MIN); }
  241. #else
  242. #error unsupported CPU
  243. #endif
  244. ILINE int32 int_round(f32 f) { return f < 0.f ? int32(f - 0.5f) : int32(f + 0.5f); }
  245. ILINE int32 pos_round(f32 f) { return int32(f + 0.5f); }
  246. ILINE int64 int_round(f64 f) { return f < 0.0 ? int64(f - 0.5) : int64(f + 0.5); }
  247. ILINE int64 pos_round(f64 f) { return int64(f + 0.5); }
  248. ILINE int32 int_ceil(f32 f) { int32 i = int32(f); return (f > f32(i)) ? i + 1 : i; }
  249. ILINE int64 int_ceil(f64 f) { int64 i = int64(f); return (f > f64(i)) ? i + 1 : i; }
  250. template<class F>
  251. ILINE F sqr(const F& op) { return op * op; }
  252. template<class F>
  253. ILINE F sqr(const Vec2_tpl<F>& op) { return op | op; }
  254. template<class F>
  255. ILINE F sqr(const Vec3_tpl<F>& op) { return op | op; }
  256. template<class F>
  257. ILINE F sqr_signed(const F& op) { return op * fabs_tpl(op); }
  258. template<class F>
  259. ILINE F cube(const F& op) { return op * op * op; }
  260. template<class F>
  261. ILINE F square(F fOp) { return(fOp * fOp); }
  262. ILINE float div_min(float n, float d, float m) { return n * d < m * d * d ? n / d : m; }
  263. // Utility functions for returning -1 if input is negative and non-zero, returns positive 1 otherwise
  264. // Uses extensive bit shifting for performance reasons.
  265. ILINE int32 sgnnz(f64 x)
  266. {
  267. union
  268. {
  269. f32 f;
  270. int32 i;
  271. } u;
  272. u.f = (f32)x;
  273. return ((u.i >> 31) << 1) + 1;
  274. }
  275. ILINE int32 sgnnz(f32 x)
  276. {
  277. union
  278. {
  279. f32 f;
  280. int32 i;
  281. } u;
  282. u.f = x;
  283. return ((u.i >> 31) << 1) + 1;
  284. }
  285. ILINE int32 sgnnz(int32 x) { return ((x >> 31) << 1) + 1; }
  286. ILINE f32 fsgnnz(f32 x)
  287. {
  288. union
  289. {
  290. f32 f;
  291. int32 i;
  292. } u;
  293. u.f = x;
  294. u.i = (u.i & 0x80000000) | 0x3f800000;
  295. return u.f;
  296. }
  297. ILINE int32 isneg(f32 x)
  298. {
  299. union
  300. {
  301. f32 f;
  302. uint32 i;
  303. } u;
  304. u.f = x;
  305. return (int32)(u.i >> 31);
  306. }
  307. ILINE int32 isneg(f64 x)
  308. {
  309. union
  310. {
  311. f32 f;
  312. uint32 i;
  313. } u;
  314. u.f = (f32)x;
  315. return (int32)(u.i >> 31);
  316. }
  317. ILINE int32 isneg(int32 x) { return (int32)((uint32)x >> 31); }
  318. ILINE int32 sgn(f64 x)
  319. {
  320. union
  321. {
  322. f32 f;
  323. int32 i;
  324. } u;
  325. u.f = (f32)x;
  326. return (u.i >> 31) + ((u.i - 1) >> 31) + 1;
  327. }
  328. ILINE int32 sgn(f32 x)
  329. {
  330. union
  331. {
  332. f32 f;
  333. int32 i;
  334. } u;
  335. u.f = x;
  336. return (u.i >> 31) + ((u.i - 1) >> 31) + 1;
  337. }
  338. ILINE int32 sgn(int32 x) { return (x >> 31) + ((x - 1) >> 31) + 1; }
  339. ILINE f32 fsgnf(f32 x) { return f32(sgn(x)); }
  340. ILINE int32 isnonneg(f32 x)
  341. {
  342. union
  343. {
  344. f32 f;
  345. uint32 i;
  346. } u;
  347. u.f = x;
  348. return (int32)(u.i >> 31 ^ 1);
  349. }
  350. ILINE int32 isnonneg(f64 x)
  351. {
  352. union
  353. {
  354. f32 f;
  355. uint32 i;
  356. } u;
  357. u.f = (f32)x;
  358. return (int32)(u.i >> 31 ^ 1);
  359. }
  360. ILINE int32 isnonneg(int32 x) { return (int32)((uint32)x >> 31 ^ 1); }
  361. ILINE int32 getexp(f32 x) { return (int32)(*(uint32*)&x >> 23 & 0x0FF) - 127; }
  362. ILINE int32 getexp(f64 x) { return (int32)(*((uint32*)&x + 1) >> 20 & 0x7FF) - 1023; }
  363. ILINE f32& setexp(f32& x, int32 iexp) { (*(uint32*)& x &= ~(0x0FF << 23)) |= (iexp + 127) << 23; return x; }
  364. ILINE f64& setexp(f64& x, int32 iexp) { (*((uint32*)&x + 1) &= ~(0x7FF << 20)) |= (iexp + 1023) << 20; return x; }
  365. ILINE int32 iszero(f32 x)
  366. {
  367. union
  368. {
  369. f32 f;
  370. int32 i;
  371. } u;
  372. u.f = x;
  373. u.i &= 0x7FFFFFFF;
  374. return -(u.i >> 31 ^ (u.i - 1) >> 31);
  375. }
  376. ILINE int32 iszero(f64 x)
  377. {
  378. union
  379. {
  380. f32 f;
  381. int32 i;
  382. } u;
  383. u.f = (f32)x;
  384. u.i &= 0x7FFFFFFF;
  385. return -((u.i >> 31) ^ (u.i - 1) >> 31);
  386. }
  387. ILINE int32 iszero(int32 x) { return -(x >> 31 ^ (x - 1) >> 31); }
  388. #if defined(PLATFORM_64BIT) && !defined(__clang__)
  389. ILINE int64 iszero(__int64 x) { return -(x >> 63 ^ (x - 1) >> 63); }
  390. #endif
  391. #if defined(PLATFORM_64BIT) && defined(__clang__) && !defined(LINUX)
  392. ILINE int64 iszero(int64_t x) { return -(x >> 63 ^ (x - 1) >> 63); }
  393. #endif
  394. #if defined(PLATFORM_64BIT) && (defined(LINUX) || defined(APPLE))
  395. ILINE int64 iszero(long int x) { return -(x >> 63 ^ (x - 1) >> 63); }
  396. #endif
  397. ILINE float if_neg_else(float test, float val_neg, float val_nonneg) { return (float)fsel(test, val_nonneg, val_neg); }
  398. template<class F>
  399. ILINE int32 inrange(F x, F end1, F end2) { return isneg(fabs_tpl(end1 + end2 - x * (F)2) - fabs_tpl(end1 - end2)); }
  400. template<class F>
  401. ILINE int32 idxmax3(const F* pdata)
  402. {
  403. int32 imax = isneg(pdata[0] - pdata[1]);
  404. imax |= isneg(pdata[imax] - pdata[2]) << 1;
  405. return imax & (2 | (imax >> 1 ^ 1));
  406. }
  407. template<class F>
  408. ILINE int32 idxmax3(const Vec3_tpl<F>& vec)
  409. {
  410. int32 imax = isneg(vec.x - vec.y);
  411. imax |= isneg(vec[imax] - vec.z) << 1;
  412. return imax & (2 | (imax >> 1 ^ 1));
  413. }
  414. static int32 inc_mod3[] = {1, 2, 0}, dec_mod3[] = {2, 0, 1};
  415. #ifdef PHYSICS_EXPORTS
  416. #define incm3(i) inc_mod3[i]
  417. #define decm3(i) dec_mod3[i]
  418. #else
  419. ILINE int32 incm3(int32 i) { return i + 1 & (i - 2) >> 31; }
  420. ILINE int32 decm3(int32 i) { return i - 1 + ((i - 1) >> 31 & 3); }
  421. #endif
  422. //////////////////////////////////////////////////////////////////////////
  423. enum type_zero
  424. {
  425. ZERO
  426. };
  427. enum type_min
  428. {
  429. VMIN
  430. };
  431. enum type_max
  432. {
  433. VMAX
  434. };
  435. enum type_identity
  436. {
  437. IDENTITY
  438. };
  439. #include "Cry_Vector2.h"
  440. #include "Cry_Vector3.h"
  441. #include "Cry_Vector4.h"
  442. #include "Cry_Matrix33.h"
  443. #include "Cry_Matrix34.h"
  444. #include "Cry_Matrix44.h"
  445. #include "Cry_Quat.h"
  446. // function for safe comparsion of floating point values
  447. ILINE bool fcmp(f32 fA, f32 fB, f32 fEpsilon = FLT_EPSILON)
  448. {
  449. return fabs(fA - fB) <= fEpsilon;
  450. }
  451. //! Given an arbitrary unit vector this function will compute two axes to build the orthonormal basis.
  452. //! \param n Unit 3D vector
  453. //! \param[out] b1 Orthonormal axis vector
  454. //! \param[out] b2 Orthonormal axis vector
  455. inline void GetBasisVectors(const Vec3& n, Vec3& b1, Vec3& b2)
  456. {
  457. if (n.z < FLT_EPSILON - 1.0f)
  458. {
  459. b1 = Vec3(0.0f, -1.0f, 0.0f);
  460. b2 = Vec3(-1.0f, 0.0f, 0.0f);
  461. return;
  462. }
  463. const float a = 1.0f / (1.0f + n.z);
  464. const float b = -n.x * n.y * a;
  465. b1 = Vec3(1.0f - n.x * n.x * a, b, -n.x);
  466. b2 = Vec3(b, 1.0f - n.y * n.y * a, -n.y);
  467. }