// irrMath.h
  1. // Copyright (C) 2002-2012 Nikolaus Gebhardt
  2. // This file is part of the "Irrlicht Engine".
  3. // For conditions of distribution and use, see copyright notice in irrlicht.h
  4. #ifndef __IRR_MATH_H_INCLUDED__
  5. #define __IRR_MATH_H_INCLUDED__
  6. #include "IrrCompileConfig.h"
  7. #include "irrTypes.h"
  8. #include <math.h>
  9. #include <float.h>
  10. #include <stdlib.h> // for abs() etc.
  11. #include <limits.h> // For INT_MAX / UINT_MAX
  // The platforms listed here get macro replacements for the single-precision
  // <math.h> functions: each one widens its argument(s) to f64, calls the
  // double-precision function and narrows the result back to f32.
  #if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)
  #define sqrtf(X) static_cast<irr::f32>(sqrt(static_cast<irr::f64>(X)))
  #define sinf(X) static_cast<irr::f32>(sin(static_cast<irr::f64>(X)))
  #define cosf(X) static_cast<irr::f32>(cos(static_cast<irr::f64>(X)))
  #define asinf(X) static_cast<irr::f32>(asin(static_cast<irr::f64>(X)))
  #define acosf(X) static_cast<irr::f32>(acos(static_cast<irr::f64>(X)))
  #define atan2f(X,Y) static_cast<irr::f32>(atan2(static_cast<irr::f64>(X), static_cast<irr::f64>(Y)))
  #define ceilf(X) static_cast<irr::f32>(ceil(static_cast<irr::f64>(X)))
  #define floorf(X) static_cast<irr::f32>(floor(static_cast<irr::f64>(X)))
  #define powf(X,Y) static_cast<irr::f32>(pow(static_cast<irr::f64>(X), static_cast<irr::f64>(Y)))
  #define fmodf(X,Y) static_cast<irr::f32>(fmod(static_cast<irr::f64>(X), static_cast<irr::f64>(Y)))
  #define fabsf(X) static_cast<irr::f32>(fabs(static_cast<irr::f64>(X)))
  #define logf(X) static_cast<irr::f32>(log(static_cast<irr::f64>(X)))
  #endif
  // Provide the single-precision limits when <float.h> did not define them.
  #if !defined(FLT_MAX)
  #define FLT_MAX 3.402823466E+38F
  #endif
  #if !defined(FLT_MIN)
  #define FLT_MIN 1.17549435e-38F
  #endif
  32. namespace irr
  33. {
  34. namespace core
  35. {
  36. //! Rounding error constant often used when comparing f32 values.
  37. const s32 ROUNDING_ERROR_S32 = 0;
  38. #ifdef __IRR_HAS_S64
  39. const s64 ROUNDING_ERROR_S64 = 0;
  40. #endif
  41. const f32 ROUNDING_ERROR_f32 = 0.000001f;
  42. const f64 ROUNDING_ERROR_f64 = 0.00000001;
  43. #ifdef PI // make sure we don't collide with a define
  44. #undef PI
  45. #endif
  46. //! Constant for PI.
  47. const f32 PI = 3.14159265359f;
  48. //! Constant for reciprocal of PI.
  49. const f32 RECIPROCAL_PI = 1.0f/PI;
  50. //! Constant for half of PI.
  51. const f32 HALF_PI = PI/2.0f;
  52. #ifdef PI64 // make sure we don't collide with a define
  53. #undef PI64
  54. #endif
  55. //! Constant for 64bit PI.
  56. const f64 PI64 = 3.1415926535897932384626433832795028841971693993751;
  57. //! Constant for 64bit reciprocal of PI.
  58. const f64 RECIPROCAL_PI64 = 1.0/PI64;
  59. //! 32bit Constant for converting from degrees to radians
  60. const f32 DEGTORAD = PI / 180.0f;
  61. //! 32bit constant for converting from radians to degrees (formally known as GRAD_PI)
  62. const f32 RADTODEG = 180.0f / PI;
  63. //! 64bit constant for converting from degrees to radians (formally known as GRAD_PI2)
  64. const f64 DEGTORAD64 = PI64 / 180.0;
  65. //! 64bit constant for converting from radians to degrees
  66. const f64 RADTODEG64 = 180.0 / PI64;
  67. //! Utility function to convert a radian value to degrees
  68. /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
  69. \param radians The radians value to convert to degrees.
  70. */
  71. inline f32 radToDeg(f32 radians)
  72. {
  73. return RADTODEG * radians;
  74. }
  75. //! Utility function to convert a radian value to degrees
  76. /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
  77. \param radians The radians value to convert to degrees.
  78. */
  79. inline f64 radToDeg(f64 radians)
  80. {
  81. return RADTODEG64 * radians;
  82. }
  83. //! Utility function to convert a degrees value to radians
  84. /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
  85. \param degrees The degrees value to convert to radians.
  86. */
  87. inline f32 degToRad(f32 degrees)
  88. {
  89. return DEGTORAD * degrees;
  90. }
  91. //! Utility function to convert a degrees value to radians
  92. /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
  93. \param degrees The degrees value to convert to radians.
  94. */
  95. inline f64 degToRad(f64 degrees)
  96. {
  97. return DEGTORAD64 * degrees;
  98. }
  //! Smaller of two values. Own implementation to get rid of the STL (VS6 problems)
  /** Only operator< is used; \p b is returned unless \p a orders before it. */
  template<class T>
  inline const T& min_(const T& a, const T& b)
  {
      if (a < b)
          return a;
      return b;
  }
  //! Smallest of three values. Own implementation to get rid of the STL (VS6 problems)
  /** Picks the smaller of \p a and \p b first, then compares that one with \p c. */
  template<class T>
  inline const T& min_(const T& a, const T& b, const T& c)
  {
      if (a < b)
          return min_(a, c);
      return min_(b, c);
  }
  //! Larger of two values. Own implementation to get rid of the STL (VS6 problems)
  /** Only operator< is used; \p a is returned unless it orders before \p b. */
  template<class T>
  inline const T& max_(const T& a, const T& b)
  {
      if (a < b)
          return b;
      return a;
  }
  //! Largest of three values. Own implementation to get rid of the STL (VS6 problems)
  /** Picks the larger of \p a and \p b first, then compares that one with \p c. */
  template<class T>
  inline const T& max_(const T& a, const T& b, const T& c)
  {
      if (a < b)
          return max_(b, c);
      return max_(a, c);
  }
  //! Absolute value of a single value. Own implementation to get rid of STL (VS6 problems)
  /** \return -a when a compares less than T(0), otherwise a unchanged. */
  template<class T>
  inline T abs_(const T& a)
  {
      if (a < (T)0)
          return -a;
      return a;
  }
  129. //! returns linear interpolation of a and b with ratio t
  130. //! \return: a if t==0, b if t==1, and the linear interpolation else
  131. template<class T>
  132. inline T lerp(const T& a, const T& b, const f32 t)
  133. {
  134. return (T)(a*(1.f-t)) + (b*t);
  135. }
  //! Limits a value to the range [low, high].
  /** The value is first raised to at least \p low and then capped at \p high. */
  template <class T>
  inline const T clamp (const T& value, const T& low, const T& high)
  {
      const T& raised = max_(value, low);
      return min_(raised, high);
  }
  //! Exchanges the contents of the two arguments.
  // Note: Two template arguments are used (the same trick boost uses) so that
  // swapping objects of an Irrlicht type without its own swap overload is not
  // ambiguous. Otherwise some compilers report conflicts in combination
  // with the STL.
  template <class T1, class T2>
  inline void swap(T1& a, T2& b)
  {
      T1 saved(a); // copy of a taken before it is overwritten
      a = b;
      b = saved;
  }
  154. //! returns if a equals b, taking possible rounding errors into account
  155. inline bool equals(const f64 a, const f64 b, const f64 tolerance = ROUNDING_ERROR_f64)
  156. {
  157. return (a + tolerance >= b) && (a - tolerance <= b);
  158. }
  159. //! returns if a equals b, taking possible rounding errors into account
  160. inline bool equals(const f32 a, const f32 b, const f32 tolerance = ROUNDING_ERROR_f32)
  161. {
  162. return (a + tolerance >= b) && (a - tolerance <= b);
  163. }
  164. union FloatIntUnion32
  165. {
  166. FloatIntUnion32(float f1 = 0.0f) : f(f1) {}
  167. // Portable sign-extraction
  168. bool sign() const { return (i >> 31) != 0; }
  169. irr::s32 i;
  170. irr::f32 f;
  171. };
  172. //! We compare the difference in ULP's (spacing between floating-point numbers, aka ULP=1 means there exists no float between).
  173. //\result true when numbers have a ULP <= maxUlpDiff AND have the same sign.
  174. inline bool equalsByUlp(f32 a, f32 b, int maxUlpDiff)
  175. {
  176. // Based on the ideas and code from Bruce Dawson on
  177. // http://www.altdevblogaday.com/2012/02/22/comparing-floating-point-numbers-2012-edition/
  178. // When floats are interpreted as integers the two nearest possible float numbers differ just
  179. // by one integer number. Also works the other way round, an integer of 1 interpreted as float
  180. // is for example the smallest possible float number.
  181. FloatIntUnion32 fa(a);
  182. FloatIntUnion32 fb(b);
  183. // Different signs, we could maybe get difference to 0, but so close to 0 using epsilons is better.
  184. if ( fa.sign() != fb.sign() )
  185. {
  186. // Check for equality to make sure +0==-0
  187. if (fa.i == fb.i)
  188. return true;
  189. return false;
  190. }
  191. // Find the difference in ULPs.
  192. int ulpsDiff = abs_(fa.i- fb.i);
  193. if (ulpsDiff <= maxUlpDiff)
  194. return true;
  195. return false;
  196. }
  // NOTE: the two exact-match overloads below are compiled out by '#if 0'.
  197. #if 0
  198. //! returns if a equals b, not using any rounding tolerance
  199. inline bool equals(const s32 a, const s32 b)
  200. {
  201. return (a == b);
  202. }
  203. //! returns if a equals b, not using any rounding tolerance
  204. inline bool equals(const u32 a, const u32 b)
  205. {
  206. return (a == b);
  207. }
  208. #endif
  209. //! returns if a equals b, taking an explicit rounding tolerance into account
  210. inline bool equals(const s32 a, const s32 b, const s32 tolerance = ROUNDING_ERROR_S32)
  211. {
  212. return (a + tolerance >= b) && (a - tolerance <= b);
  213. }
  214. //! returns if a equals b, taking an explicit rounding tolerance into account
  215. inline bool equals(const u32 a, const u32 b, const s32 tolerance = ROUNDING_ERROR_S32)
  216. {
  217. return (a + tolerance >= b) && (a - tolerance <= b);
  218. }
  #if defined(__IRR_HAS_S64)
  //! Tests whether two s64 values are equal within an explicit tolerance.
  /** \return true when b lies inside [a - tolerance, a + tolerance]. */
  inline bool equals(const s64 a, const s64 b, const s64 tolerance = ROUNDING_ERROR_S64)
  {
      return (b <= a + tolerance) && (b >= a - tolerance);
  }
  #endif
  226. //! returns if a equals zero, taking rounding errors into account
  227. inline bool iszero(const f64 a, const f64 tolerance = ROUNDING_ERROR_f64)
  228. {
  229. return fabs(a) <= tolerance;
  230. }
  231. //! returns if a equals zero, taking rounding errors into account
  232. inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
  233. {
  234. return fabsf(a) <= tolerance;
  235. }
  236. //! returns if a equals not zero, taking rounding errors into account
  237. inline bool isnotzero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
  238. {
  239. return fabsf(a) > tolerance;
  240. }
  241. //! returns if a equals zero, taking rounding errors into account
  242. inline bool iszero(const s32 a, const s32 tolerance = 0)
  243. {
  244. return ( a & 0x7ffffff ) <= tolerance;
  245. }
  246. //! returns if a equals zero, taking rounding errors into account
  247. inline bool iszero(const u32 a, const u32 tolerance = 0)
  248. {
  249. return a <= tolerance;
  250. }
  #if defined(__IRR_HAS_S64)
  //! Tests whether an s64 value is zero within a tolerance.
  /** \return true when abs_(a) does not exceed tolerance. */
  inline bool iszero(const s64 a, const s64 tolerance = 0)
  {
      const s64 magnitude = abs_(a);
      return magnitude <= tolerance;
  }
  #endif
  258. inline s32 s32_min(s32 a, s32 b)
  259. {
  260. const s32 mask = (a - b) >> 31;
  261. return (a & mask) | (b & ~mask);
  262. }
  263. inline s32 s32_max(s32 a, s32 b)
  264. {
  265. const s32 mask = (a - b) >> 31;
  266. return (b & mask) | (a & ~mask);
  267. }
  268. inline s32 s32_clamp (s32 value, s32 low, s32 high)
  269. {
  270. return s32_min(s32_max(value,low), high);
  271. }
  272. /*
  273. float IEEE-754 bit representation
  274. 0 0x00000000
  275. 1.0 0x3f800000
  276. 0.5 0x3f000000
  277. 3 0x40400000
  278. +inf 0x7f800000
  279. -inf 0xff800000
  280. +NaN 0x7fc00000 or 0x7ff00000
  281. in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)
  282. */
  //! Overlay giving u32, s32 and f32 views of the same storage.
  283. typedef union { u32 u; s32 s; f32 f; } inttofloat;
  // The three macros below reinterpret the storage of 'f' through a cast
  // pointer, so 'f' must be an object whose address can be taken.
  // NOTE(review): accessing an f32 through an s32*/u32* is a strict-aliasing
  // hazard on optimizing compilers - confirm the build flags tolerate it.
  //! Value of f's storage read as s32.
  284. #define F32_AS_S32(f) (*((s32 *) &(f)))
  //! Value of f's storage read as u32.
  285. #define F32_AS_U32(f) (*((u32 *) &(f)))
  //! Address of f's storage as a u32 pointer.
  286. #define F32_AS_U32_POINTER(f) ( ((u32 *) &(f)))
  //! Bit pattern listed for 0 in the table above.
  287. #define F32_VALUE_0 0x00000000
  //! Bit pattern listed for 1.0 in the table above.
  288. #define F32_VALUE_1 0x3f800000
  //! Mask with only the top bit set.
  289. #define F32_SIGN_BIT 0x80000000U
  //! Mask with every bit except the top bit set.
  290. #define F32_EXPON_MANTISSA 0x7FFFFFFFU
  291. //! code is taken from IceFPU
  292. //! Integer representation of a floating-point value.
  293. #ifdef IRRLICHT_FAST_MATH
  294. #define IR(x) ((u32&)(x))
  295. #else
  296. inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}
  297. #endif
  298. //! Absolute integer representation of a floating-point value
  299. #define AIR(x) (IR(x)&0x7fffffff)
  300. //! Floating-point representation of an integer value.
  301. #ifdef IRRLICHT_FAST_MATH
  302. #define FR(x) ((f32&)(x))
  303. #else
  304. inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}
  305. inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}
  306. #endif
  307. //! integer representation of 1.0
  308. #define IEEE_1_0 0x3f800000
  309. //! integer representation of 255.0
  310. #define IEEE_255_0 0x437f0000
  // Comparison helpers for f32 values. With IRRLICHT_FAST_MATH they compare
  // the integer view of the argument (via F32_AS_U32 / F32_AS_S32, so the
  // argument must be an addressable object); otherwise they are ordinary
  // floating-point comparisons.
  311. #ifdef IRRLICHT_FAST_MATH
  // unsigned view above the sign-bit pattern
  312. #define F32_LOWER_0(f) (F32_AS_U32(f) > F32_SIGN_BIT)
  313. #define F32_LOWER_EQUAL_0(f) (F32_AS_S32(f) <= F32_VALUE_0)
  314. #define F32_GREATER_0(f) (F32_AS_S32(f) > F32_VALUE_0)
  // unsigned view at or below the sign-bit pattern
  315. #define F32_GREATER_EQUAL_0(f) (F32_AS_U32(f) <= F32_SIGN_BIT)
  316. #define F32_EQUAL_1(f) (F32_AS_U32(f) == F32_VALUE_1)
  // the sign bit is masked off before comparing with the zero pattern
  317. #define F32_EQUAL_0(f) ( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)
  318. // only same sign
  319. #define F32_A_GREATER_B(a,b) (F32_AS_S32((a)) > F32_AS_S32((b)))
  320. #else
  321. #define F32_LOWER_0(n) ((n) < 0.0f)
  322. #define F32_LOWER_EQUAL_0(n) ((n) <= 0.0f)
  323. #define F32_GREATER_0(n) ((n) > 0.0f)
  324. #define F32_GREATER_EQUAL_0(n) ((n) >= 0.0f)
  325. #define F32_EQUAL_1(n) ((n) == 1.0f)
  326. #define F32_EQUAL_0(n) ((n) == 0.0f)
  327. #define F32_A_GREATER_B(a,b) ((a) > (b))
  328. #endif
  // REALINLINE: __forceinline on MSVC, plain inline elsewhere. A definition
  // made before this header is included is left untouched.
  #if !defined(REALINLINE)
  #if defined(_MSC_VER)
  #define REALINLINE __forceinline
  #else
  #define REALINLINE inline
  #endif
  #endif
  336. #if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
  337. // 8-bit bools in borland builder
  338. //! conditional set based on mask and arithmetic shift
  339. REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )
  340. {
  341. return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;
  342. }
  343. //! conditional set based on mask and arithmetic shift
  344. REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )
  345. {
  346. return ( -condition >> 31 ) & a;
  347. }
  348. #else
  349. //! conditional set based on mask and arithmetic shift
  350. REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
  351. {
  352. return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;
  353. }
  354. //! conditional set based on mask and arithmetic shift
  355. REALINLINE u16 if_c_a_else_b ( const s16 condition, const u16 a, const u16 b )
  356. {
  357. return ( ( -condition >> 15 ) & ( a ^ b ) ) ^ b;
  358. }
  359. //! conditional set based on mask and arithmetic shift
  360. REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )
  361. {
  362. return ( -condition >> 31 ) & a;
  363. }
  364. #endif
  365. /*
  366. if (condition) state |= m; else state &= ~m;
  367. */
  368. REALINLINE void setbit_cond ( u32 &state, s32 condition, u32 mask )
  369. {
  370. // 0, or any postive to mask
  371. //s32 conmask = -condition >> 31;
  372. state ^= ( ( -condition >> 31 ) ^ state ) & mask;
  373. }
  374. inline f32 round_( f32 x )
  375. {
  376. return floorf( x + 0.5f );
  377. }
  //! Intended to clear pending floating-point exception flags.
  /** Without IRRLICHT_FAST_MATH the body is empty. With it, the function
  returns before any of the clearing variants below is reached.
  NOTE(review): the early return makes the feclearexcept / fnclex / fclex
  code unreachable - confirm whether that is intentional. */
  378. REALINLINE void clearFPUException ()
  379. {
  380. #ifdef IRRLICHT_FAST_MATH
  381. return;
  // NOTE(review): this only matches when feclearexcept is a macro, and
  // <fenv.h> is not among the headers included above - confirm.
  382. #ifdef feclearexcept
  383. feclearexcept(FE_ALL_EXCEPT);
  384. #elif defined(_MSC_VER)
  385. __asm fnclex;
  // NOTE(review): GCC usually predefines __i386__ / __x86_64__; confirm
  // that __x86__ is ever defined by the build.
  386. #elif defined(__GNUC__) && defined(__x86__)
  387. __asm__ __volatile__ ("fclex \n\t");
  388. #else
  // NOTE(review): '# warn' is not a standard directive (#warning / #error are).
  389. # warn clearFPUException not supported.
  390. #endif
  391. #endif
  392. }
  393. // calculate: sqrt ( x )
  394. REALINLINE f32 squareroot(const f32 f)
  395. {
  396. return sqrtf(f);
  397. }
  398. // calculate: sqrt ( x )
  399. REALINLINE f64 squareroot(const f64 f)
  400. {
  401. return sqrt(f);
  402. }
  403. // calculate: sqrt ( x )
  404. REALINLINE s32 squareroot(const s32 f)
  405. {
  406. return static_cast<s32>(squareroot(static_cast<f32>(f)));
  407. }
  #if defined(__IRR_HAS_S64)
  //! Integer square root: the f64 square root of f converted back to s64.
  REALINLINE s64 squareroot(const s64 f)
  {
      const f64 asFloat = static_cast<f64>(f);
      return static_cast<s64>(squareroot(asFloat));
  }
  #endif
  415. // calculate: 1 / sqrt ( x )
  416. REALINLINE f64 reciprocal_squareroot(const f64 x)
  417. {
  418. return 1.0 / sqrt(x);
  419. }
  420. // calculate: 1 / sqrtf ( x )
  //! Reciprocal square root of an f32.
  /** With IRRLICHT_FAST_MATH on MSVC the SSE rsqrtss estimate is returned;
  every other configuration returns 1.f / sqrtf(f). */
  421. REALINLINE f32 reciprocal_squareroot(const f32 f)
  422. {
  423. #if defined ( IRRLICHT_FAST_MATH )
  424. #if defined(_MSC_VER)
  425. // SSE reciprocal square root estimate, accurate to 12 significant
  426. // bits of the mantissa
  427. f32 recsqrt;
  428. __asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
  429. __asm movss recsqrt, xmm0 // return xmm0
  430. return recsqrt;
  // The block comment below is an unused alternative approximation.
  431. /*
  432. // comes from Nvidia
  433. u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
  434. f32 y = *(f32*)&tmp;
  435. return y * (1.47f - 0.47f * x * y * y);
  436. */
  437. #else
  438. return 1.f / sqrtf(f);
  439. #endif
  440. #else // no fast math
  441. return 1.f / sqrtf(f);
  442. #endif
  443. }
  444. // calculate: 1 / sqrtf( x )
  445. REALINLINE s32 reciprocal_squareroot(const s32 x)
  446. {
  447. return static_cast<s32>(reciprocal_squareroot(static_cast<f32>(x)));
  448. }
  449. // calculate: 1 / x
  //! Reciprocal of an f32.
  /** With IRRLICHT_FAST_MATH an SSE rcpss estimate refined by one
  Newton-Raphson step is returned; otherwise 1.f / f.
  NOTE(review): the fast path uses the '__asm' statement form without a
  compiler check - confirm it is only built with a compiler accepting it. */
  450. REALINLINE f32 reciprocal( const f32 f )
  451. {
  452. #if defined (IRRLICHT_FAST_MATH)
  453. // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
  454. // bits of the mantissa
  455. // One Newton-Raphson Iteration:
  456. // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
  457. f32 rec;
  458. __asm rcpss xmm0, f // xmm0 = rcpss(f)
  459. __asm movss xmm1, f // xmm1 = f
  460. __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
  461. __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f) * rcpss(f)
  462. __asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
  463. __asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
  464. // - f * rcpss(f) * rcpss(f)
  465. __asm movss rec, xmm0 // return xmm0
  466. return rec;
  // The comment lines below follow the return and describe a disabled
  // alternative that avoids dividing by zero.
  467. //! i do not divide through 0.. (fpu exception)
  468. // instead set f to a high value to get a return value near zero..
  469. // -1000000000000.f.. is use minus to stay negative..
  470. // must test's here (plane.normal dot anything ) checks on <= 0.f
  471. //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
  472. //return 1.f / FR ( x );
  473. #else // no fast math
  474. return 1.f / f;
  475. #endif
  476. }
  477. // calculate: 1 / x
  478. REALINLINE f64 reciprocal ( const f64 f )
  479. {
  480. return 1.0 / f;
  481. }
  482. // calculate: 1 / x, low precision allowed
  //! Reciprocal of an f32 where low precision is acceptable.
  /** With IRRLICHT_FAST_MATH the active code is the same rcpss estimate plus
  one Newton-Raphson step used by reciprocal(f32); otherwise 1.f / f.
  NOTE(review): the fast path uses the '__asm' statement form without a
  compiler check - confirm it is only built with a compiler accepting it. */
  483. REALINLINE f32 reciprocal_approxim ( const f32 f )
  484. {
  485. #if defined( IRRLICHT_FAST_MATH)
  486. // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
  487. // bits of the mantissa
  488. // One Newton-Raphson Iteration:
  489. // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
  490. f32 rec;
  491. __asm rcpss xmm0, f // xmm0 = rcpss(f)
  492. __asm movss xmm1, f // xmm1 = f
  493. __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
  494. __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f) * rcpss(f)
  495. __asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
  496. __asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
  497. // - f * rcpss(f) * rcpss(f)
  498. __asm movss rec, xmm0 // return xmm0
  499. return rec;
  // The two block comments below are unused lower-precision alternatives.
  500. /*
  501. // SSE reciprocal estimate, accurate to 12 significant bits of
  502. f32 rec;
  503. __asm rcpss xmm0, f // xmm0 = rcpss(f)
  504. __asm movss rec , xmm0 // return xmm0
  505. return rec;
  506. */
  507. /*
  508. register u32 x = 0x7F000000 - IR ( p );
  509. const f32 r = FR ( x );
  510. return r * (2.0f - p * r);
  511. */
  512. #else // no fast math
  513. return 1.f / f;
  514. #endif
  515. }
  //! Floor of an f32, returned as s32.
  /** Without IRRLICHT_FAST_MATH this is (s32) floorf(x). With it, MSVC and
  GCC builds subtract 0.5 and store the result as an integer using x87
  instructions.
  NOTE(review): the fast path presumably depends on the FPU rounding to
  nearest, and may differ from floorf for exact integers - confirm. */
  516. REALINLINE s32 floor32(f32 x)
  517. {
  518. #ifdef IRRLICHT_FAST_MATH
  519. const f32 h = 0.5f;
  520. s32 t;
  521. #if defined(_MSC_VER)
  522. __asm
  523. {
  524. fld x
  525. fsub h
  526. fistp t
  527. }
  528. #elif defined(__GNUC__)
  529. __asm__ __volatile__ (
  530. "fsub %2 \n\t"
  531. "fistpl %0"
  532. : "=m" (t)
  533. : "t" (x), "f" (h)
  534. : "st"
  535. );
  536. #else
  // NOTE(review): '# warn' is not a standard directive (#warning / #error are).
  537. # warn IRRLICHT_FAST_MATH not supported.
  538. return (s32) floorf ( x );
  539. #endif
  540. return t;
  541. #else // no fast math
  542. return (s32) floorf ( x );
  543. #endif
  544. }
  //! Ceiling of an f32, returned as s32.
  /** Without IRRLICHT_FAST_MATH this is (s32) ceilf(x). With it, MSVC and
  GCC builds add 0.5 and store the result as an integer using x87
  instructions.
  NOTE(review): the fast path presumably depends on the FPU rounding to
  nearest, and may differ from ceilf for exact integers - confirm. */
  545. REALINLINE s32 ceil32 ( f32 x )
  546. {
  547. #ifdef IRRLICHT_FAST_MATH
  548. const f32 h = 0.5f;
  549. s32 t;
  550. #if defined(_MSC_VER)
  551. __asm
  552. {
  553. fld x
  554. fadd h
  555. fistp t
  556. }
  557. #elif defined(__GNUC__)
  558. __asm__ __volatile__ (
  559. "fadd %2 \n\t"
  560. "fistpl %0 \n\t"
  561. : "=m"(t)
  562. : "t"(x), "f"(h)
  563. : "st"
  564. );
  565. #else
  // NOTE(review): '# warn' is not a standard directive (#warning / #error are).
  566. # warn IRRLICHT_FAST_MATH not supported.
  567. return (s32) ceilf ( x );
  568. #endif
  569. return t;
  570. #else // not fast math
  571. return (s32) ceilf ( x );
  572. #endif
  573. }
  //! Rounds an f32 to an s32.
  /** Without IRRLICHT_FAST_MATH this is (s32) round_(x). With it, MSVC and
  GCC builds store x as an integer directly using x87 instructions.
  NOTE(review): the fast path presumably rounds according to the current FPU
  rounding mode, which can differ from round_ at halfway values - confirm. */
  574. REALINLINE s32 round32(f32 x)
  575. {
  576. #if defined(IRRLICHT_FAST_MATH)
  577. s32 t;
  578. #if defined(_MSC_VER)
  579. __asm
  580. {
  581. fld x
  582. fistp t
  583. }
  584. #elif defined(__GNUC__)
  585. __asm__ __volatile__ (
  586. "fistpl %0 \n\t"
  587. : "=m"(t)
  588. : "t"(x)
  589. : "st"
  590. );
  591. #else
  // NOTE(review): '# warn' is not a standard directive (#warning / #error are).
  592. # warn IRRLICHT_FAST_MATH not supported.
  593. return (s32) round_(x);
  594. #endif
  595. return t;
  596. #else // no fast math
  597. return (s32) round_(x);
  598. #endif
  599. }
  600. inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
  601. {
  602. return a > b ? (a > c ? a : c) : (b > c ? b : c);
  603. }
  604. inline f32 f32_min3(const f32 a, const f32 b, const f32 c)
  605. {
  606. return a < b ? (a < c ? a : c) : (b < c ? b : c);
  607. }
  608. inline f32 fract ( f32 x )
  609. {
  610. return x - floorf ( x );
  611. }
  612. } // end namespace core
  613. } // end namespace irr
  614. #ifndef IRRLICHT_FAST_MATH
  615. using irr::core::IR;
  616. using irr::core::FR;
  617. #endif
  618. #endif