MathUtil.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. // Copyright 2008 Dolphin Emulator Project
  2. // Licensed under GPLv2+
  3. // Refer to the license.txt file included.
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <numeric>
#include <vector>

#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
  10. namespace MathUtil
  11. {
  12. u32 ClassifyDouble(double dvalue)
  13. {
  14. // TODO: Optimize the below to be as fast as possible.
  15. IntDouble value(dvalue);
  16. u64 sign = value.i & DOUBLE_SIGN;
  17. u64 exp = value.i & DOUBLE_EXP;
  18. if (exp > DOUBLE_ZERO && exp < DOUBLE_EXP)
  19. {
  20. // Nice normalized number.
  21. return sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
  22. }
  23. else
  24. {
  25. u64 mantissa = value.i & DOUBLE_FRAC;
  26. if (mantissa)
  27. {
  28. if (exp)
  29. {
  30. return PPC_FPCLASS_QNAN;
  31. }
  32. else
  33. {
  34. // Denormalized number.
  35. return sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
  36. }
  37. }
  38. else if (exp)
  39. {
  40. //Infinite
  41. return sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
  42. }
  43. else
  44. {
  45. //Zero
  46. return sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
  47. }
  48. }
  49. }
  50. u32 ClassifyFloat(float fvalue)
  51. {
  52. // TODO: Optimize the below to be as fast as possible.
  53. IntFloat value(fvalue);
  54. u32 sign = value.i & FLOAT_SIGN;
  55. u32 exp = value.i & FLOAT_EXP;
  56. if (exp > FLOAT_ZERO && exp < FLOAT_EXP)
  57. {
  58. // Nice normalized number.
  59. return sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
  60. }
  61. else
  62. {
  63. u32 mantissa = value.i & FLOAT_FRAC;
  64. if (mantissa)
  65. {
  66. if (exp)
  67. {
  68. return PPC_FPCLASS_QNAN; // Quiet NAN
  69. }
  70. else
  71. {
  72. // Denormalized number.
  73. return sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
  74. }
  75. }
  76. else if (exp)
  77. {
  78. // Infinite
  79. return sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
  80. }
  81. else
  82. {
  83. //Zero
  84. return sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
  85. }
  86. }
  87. }
  88. const int frsqrte_expected_base[] =
  89. {
  90. 0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000,
  91. 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000,
  92. 0x2881000, 0x2665000, 0x2468000, 0x2287000,
  93. 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000,
  94. 0x1a7e800, 0x17cb800, 0x1552800, 0x130c000,
  95. 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000,
  96. 0x09e5000, 0x0867000, 0x06ff000, 0x05ab800,
  97. 0x046a000, 0x0339800, 0x0218800, 0x0105800,
  98. };
  99. const int frsqrte_expected_dec[] =
  100. {
  101. 0x7a4, 0x700, 0x670, 0x5f2,
  102. 0x584, 0x524, 0x4cc, 0x47e,
  103. 0x43a, 0x3fa, 0x3c2, 0x38e,
  104. 0x35e, 0x332, 0x30a, 0x2e6,
  105. 0x568, 0x4f3, 0x48d, 0x435,
  106. 0x3e7, 0x3a2, 0x365, 0x32e,
  107. 0x2fc, 0x2d0, 0x2a8, 0x283,
  108. 0x261, 0x243, 0x226, 0x20b,
  109. };
  110. double ApproximateReciprocalSquareRoot(double val)
  111. {
  112. union
  113. {
  114. double valf;
  115. s64 vali;
  116. };
  117. valf = val;
  118. s64 mantissa = vali & ((1LL << 52) - 1);
  119. s64 sign = vali & (1ULL << 63);
  120. s64 exponent = vali & (0x7FFLL << 52);
  121. // Special case 0
  122. if (mantissa == 0 && exponent == 0)
  123. return sign ? -std::numeric_limits<double>::infinity() :
  124. std::numeric_limits<double>::infinity();
  125. // Special case NaN-ish numbers
  126. if (exponent == (0x7FFLL << 52))
  127. {
  128. if (mantissa == 0)
  129. {
  130. if (sign)
  131. return std::numeric_limits<double>::quiet_NaN();
  132. return 0.0;
  133. }
  134. return 0.0 + valf;
  135. }
  136. // Negative numbers return NaN
  137. if (sign)
  138. return std::numeric_limits<double>::quiet_NaN();
  139. if (!exponent)
  140. {
  141. // "Normalize" denormal values
  142. do
  143. {
  144. exponent -= 1LL << 52;
  145. mantissa <<= 1;
  146. } while (!(mantissa & (1LL << 52)));
  147. mantissa &= (1LL << 52) - 1;
  148. exponent += 1LL << 52;
  149. }
  150. bool odd_exponent = !(exponent & (1LL << 52));
  151. exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52);
  152. int i = (int)(mantissa >> 37);
  153. vali = sign | exponent;
  154. int index = i / 2048 + (odd_exponent ? 16 : 0);
  155. vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
  156. return valf;
  157. }
  158. const int fres_expected_base[] =
  159. {
  160. 0x7ff800, 0x783800, 0x70ea00, 0x6a0800,
  161. 0x638800, 0x5d6200, 0x579000, 0x520800,
  162. 0x4cc800, 0x47ca00, 0x430800, 0x3e8000,
  163. 0x3a2c00, 0x360800, 0x321400, 0x2e4a00,
  164. 0x2aa800, 0x272c00, 0x23d600, 0x209e00,
  165. 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800,
  166. 0x124400, 0x0fbe00, 0x0d3800, 0x0ade00,
  167. 0x088400, 0x065000, 0x041c00, 0x020c00,
  168. };
  169. const int fres_expected_dec[] =
  170. {
  171. 0x3e1, 0x3a7, 0x371, 0x340,
  172. 0x313, 0x2ea, 0x2c4, 0x2a0,
  173. 0x27f, 0x261, 0x245, 0x22a,
  174. 0x212, 0x1fb, 0x1e5, 0x1d1,
  175. 0x1be, 0x1ac, 0x19b, 0x18b,
  176. 0x17c, 0x16e, 0x15b, 0x15b,
  177. 0x143, 0x143, 0x12d, 0x12d,
  178. 0x11a, 0x11a, 0x108, 0x106,
  179. };
  180. // Used by fres and ps_res.
  181. double ApproximateReciprocal(double val)
  182. {
  183. union
  184. {
  185. double valf;
  186. s64 vali;
  187. };
  188. valf = val;
  189. s64 mantissa = vali & ((1LL << 52) - 1);
  190. s64 sign = vali & (1ULL << 63);
  191. s64 exponent = vali & (0x7FFLL << 52);
  192. // Special case 0
  193. if (mantissa == 0 && exponent == 0)
  194. return sign ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity();
  195. // Special case NaN-ish numbers
  196. if (exponent == (0x7FFLL << 52))
  197. {
  198. if (mantissa == 0)
  199. return sign ? -0.0 : 0.0;
  200. return 0.0 + valf;
  201. }
  202. // Special case small inputs
  203. if (exponent < (895LL << 52))
  204. return sign ? -std::numeric_limits<float>::max() : std::numeric_limits<float>::max();
  205. // Special case large inputs
  206. if (exponent >= (1149LL << 52))
  207. return sign ? -0.0f : 0.0f;
  208. exponent = (0x7FDLL << 52) - exponent;
  209. int i = (int)(mantissa >> 37);
  210. vali = sign | exponent;
  211. vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29;
  212. return valf;
  213. }
  214. } // namespace
  215. inline void MatrixMul(int n, const float *a, const float *b, float *result)
  216. {
  217. for (int i = 0; i < n; ++i)
  218. {
  219. for (int j = 0; j < n; ++j)
  220. {
  221. float temp = 0;
  222. for (int k = 0; k < n; ++k)
  223. {
  224. temp += a[i * n + k] * b[k * n + j];
  225. }
  226. result[i * n + j] = temp;
  227. }
  228. }
  229. }
  230. // Calculate sum of a float list
  231. float MathFloatVectorSum(const std::vector<float>& Vec)
  232. {
  233. return std::accumulate(Vec.begin(), Vec.end(), 0.0f);
  234. }
  235. void Matrix33::LoadIdentity(Matrix33 &mtx)
  236. {
  237. memset(mtx.data, 0, sizeof(mtx.data));
  238. mtx.data[0] = 1.0f;
  239. mtx.data[4] = 1.0f;
  240. mtx.data[8] = 1.0f;
  241. }
  242. void Matrix33::RotateX(Matrix33 &mtx, float rad)
  243. {
  244. float s = sin(rad);
  245. float c = cos(rad);
  246. memset(mtx.data, 0, sizeof(mtx.data));
  247. mtx.data[0] = 1;
  248. mtx.data[4] = c;
  249. mtx.data[5] = -s;
  250. mtx.data[7] = s;
  251. mtx.data[8] = c;
  252. }
  253. void Matrix33::RotateY(Matrix33 &mtx, float rad)
  254. {
  255. float s = sin(rad);
  256. float c = cos(rad);
  257. memset(mtx.data, 0, sizeof(mtx.data));
  258. mtx.data[0] = c;
  259. mtx.data[2] = s;
  260. mtx.data[4] = 1;
  261. mtx.data[6] = -s;
  262. mtx.data[8] = c;
  263. }
  264. void Matrix33::Multiply(const Matrix33 &a, const Matrix33 &b, Matrix33 &result)
  265. {
  266. MatrixMul(3, a.data, b.data, result.data);
  267. }
  268. void Matrix33::Multiply(const Matrix33 &a, const float vec[3], float result[3])
  269. {
  270. for (int i = 0; i < 3; ++i)
  271. {
  272. result[i] = 0;
  273. for (int k = 0; k < 3; ++k)
  274. {
  275. result[i] += a.data[i * 3 + k] * vec[k];
  276. }
  277. }
  278. }
  279. void Matrix44::LoadIdentity(Matrix44 &mtx)
  280. {
  281. memset(mtx.data, 0, sizeof(mtx.data));
  282. mtx.data[0] = 1.0f;
  283. mtx.data[5] = 1.0f;
  284. mtx.data[10] = 1.0f;
  285. mtx.data[15] = 1.0f;
  286. }
  287. void Matrix44::LoadMatrix33(Matrix44 &mtx, const Matrix33 &m33)
  288. {
  289. for (int i = 0; i < 3; ++i)
  290. {
  291. for (int j = 0; j < 3; ++j)
  292. {
  293. mtx.data[i * 4 + j] = m33.data[i * 3 + j];
  294. }
  295. }
  296. for (int i = 0; i < 3; ++i)
  297. {
  298. mtx.data[i * 4 + 3] = 0;
  299. mtx.data[i + 12] = 0;
  300. }
  301. mtx.data[15] = 1.0f;
  302. }
  303. void Matrix44::Set(Matrix44 &mtx, const float mtxArray[16])
  304. {
  305. for (int i = 0; i < 16; ++i)
  306. {
  307. mtx.data[i] = mtxArray[i];
  308. }
  309. }
  310. void Matrix44::Translate(Matrix44 &mtx, const float vec[3])
  311. {
  312. LoadIdentity(mtx);
  313. mtx.data[3] = vec[0];
  314. mtx.data[7] = vec[1];
  315. mtx.data[11] = vec[2];
  316. }
  317. void Matrix44::Shear(Matrix44 &mtx, const float a, const float b)
  318. {
  319. LoadIdentity(mtx);
  320. mtx.data[2] = a;
  321. mtx.data[6] = b;
  322. }
  323. void Matrix44::Multiply(const Matrix44 &a, const Matrix44 &b, Matrix44 &result)
  324. {
  325. MatrixMul(4, a.data, b.data, result.data);
  326. }