softfloat.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /*
  2. * Floating point emulation support for subnormalised numbers on SH4
  3. * architecture This file is derived from the SoftFloat IEC/IEEE
  4. * Floating-point Arithmetic Package, Release 2 the original license of
  5. * which is reproduced below.
  6. *
  7. * ========================================================================
  8. *
  9. * This C source file is part of the SoftFloat IEC/IEEE Floating-point
  10. * Arithmetic Package, Release 2.
  11. *
  12. * Written by John R. Hauser. This work was made possible in part by the
  13. * International Computer Science Institute, located at Suite 600, 1947 Center
  14. * Street, Berkeley, California 94704. Funding was partially provided by the
  15. * National Science Foundation under grant MIP-9311980. The original version
  16. * of this code was written as part of a project to build a fixed-point vector
  17. * processor in collaboration with the University of California at Berkeley,
  18. * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  19. * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  20. * arithmetic/softfloat.html'.
  21. *
  22. * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  23. * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  24. * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  25. * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  26. * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  27. *
  28. * Derivative works are acceptable, even for commercial purposes, so long as
  29. * (1) they include prominent notice that the work is derivative, and (2) they
  30. * include prominent notice akin to these three paragraphs for those parts of
  31. * this code that are retained.
  32. *
  33. * ========================================================================
  34. *
  35. * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
  36. * and Kamel Khelifi <kamel.khelifi@st.com>
  37. */
  38. #include <linux/kernel.h>
  39. #include <cpu/fpu.h>
  40. #include <asm/div64.h>
  41. #define LIT64( a ) a##LL
  42. typedef char flag;
  43. typedef unsigned char uint8;
  44. typedef signed char int8;
  45. typedef int uint16;
  46. typedef int int16;
  47. typedef unsigned int uint32;
  48. typedef signed int int32;
  49. typedef unsigned long long int bits64;
  50. typedef signed long long int sbits64;
  51. typedef unsigned char bits8;
  52. typedef signed char sbits8;
  53. typedef unsigned short int bits16;
  54. typedef signed short int sbits16;
  55. typedef unsigned int bits32;
  56. typedef signed int sbits32;
  57. typedef unsigned long long int uint64;
  58. typedef signed long long int int64;
  59. typedef unsigned long int float32;
  60. typedef unsigned long long float64;
  61. extern void float_raise(unsigned int flags); /* in fpu.c */
  62. extern int float_rounding_mode(void); /* in fpu.c */
  63. bits64 extractFloat64Frac(float64 a);
  64. flag extractFloat64Sign(float64 a);
  65. int16 extractFloat64Exp(float64 a);
  66. int16 extractFloat32Exp(float32 a);
  67. flag extractFloat32Sign(float32 a);
  68. bits32 extractFloat32Frac(float32 a);
  69. float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
  70. void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
  71. float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
  72. void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
  73. float64 float64_sub(float64 a, float64 b);
  74. float32 float32_sub(float32 a, float32 b);
  75. float32 float32_add(float32 a, float32 b);
  76. float64 float64_add(float64 a, float64 b);
  77. float64 float64_div(float64 a, float64 b);
  78. float32 float32_div(float32 a, float32 b);
  79. float32 float32_mul(float32 a, float32 b);
  80. float64 float64_mul(float64 a, float64 b);
  81. float32 float64_to_float32(float64 a);
  82. void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  83. bits64 * z1Ptr);
  84. void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  85. bits64 * z1Ptr);
  86. void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
  87. static int8 countLeadingZeros32(bits32 a);
  88. static int8 countLeadingZeros64(bits64 a);
  89. static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
  90. bits64 zSig);
  91. static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
  92. static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
  93. static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
  94. static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
  95. bits32 zSig);
  96. static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
  97. static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
  98. static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
  99. static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
  100. bits64 * zSigPtr);
  101. static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
  102. static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
  103. bits32 * zSigPtr);
  104. bits64 extractFloat64Frac(float64 a)
  105. {
  106. return a & LIT64(0x000FFFFFFFFFFFFF);
  107. }
  108. flag extractFloat64Sign(float64 a)
  109. {
  110. return a >> 63;
  111. }
  112. int16 extractFloat64Exp(float64 a)
  113. {
  114. return (a >> 52) & 0x7FF;
  115. }
  116. int16 extractFloat32Exp(float32 a)
  117. {
  118. return (a >> 23) & 0xFF;
  119. }
  120. flag extractFloat32Sign(float32 a)
  121. {
  122. return a >> 31;
  123. }
  124. bits32 extractFloat32Frac(float32 a)
  125. {
  126. return a & 0x007FFFFF;
  127. }
  128. float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
  129. {
  130. return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
  131. }
  132. void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
  133. {
  134. bits64 z;
  135. if (count == 0) {
  136. z = a;
  137. } else if (count < 64) {
  138. z = (a >> count) | ((a << ((-count) & 63)) != 0);
  139. } else {
  140. z = (a != 0);
  141. }
  142. *zPtr = z;
  143. }
  144. static int8 countLeadingZeros32(bits32 a)
  145. {
  146. static const int8 countLeadingZerosHigh[] = {
  147. 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  148. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  149. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  150. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  151. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  152. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  153. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  154. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  155. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  156. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  157. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  158. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  159. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  160. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  161. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  162. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  163. };
  164. int8 shiftCount;
  165. shiftCount = 0;
  166. if (a < 0x10000) {
  167. shiftCount += 16;
  168. a <<= 16;
  169. }
  170. if (a < 0x1000000) {
  171. shiftCount += 8;
  172. a <<= 8;
  173. }
  174. shiftCount += countLeadingZerosHigh[a >> 24];
  175. return shiftCount;
  176. }
  177. static int8 countLeadingZeros64(bits64 a)
  178. {
  179. int8 shiftCount;
  180. shiftCount = 0;
  181. if (a < ((bits64) 1) << 32) {
  182. shiftCount += 32;
  183. } else {
  184. a >>= 32;
  185. }
  186. shiftCount += countLeadingZeros32(a);
  187. return shiftCount;
  188. }
  189. static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
  190. {
  191. int8 shiftCount;
  192. shiftCount = countLeadingZeros64(zSig) - 1;
  193. return roundAndPackFloat64(zSign, zExp - shiftCount,
  194. zSig << shiftCount);
  195. }
  196. static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
  197. {
  198. int16 aExp, bExp, zExp;
  199. bits64 aSig, bSig, zSig;
  200. int16 expDiff;
  201. aSig = extractFloat64Frac(a);
  202. aExp = extractFloat64Exp(a);
  203. bSig = extractFloat64Frac(b);
  204. bExp = extractFloat64Exp(b);
  205. expDiff = aExp - bExp;
  206. aSig <<= 10;
  207. bSig <<= 10;
  208. if (0 < expDiff)
  209. goto aExpBigger;
  210. if (expDiff < 0)
  211. goto bExpBigger;
  212. if (aExp == 0) {
  213. aExp = 1;
  214. bExp = 1;
  215. }
  216. if (bSig < aSig)
  217. goto aBigger;
  218. if (aSig < bSig)
  219. goto bBigger;
  220. return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
  221. bExpBigger:
  222. if (bExp == 0x7FF) {
  223. return packFloat64(zSign ^ 1, 0x7FF, 0);
  224. }
  225. if (aExp == 0) {
  226. ++expDiff;
  227. } else {
  228. aSig |= LIT64(0x4000000000000000);
  229. }
  230. shift64RightJamming(aSig, -expDiff, &aSig);
  231. bSig |= LIT64(0x4000000000000000);
  232. bBigger:
  233. zSig = bSig - aSig;
  234. zExp = bExp;
  235. zSign ^= 1;
  236. goto normalizeRoundAndPack;
  237. aExpBigger:
  238. if (aExp == 0x7FF) {
  239. return a;
  240. }
  241. if (bExp == 0) {
  242. --expDiff;
  243. } else {
  244. bSig |= LIT64(0x4000000000000000);
  245. }
  246. shift64RightJamming(bSig, expDiff, &bSig);
  247. aSig |= LIT64(0x4000000000000000);
  248. aBigger:
  249. zSig = aSig - bSig;
  250. zExp = aExp;
  251. normalizeRoundAndPack:
  252. --zExp;
  253. return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
  254. }
  255. static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
  256. {
  257. int16 aExp, bExp, zExp;
  258. bits64 aSig, bSig, zSig;
  259. int16 expDiff;
  260. aSig = extractFloat64Frac(a);
  261. aExp = extractFloat64Exp(a);
  262. bSig = extractFloat64Frac(b);
  263. bExp = extractFloat64Exp(b);
  264. expDiff = aExp - bExp;
  265. aSig <<= 9;
  266. bSig <<= 9;
  267. if (0 < expDiff) {
  268. if (aExp == 0x7FF) {
  269. return a;
  270. }
  271. if (bExp == 0) {
  272. --expDiff;
  273. } else {
  274. bSig |= LIT64(0x2000000000000000);
  275. }
  276. shift64RightJamming(bSig, expDiff, &bSig);
  277. zExp = aExp;
  278. } else if (expDiff < 0) {
  279. if (bExp == 0x7FF) {
  280. return packFloat64(zSign, 0x7FF, 0);
  281. }
  282. if (aExp == 0) {
  283. ++expDiff;
  284. } else {
  285. aSig |= LIT64(0x2000000000000000);
  286. }
  287. shift64RightJamming(aSig, -expDiff, &aSig);
  288. zExp = bExp;
  289. } else {
  290. if (aExp == 0x7FF) {
  291. return a;
  292. }
  293. if (aExp == 0)
  294. return packFloat64(zSign, 0, (aSig + bSig) >> 9);
  295. zSig = LIT64(0x4000000000000000) + aSig + bSig;
  296. zExp = aExp;
  297. goto roundAndPack;
  298. }
  299. aSig |= LIT64(0x2000000000000000);
  300. zSig = (aSig + bSig) << 1;
  301. --zExp;
  302. if ((sbits64) zSig < 0) {
  303. zSig = aSig + bSig;
  304. ++zExp;
  305. }
  306. roundAndPack:
  307. return roundAndPackFloat64(zSign, zExp, zSig);
  308. }
  309. float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
  310. {
  311. return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
  312. }
  313. void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
  314. {
  315. bits32 z;
  316. if (count == 0) {
  317. z = a;
  318. } else if (count < 32) {
  319. z = (a >> count) | ((a << ((-count) & 31)) != 0);
  320. } else {
  321. z = (a != 0);
  322. }
  323. *zPtr = z;
  324. }
  325. static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
  326. {
  327. flag roundNearestEven;
  328. int8 roundIncrement, roundBits;
  329. flag isTiny;
  330. /* SH4 has only 2 rounding modes - round to nearest and round to zero */
  331. roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
  332. roundIncrement = 0x40;
  333. if (!roundNearestEven) {
  334. roundIncrement = 0;
  335. }
  336. roundBits = zSig & 0x7F;
  337. if (0xFD <= (bits16) zExp) {
  338. if ((0xFD < zExp)
  339. || ((zExp == 0xFD)
  340. && ((sbits32) (zSig + roundIncrement) < 0))
  341. ) {
  342. float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
  343. return packFloat32(zSign, 0xFF,
  344. 0) - (roundIncrement == 0);
  345. }
  346. if (zExp < 0) {
  347. isTiny = (zExp < -1)
  348. || (zSig + roundIncrement < 0x80000000);
  349. shift32RightJamming(zSig, -zExp, &zSig);
  350. zExp = 0;
  351. roundBits = zSig & 0x7F;
  352. if (isTiny && roundBits)
  353. float_raise(FPSCR_CAUSE_UNDERFLOW);
  354. }
  355. }
  356. if (roundBits)
  357. float_raise(FPSCR_CAUSE_INEXACT);
  358. zSig = (zSig + roundIncrement) >> 7;
  359. zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
  360. if (zSig == 0)
  361. zExp = 0;
  362. return packFloat32(zSign, zExp, zSig);
  363. }
  364. static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
  365. {
  366. int8 shiftCount;
  367. shiftCount = countLeadingZeros32(zSig) - 1;
  368. return roundAndPackFloat32(zSign, zExp - shiftCount,
  369. zSig << shiftCount);
  370. }
  371. static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
  372. {
  373. flag roundNearestEven;
  374. int16 roundIncrement, roundBits;
  375. flag isTiny;
  376. /* SH4 has only 2 rounding modes - round to nearest and round to zero */
  377. roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
  378. roundIncrement = 0x200;
  379. if (!roundNearestEven) {
  380. roundIncrement = 0;
  381. }
  382. roundBits = zSig & 0x3FF;
  383. if (0x7FD <= (bits16) zExp) {
  384. if ((0x7FD < zExp)
  385. || ((zExp == 0x7FD)
  386. && ((sbits64) (zSig + roundIncrement) < 0))
  387. ) {
  388. float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
  389. return packFloat64(zSign, 0x7FF,
  390. 0) - (roundIncrement == 0);
  391. }
  392. if (zExp < 0) {
  393. isTiny = (zExp < -1)
  394. || (zSig + roundIncrement <
  395. LIT64(0x8000000000000000));
  396. shift64RightJamming(zSig, -zExp, &zSig);
  397. zExp = 0;
  398. roundBits = zSig & 0x3FF;
  399. if (isTiny && roundBits)
  400. float_raise(FPSCR_CAUSE_UNDERFLOW);
  401. }
  402. }
  403. if (roundBits)
  404. float_raise(FPSCR_CAUSE_INEXACT);
  405. zSig = (zSig + roundIncrement) >> 10;
  406. zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
  407. if (zSig == 0)
  408. zExp = 0;
  409. return packFloat64(zSign, zExp, zSig);
  410. }
  411. static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
  412. {
  413. int16 aExp, bExp, zExp;
  414. bits32 aSig, bSig, zSig;
  415. int16 expDiff;
  416. aSig = extractFloat32Frac(a);
  417. aExp = extractFloat32Exp(a);
  418. bSig = extractFloat32Frac(b);
  419. bExp = extractFloat32Exp(b);
  420. expDiff = aExp - bExp;
  421. aSig <<= 7;
  422. bSig <<= 7;
  423. if (0 < expDiff)
  424. goto aExpBigger;
  425. if (expDiff < 0)
  426. goto bExpBigger;
  427. if (aExp == 0) {
  428. aExp = 1;
  429. bExp = 1;
  430. }
  431. if (bSig < aSig)
  432. goto aBigger;
  433. if (aSig < bSig)
  434. goto bBigger;
  435. return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
  436. bExpBigger:
  437. if (bExp == 0xFF) {
  438. return packFloat32(zSign ^ 1, 0xFF, 0);
  439. }
  440. if (aExp == 0) {
  441. ++expDiff;
  442. } else {
  443. aSig |= 0x40000000;
  444. }
  445. shift32RightJamming(aSig, -expDiff, &aSig);
  446. bSig |= 0x40000000;
  447. bBigger:
  448. zSig = bSig - aSig;
  449. zExp = bExp;
  450. zSign ^= 1;
  451. goto normalizeRoundAndPack;
  452. aExpBigger:
  453. if (aExp == 0xFF) {
  454. return a;
  455. }
  456. if (bExp == 0) {
  457. --expDiff;
  458. } else {
  459. bSig |= 0x40000000;
  460. }
  461. shift32RightJamming(bSig, expDiff, &bSig);
  462. aSig |= 0x40000000;
  463. aBigger:
  464. zSig = aSig - bSig;
  465. zExp = aExp;
  466. normalizeRoundAndPack:
  467. --zExp;
  468. return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
  469. }
  470. static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
  471. {
  472. int16 aExp, bExp, zExp;
  473. bits32 aSig, bSig, zSig;
  474. int16 expDiff;
  475. aSig = extractFloat32Frac(a);
  476. aExp = extractFloat32Exp(a);
  477. bSig = extractFloat32Frac(b);
  478. bExp = extractFloat32Exp(b);
  479. expDiff = aExp - bExp;
  480. aSig <<= 6;
  481. bSig <<= 6;
  482. if (0 < expDiff) {
  483. if (aExp == 0xFF) {
  484. return a;
  485. }
  486. if (bExp == 0) {
  487. --expDiff;
  488. } else {
  489. bSig |= 0x20000000;
  490. }
  491. shift32RightJamming(bSig, expDiff, &bSig);
  492. zExp = aExp;
  493. } else if (expDiff < 0) {
  494. if (bExp == 0xFF) {
  495. return packFloat32(zSign, 0xFF, 0);
  496. }
  497. if (aExp == 0) {
  498. ++expDiff;
  499. } else {
  500. aSig |= 0x20000000;
  501. }
  502. shift32RightJamming(aSig, -expDiff, &aSig);
  503. zExp = bExp;
  504. } else {
  505. if (aExp == 0xFF) {
  506. return a;
  507. }
  508. if (aExp == 0)
  509. return packFloat32(zSign, 0, (aSig + bSig) >> 6);
  510. zSig = 0x40000000 + aSig + bSig;
  511. zExp = aExp;
  512. goto roundAndPack;
  513. }
  514. aSig |= 0x20000000;
  515. zSig = (aSig + bSig) << 1;
  516. --zExp;
  517. if ((sbits32) zSig < 0) {
  518. zSig = aSig + bSig;
  519. ++zExp;
  520. }
  521. roundAndPack:
  522. return roundAndPackFloat32(zSign, zExp, zSig);
  523. }
  524. float64 float64_sub(float64 a, float64 b)
  525. {
  526. flag aSign, bSign;
  527. aSign = extractFloat64Sign(a);
  528. bSign = extractFloat64Sign(b);
  529. if (aSign == bSign) {
  530. return subFloat64Sigs(a, b, aSign);
  531. } else {
  532. return addFloat64Sigs(a, b, aSign);
  533. }
  534. }
  535. float32 float32_sub(float32 a, float32 b)
  536. {
  537. flag aSign, bSign;
  538. aSign = extractFloat32Sign(a);
  539. bSign = extractFloat32Sign(b);
  540. if (aSign == bSign) {
  541. return subFloat32Sigs(a, b, aSign);
  542. } else {
  543. return addFloat32Sigs(a, b, aSign);
  544. }
  545. }
  546. float32 float32_add(float32 a, float32 b)
  547. {
  548. flag aSign, bSign;
  549. aSign = extractFloat32Sign(a);
  550. bSign = extractFloat32Sign(b);
  551. if (aSign == bSign) {
  552. return addFloat32Sigs(a, b, aSign);
  553. } else {
  554. return subFloat32Sigs(a, b, aSign);
  555. }
  556. }
  557. float64 float64_add(float64 a, float64 b)
  558. {
  559. flag aSign, bSign;
  560. aSign = extractFloat64Sign(a);
  561. bSign = extractFloat64Sign(b);
  562. if (aSign == bSign) {
  563. return addFloat64Sigs(a, b, aSign);
  564. } else {
  565. return subFloat64Sigs(a, b, aSign);
  566. }
  567. }
  568. static void
  569. normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
  570. {
  571. int8 shiftCount;
  572. shiftCount = countLeadingZeros64(aSig) - 11;
  573. *zSigPtr = aSig << shiftCount;
  574. *zExpPtr = 1 - shiftCount;
  575. }
  576. void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  577. bits64 * z1Ptr)
  578. {
  579. bits64 z1;
  580. z1 = a1 + b1;
  581. *z1Ptr = z1;
  582. *z0Ptr = a0 + b0 + (z1 < a1);
  583. }
  584. void
  585. sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  586. bits64 * z1Ptr)
  587. {
  588. *z1Ptr = a1 - b1;
  589. *z0Ptr = a0 - b0 - (a1 < b1);
  590. }
  591. static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
  592. {
  593. bits64 b0, b1;
  594. bits64 rem0, rem1, term0, term1;
  595. bits64 z, tmp;
  596. if (b <= a0)
  597. return LIT64(0xFFFFFFFFFFFFFFFF);
  598. b0 = b >> 32;
  599. tmp = a0;
  600. do_div(tmp, b0);
  601. z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
  602. mul64To128(b, z, &term0, &term1);
  603. sub128(a0, a1, term0, term1, &rem0, &rem1);
  604. while (((sbits64) rem0) < 0) {
  605. z -= LIT64(0x100000000);
  606. b1 = b << 32;
  607. add128(rem0, rem1, b0, b1, &rem0, &rem1);
  608. }
  609. rem0 = (rem0 << 32) | (rem1 >> 32);
  610. tmp = rem0;
  611. do_div(tmp, b0);
  612. z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
  613. return z;
  614. }
  615. void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
  616. {
  617. bits32 aHigh, aLow, bHigh, bLow;
  618. bits64 z0, zMiddleA, zMiddleB, z1;
  619. aLow = a;
  620. aHigh = a >> 32;
  621. bLow = b;
  622. bHigh = b >> 32;
  623. z1 = ((bits64) aLow) * bLow;
  624. zMiddleA = ((bits64) aLow) * bHigh;
  625. zMiddleB = ((bits64) aHigh) * bLow;
  626. z0 = ((bits64) aHigh) * bHigh;
  627. zMiddleA += zMiddleB;
  628. z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
  629. zMiddleA <<= 32;
  630. z1 += zMiddleA;
  631. z0 += (z1 < zMiddleA);
  632. *z1Ptr = z1;
  633. *z0Ptr = z0;
  634. }
  635. static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
  636. bits32 * zSigPtr)
  637. {
  638. int8 shiftCount;
  639. shiftCount = countLeadingZeros32(aSig) - 8;
  640. *zSigPtr = aSig << shiftCount;
  641. *zExpPtr = 1 - shiftCount;
  642. }
  643. float64 float64_div(float64 a, float64 b)
  644. {
  645. flag aSign, bSign, zSign;
  646. int16 aExp, bExp, zExp;
  647. bits64 aSig, bSig, zSig;
  648. bits64 rem0, rem1;
  649. bits64 term0, term1;
  650. aSig = extractFloat64Frac(a);
  651. aExp = extractFloat64Exp(a);
  652. aSign = extractFloat64Sign(a);
  653. bSig = extractFloat64Frac(b);
  654. bExp = extractFloat64Exp(b);
  655. bSign = extractFloat64Sign(b);
  656. zSign = aSign ^ bSign;
  657. if (aExp == 0x7FF) {
  658. if (bExp == 0x7FF) {
  659. }
  660. return packFloat64(zSign, 0x7FF, 0);
  661. }
  662. if (bExp == 0x7FF) {
  663. return packFloat64(zSign, 0, 0);
  664. }
  665. if (bExp == 0) {
  666. if (bSig == 0) {
  667. if ((aExp | aSig) == 0) {
  668. float_raise(FPSCR_CAUSE_INVALID);
  669. }
  670. return packFloat64(zSign, 0x7FF, 0);
  671. }
  672. normalizeFloat64Subnormal(bSig, &bExp, &bSig);
  673. }
  674. if (aExp == 0) {
  675. if (aSig == 0)
  676. return packFloat64(zSign, 0, 0);
  677. normalizeFloat64Subnormal(aSig, &aExp, &aSig);
  678. }
  679. zExp = aExp - bExp + 0x3FD;
  680. aSig = (aSig | LIT64(0x0010000000000000)) << 10;
  681. bSig = (bSig | LIT64(0x0010000000000000)) << 11;
  682. if (bSig <= (aSig + aSig)) {
  683. aSig >>= 1;
  684. ++zExp;
  685. }
  686. zSig = estimateDiv128To64(aSig, 0, bSig);
  687. if ((zSig & 0x1FF) <= 2) {
  688. mul64To128(bSig, zSig, &term0, &term1);
  689. sub128(aSig, 0, term0, term1, &rem0, &rem1);
  690. while ((sbits64) rem0 < 0) {
  691. --zSig;
  692. add128(rem0, rem1, 0, bSig, &rem0, &rem1);
  693. }
  694. zSig |= (rem1 != 0);
  695. }
  696. return roundAndPackFloat64(zSign, zExp, zSig);
  697. }
  698. float32 float32_div(float32 a, float32 b)
  699. {
  700. flag aSign, bSign, zSign;
  701. int16 aExp, bExp, zExp;
  702. bits32 aSig, bSig;
  703. uint64_t zSig;
  704. aSig = extractFloat32Frac(a);
  705. aExp = extractFloat32Exp(a);
  706. aSign = extractFloat32Sign(a);
  707. bSig = extractFloat32Frac(b);
  708. bExp = extractFloat32Exp(b);
  709. bSign = extractFloat32Sign(b);
  710. zSign = aSign ^ bSign;
  711. if (aExp == 0xFF) {
  712. if (bExp == 0xFF) {
  713. }
  714. return packFloat32(zSign, 0xFF, 0);
  715. }
  716. if (bExp == 0xFF) {
  717. return packFloat32(zSign, 0, 0);
  718. }
  719. if (bExp == 0) {
  720. if (bSig == 0) {
  721. return packFloat32(zSign, 0xFF, 0);
  722. }
  723. normalizeFloat32Subnormal(bSig, &bExp, &bSig);
  724. }
  725. if (aExp == 0) {
  726. if (aSig == 0)
  727. return packFloat32(zSign, 0, 0);
  728. normalizeFloat32Subnormal(aSig, &aExp, &aSig);
  729. }
  730. zExp = aExp - bExp + 0x7D;
  731. aSig = (aSig | 0x00800000) << 7;
  732. bSig = (bSig | 0x00800000) << 8;
  733. if (bSig <= (aSig + aSig)) {
  734. aSig >>= 1;
  735. ++zExp;
  736. }
  737. zSig = (((bits64) aSig) << 32);
  738. do_div(zSig, bSig);
  739. if ((zSig & 0x3F) == 0) {
  740. zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
  741. }
  742. return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
  743. }
  744. float32 float32_mul(float32 a, float32 b)
  745. {
  746. char aSign, bSign, zSign;
  747. int aExp, bExp, zExp;
  748. unsigned int aSig, bSig;
  749. unsigned long long zSig64;
  750. unsigned int zSig;
  751. aSig = extractFloat32Frac(a);
  752. aExp = extractFloat32Exp(a);
  753. aSign = extractFloat32Sign(a);
  754. bSig = extractFloat32Frac(b);
  755. bExp = extractFloat32Exp(b);
  756. bSign = extractFloat32Sign(b);
  757. zSign = aSign ^ bSign;
  758. if (aExp == 0) {
  759. if (aSig == 0)
  760. return packFloat32(zSign, 0, 0);
  761. normalizeFloat32Subnormal(aSig, &aExp, &aSig);
  762. }
  763. if (bExp == 0) {
  764. if (bSig == 0)
  765. return packFloat32(zSign, 0, 0);
  766. normalizeFloat32Subnormal(bSig, &bExp, &bSig);
  767. }
  768. if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
  769. return roundAndPackFloat32(zSign, 0xff, 0);
  770. zExp = aExp + bExp - 0x7F;
  771. aSig = (aSig | 0x00800000) << 7;
  772. bSig = (bSig | 0x00800000) << 8;
  773. shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
  774. zSig = zSig64;
  775. if (0 <= (signed int)(zSig << 1)) {
  776. zSig <<= 1;
  777. --zExp;
  778. }
  779. return roundAndPackFloat32(zSign, zExp, zSig);
  780. }
  781. float64 float64_mul(float64 a, float64 b)
  782. {
  783. char aSign, bSign, zSign;
  784. int aExp, bExp, zExp;
  785. unsigned long long int aSig, bSig, zSig0, zSig1;
  786. aSig = extractFloat64Frac(a);
  787. aExp = extractFloat64Exp(a);
  788. aSign = extractFloat64Sign(a);
  789. bSig = extractFloat64Frac(b);
  790. bExp = extractFloat64Exp(b);
  791. bSign = extractFloat64Sign(b);
  792. zSign = aSign ^ bSign;
  793. if (aExp == 0) {
  794. if (aSig == 0)
  795. return packFloat64(zSign, 0, 0);
  796. normalizeFloat64Subnormal(aSig, &aExp, &aSig);
  797. }
  798. if (bExp == 0) {
  799. if (bSig == 0)
  800. return packFloat64(zSign, 0, 0);
  801. normalizeFloat64Subnormal(bSig, &bExp, &bSig);
  802. }
  803. if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
  804. return roundAndPackFloat64(zSign, 0x7ff, 0);
  805. zExp = aExp + bExp - 0x3FF;
  806. aSig = (aSig | 0x0010000000000000LL) << 10;
  807. bSig = (bSig | 0x0010000000000000LL) << 11;
  808. mul64To128(aSig, bSig, &zSig0, &zSig1);
  809. zSig0 |= (zSig1 != 0);
  810. if (0 <= (signed long long int)(zSig0 << 1)) {
  811. zSig0 <<= 1;
  812. --zExp;
  813. }
  814. return roundAndPackFloat64(zSign, zExp, zSig0);
  815. }
  816. /*
  817. * -------------------------------------------------------------------------------
  818. * Returns the result of converting the double-precision floating-point value
  819. * `a' to the single-precision floating-point format. The conversion is
  820. * performed according to the IEC/IEEE Standard for Binary Floating-point
  821. * Arithmetic.
  822. * -------------------------------------------------------------------------------
  823. * */
  824. float32 float64_to_float32(float64 a)
  825. {
  826. flag aSign;
  827. int16 aExp;
  828. bits64 aSig;
  829. bits32 zSig;
  830. aSig = extractFloat64Frac( a );
  831. aExp = extractFloat64Exp( a );
  832. aSign = extractFloat64Sign( a );
  833. shift64RightJamming( aSig, 22, &aSig );
  834. zSig = aSig;
  835. if ( aExp || zSig ) {
  836. zSig |= 0x40000000;
  837. aExp -= 0x381;
  838. }
  839. return roundAndPackFloat32(aSign, aExp, zSig);
  840. }