satan.S 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. |
  2. | satan.sa 3.3 12/19/90
  3. |
  4. | The entry point satan computes the arctangent of an
  5. | input value. satand does the same except the input value is a
  6. | denormalized number.
  7. |
  8. | Input: Double-extended value in memory location pointed to by address
  9. | register a0.
  10. |
  11. | Output: Arctan(X) returned in floating-point register Fp0.
  12. |
  13. | Accuracy and Monotonicity: The returned result is within 2 ulps in
  14. | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  15. | result is subsequently rounded to double precision. The
  16. | result is provably monotonic in double precision.
  17. |
  18. | Speed: The program satan takes approximately 160 cycles for input
  19. | argument X such that 1/16 < |X| < 16. For the other arguments,
  20. | the program will run no worse than 10% slower.
  21. |
  22. | Algorithm:
  23. | Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
  24. |
  25. | Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
  26. | Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
  27. | of X with a bit-1 attached at the 6-th bit position. Define u
  28. | to be u = (X-F) / (1 + X*F).
  29. |
  30. | Step 3. Approximate arctan(u) by a polynomial poly.
  31. |
  32. | Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
  33. | calculated beforehand. Exit.
  34. |
  35. | Step 5. If |X| >= 16, go to Step 7.
  36. |
  37. | Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
  38. |
  39. | Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
  40. | Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
  41. |
  42. | Copyright (C) Motorola, Inc. 1990
  43. | All Rights Reserved
  44. |
  45. | For details on the license for this file, please see the
  46. | file, README, in this same directory.
  47. |satan idnt 2,1 | Motorola 040 Floating Point Software Package
  48. |section 8
  49. #include "fpsp.h"
  50. BOUNDS1: .long 0x3FFB8000,0x4002FFFF
  51. ONE: .long 0x3F800000
  52. .long 0x00000000
  53. ATANA3: .long 0xBFF6687E,0x314987D8
  54. ATANA2: .long 0x4002AC69,0x34A26DB3
  55. ATANA1: .long 0xBFC2476F,0x4E1DA28E
  56. ATANB6: .long 0x3FB34444,0x7F876989
  57. ATANB5: .long 0xBFB744EE,0x7FAF45DB
  58. ATANB4: .long 0x3FBC71C6,0x46940220
  59. ATANB3: .long 0xBFC24924,0x921872F9
  60. ATANB2: .long 0x3FC99999,0x99998FA9
  61. ATANB1: .long 0xBFD55555,0x55555555
  62. ATANC5: .long 0xBFB70BF3,0x98539E6A
  63. ATANC4: .long 0x3FBC7187,0x962D1D7D
  64. ATANC3: .long 0xBFC24924,0x827107B8
  65. ATANC2: .long 0x3FC99999,0x9996263E
  66. ATANC1: .long 0xBFD55555,0x55555536
  67. PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
  68. NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
  69. PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
  70. NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
  71. ATANTBL:
  72. .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
  73. .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
  74. .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
  75. .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
  76. .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
  77. .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
  78. .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
  79. .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
  80. .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
  81. .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
  82. .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
  83. .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
  84. .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
  85. .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
  86. .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
  87. .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
  88. .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
  89. .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
  90. .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
  91. .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
  92. .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
  93. .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
  94. .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
  95. .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
  96. .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
  97. .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
  98. .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
  99. .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
  100. .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
  101. .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
  102. .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
  103. .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
  104. .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
  105. .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
  106. .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
  107. .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
  108. .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
  109. .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
  110. .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
  111. .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
  112. .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
  113. .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
  114. .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
  115. .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
  116. .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
  117. .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
  118. .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
  119. .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
  120. .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
  121. .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
  122. .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
  123. .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
  124. .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
  125. .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
  126. .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
  127. .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
  128. .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
  129. .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
  130. .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
  131. .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
  132. .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
  133. .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
  134. .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
  135. .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
  136. .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
  137. .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
  138. .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
  139. .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
  140. .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
  141. .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
  142. .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
  143. .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
  144. .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
  145. .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
  146. .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
  147. .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
  148. .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
  149. .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
  150. .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
  151. .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
  152. .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
  153. .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
  154. .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
  155. .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
  156. .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
  157. .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
  158. .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
  159. .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
  160. .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
  161. .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
  162. .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
  163. .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
  164. .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
  165. .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
  166. .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
  167. .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
  168. .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
  169. .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
  170. .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
  171. .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
  172. .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
  173. .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
  174. .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
  175. .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
  176. .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
  177. .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
  178. .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
  179. .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
  180. .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
  181. .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
  182. .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
  183. .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
  184. .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
  185. .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
  186. .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
  187. .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
  188. .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
  189. .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
  190. .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
  191. .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
  192. .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
  193. .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
  194. .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
  195. .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
  196. .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
  197. .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
  198. .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
  199. .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
  200. .set X,FP_SCR1
  201. .set XDCARE,X+2
  202. .set XFRAC,X+4
  203. .set XFRACLO,X+8
  204. .set ATANF,FP_SCR2
  205. .set ATANFHI,ATANF+4
  206. .set ATANFLO,ATANF+8
  207. | xref t_frcinx
  208. |xref t_extdnrm
  209. .global satand
  210. satand:
  211. |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
  212. bra t_extdnrm
  213. .global satan
  214. satan:
  215. |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
  216. fmovex (%a0),%fp0 | ...LOAD INPUT
  217. movel (%a0),%d0
  218. movew 4(%a0),%d0
  219. fmovex %fp0,X(%a6)
  220. andil #0x7FFFFFFF,%d0
  221. cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16?
  222. bges ATANOK1
  223. bra ATANSM
  224. ATANOK1:
  225. cmpil #0x4002FFFF,%d0 | ...|X| < 16 ?
  226. bles ATANMAIN
  227. bra ATANBIG
  228. |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
  229. |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
  230. |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
  231. |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
  232. |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
  233. |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
  234. |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
  235. |--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
  236. |--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
  237. |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
  238. |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
  239. |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
  240. |--WILL INVOLVE A VERY LONG POLYNOMIAL.
  241. |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
  242. |--WE CHOSE F TO BE +-2^K * 1.BBBB1
  243. |--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
  244. |--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
  245. |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
  246. |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
  247. ATANMAIN:
  248. movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
  249. andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS
  250. oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1
  251. movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F
  252. fmovex %fp0,%fp1 | ...FP1 IS X
  253. fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0
  254. fsubx X(%a6),%fp0 | ...FP0 IS X-F
  255. fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F
  256. fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F)
  257. |--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
  258. |--CREATE ATAN(F) AND STORE IT IN ATANF, AND
  259. |--SAVE REGISTERS FP2.
  260. movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY
  261. movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X
  262. andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION
  263. andil #0x7FFF0000,%d2 | ...EXPONENT OF F
  264. subil #0x3FFB0000,%d2 | ...K+4
  265. asrl #1,%d2
  266. addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F
  267. asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|)
  268. lea ATANTBL,%a1
  269. addal %d0,%a1 | ...ADDRESS OF ATAN(|F|)
  270. movel (%a1)+,ATANF(%a6)
  271. movel (%a1)+,ATANFHI(%a6)
  272. movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|)
  273. movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN
  274. andil #0x80000000,%d0 | ...SIGN(F)
  275. orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|)
  276. movel (%a7)+,%d2 | ...RESTORE d2
  277. |--THAT'S ALL I HAVE TO DO FOR NOW,
  278. |--BUT ALAS, THE DIVIDE IS STILL CRANKING!
  279. |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
  280. |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
  281. |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
  282. |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
  283. |--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
  284. |--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
  285. |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
  286. fmovex %fp0,%fp1
  287. fmulx %fp1,%fp1
  288. fmoved ATANA3,%fp2
  289. faddx %fp1,%fp2 | ...A3+V
  290. fmulx %fp1,%fp2 | ...V*(A3+V)
  291. fmulx %fp0,%fp1 | ...U*V
  292. faddd ATANA2,%fp2 | ...A2+V*(A3+V)
  293. fmuld ATANA1,%fp1 | ...A1*U*V
  294. fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V))
  295. faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED
  296. fmovel %d1,%FPCR |restore users exceptions
  297. faddx ATANF(%a6),%fp0 | ...ATAN(X)
  298. bra t_frcinx
  299. ATANBORS:
  300. |--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
  301. |--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
  302. cmpil #0x3FFF8000,%d0
  303. bgt ATANBIG | ...I.E. |X| >= 16
  304. ATANSM:
  305. |--|X| <= 1/16
  306. |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
  307. |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
  308. |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
  309. |--WHERE Y = X*X, AND Z = Y*Y.
  310. cmpil #0x3FD78000,%d0
  311. blt ATANTINY
  312. |--COMPUTE POLYNOMIAL
  313. fmulx %fp0,%fp0 | ...FP0 IS Y = X*X
  314. movew #0x0000,XDCARE(%a6)
  315. fmovex %fp0,%fp1
  316. fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
  317. fmoved ATANB6,%fp2
  318. fmoved ATANB5,%fp3
  319. fmulx %fp1,%fp2 | ...Z*B6
  320. fmulx %fp1,%fp3 | ...Z*B5
  321. faddd ATANB4,%fp2 | ...B4+Z*B6
  322. faddd ATANB3,%fp3 | ...B3+Z*B5
  323. fmulx %fp1,%fp2 | ...Z*(B4+Z*B6)
  324. fmulx %fp3,%fp1 | ...Z*(B3+Z*B5)
  325. faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6)
  326. faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5)
  327. fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6))
  328. fmulx X(%a6),%fp0 | ...X*Y
  329. faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
  330. fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
  331. fmovel %d1,%FPCR |restore users exceptions
  332. faddx X(%a6),%fp0
  333. bra t_frcinx
  334. ATANTINY:
  335. |--|X| < 2^(-40), ATAN(X) = X
  336. movew #0x0000,XDCARE(%a6)
  337. fmovel %d1,%FPCR |restore users exceptions
  338. fmovex X(%a6),%fp0 |last inst - possible exception set
  339. bra t_frcinx
  340. ATANBIG:
  341. |--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
  342. |--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
  343. cmpil #0x40638000,%d0
  344. bgt ATANHUGE
  345. |--APPROXIMATE ATAN(-1/X) BY
  346. |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
  347. |--THIS CAN BE RE-WRITTEN AS
  348. |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
  349. fmoves #0xBF800000,%fp1 | ...LOAD -1
  350. fdivx %fp0,%fp1 | ...FP1 IS -1/X
  351. |--DIVIDE IS STILL CRANKING
  352. fmovex %fp1,%fp0 | ...FP0 IS X'
  353. fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X'
  354. fmovex %fp1,X(%a6) | ...X IS REALLY X'
  355. fmovex %fp0,%fp1
  356. fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y
  357. fmoved ATANC5,%fp3
  358. fmoved ATANC4,%fp2
  359. fmulx %fp1,%fp3 | ...Z*C5
  360. fmulx %fp1,%fp2 | ...Z*B4
  361. faddd ATANC3,%fp3 | ...C3+Z*C5
  362. faddd ATANC2,%fp2 | ...C2+Z*C4
  363. fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED
  364. fmulx %fp0,%fp2 | ...Y*(C2+Z*C4)
  365. faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5)
  366. fmulx X(%a6),%fp0 | ...X'*Y
  367. faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
  368. fmulx %fp1,%fp0 | ...X'*Y*([B1+Z*(B3+Z*B5)]
  369. | ... +[Y*(B2+Z*(B4+Z*B6))])
  370. faddx X(%a6),%fp0
  371. fmovel %d1,%FPCR |restore users exceptions
  372. btstb #7,(%a0)
  373. beqs pos_big
  374. neg_big:
  375. faddx NPIBY2,%fp0
  376. bra t_frcinx
  377. pos_big:
  378. faddx PPIBY2,%fp0
  379. bra t_frcinx
  380. ATANHUGE:
  381. |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
  382. btstb #7,(%a0)
  383. beqs pos_huge
  384. neg_huge:
  385. fmovex NPIBY2,%fp0
  386. fmovel %d1,%fpcr
  387. fsubx NTINY,%fp0
  388. bra t_frcinx
  389. pos_huge:
  390. fmovex PPIBY2,%fp0
  391. fmovel %d1,%fpcr
  392. fsubx PTINY,%fp0
  393. bra t_frcinx
  394. |end