wm_sqrt.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. .file "wm_sqrt.S"
  3. /*---------------------------------------------------------------------------+
  4. | wm_sqrt.S |
  5. | |
  6. | Fixed point arithmetic square root evaluation. |
  7. | |
  8. | Copyright (C) 1992,1993,1995,1997 |
  9. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  10. | Australia. E-mail billm@suburbia.net |
  11. | |
  12. | Call from C as: |
  13. | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
  14. | |
  15. +---------------------------------------------------------------------------*/
  16. /*---------------------------------------------------------------------------+
  17. | wm_sqrt(FPU_REG *n, unsigned int control_word) |
  18. | returns the square root of n in n. |
  19. | |
  20. | Use Newton's method to compute the square root of a number, which must |
  21. | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
  22. | Does not check the sign or tag of the argument. |
  23. | Sets the exponent, but not the sign or tag of the result. |
  24. | |
  25. | The guess is kept in %esi:%edi |
  26. +---------------------------------------------------------------------------*/
  27. #include "exception.h"
  28. #include "fpu_emu.h"
  29. #ifndef NON_REENTRANT_FPU
  30. /* Local storage on the stack: */
  31. #define FPU_accum_3 -4(%ebp) /* ms word */
  32. #define FPU_accum_2 -8(%ebp)
  33. #define FPU_accum_1 -12(%ebp)
  34. #define FPU_accum_0 -16(%ebp)
  35. /*
  36. * The de-normalised argument:
  37. * sq_2 sq_1 sq_0
  38. * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
  39. * ^ binary point here
  40. */
  41. #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
  42. #define FPU_fsqrt_arg_1 -24(%ebp)
  43. #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
  44. #else
  45. /* Local storage in a static area: */
  46. .data
  47. .align 4,0
  48. FPU_accum_3:
  49. .long 0 /* ms word */
  50. FPU_accum_2:
  51. .long 0
  52. FPU_accum_1:
  53. .long 0
  54. FPU_accum_0:
  55. .long 0
  56. /* The de-normalised argument:
  57. sq_2 sq_1 sq_0
  58. b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
  59. ^ binary point here
  60. */
  61. FPU_fsqrt_arg_2:
  62. .long 0 /* ms word */
  63. FPU_fsqrt_arg_1:
  64. .long 0
  65. FPU_fsqrt_arg_0:
  66. .long 0 /* ls word, at most the ms bit is set */
  67. #endif /* NON_REENTRANT_FPU */
  68. .text
  69. ENTRY(wm_sqrt)
  70. pushl %ebp
  71. movl %esp,%ebp
  72. #ifndef NON_REENTRANT_FPU
  73. subl $28,%esp
  74. #endif /* NON_REENTRANT_FPU */
  75. pushl %esi
  76. pushl %edi
  77. pushl %ebx
  78. movl PARAM1,%esi
  79. movl SIGH(%esi),%eax
  80. movl SIGL(%esi),%ecx
  81. xorl %edx,%edx
  82. /* We use a rough linear estimate for the first guess.. */
  83. cmpw EXP_BIAS,EXP(%esi)
  84. jnz sqrt_arg_ge_2
  85. shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
  86. rcrl $1,%ecx
  87. rcrl $1,%edx
  88. sqrt_arg_ge_2:
  89. /* From here on, n is never accessed directly again until it is
  90. replaced by the answer. */
  91. movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
  92. movl %ecx,FPU_fsqrt_arg_1
  93. movl %edx,FPU_fsqrt_arg_0
  94. /* Make a linear first estimate */
  95. shrl $1,%eax
  96. addl $0x40000000,%eax
  97. movl $0xaaaaaaaa,%ecx
  98. mull %ecx
  99. shll %edx /* max result was 7fff... */
  100. testl $0x80000000,%edx /* but min was 3fff... */
  101. jnz sqrt_prelim_no_adjust
  102. movl $0x80000000,%edx /* round up */
  103. sqrt_prelim_no_adjust:
  104. movl %edx,%esi /* Our first guess */
  105. /* We have now computed (approx) (2 + x) / 3, which forms the basis
  106. for a few iterations of Newton's method */
  107. movl FPU_fsqrt_arg_2,%ecx /* ms word */
  108. /*
  109. * From our initial estimate, three iterations are enough to get us
  110. * to 30 bits or so. This will then allow two iterations at better
  111. * precision to complete the process.
  112. */
  113. /* Compute (g + n/g)/2 at each iteration (g is the guess). */
  114. shrl %ecx /* Doing this first will prevent a divide */
  115. /* overflow later. */
  116. movl %ecx,%edx /* msw of the arg / 2 */
  117. divl %esi /* current estimate */
  118. shrl %esi /* divide by 2 */
  119. addl %eax,%esi /* the new estimate */
  120. movl %ecx,%edx
  121. divl %esi
  122. shrl %esi
  123. addl %eax,%esi
  124. movl %ecx,%edx
  125. divl %esi
  126. shrl %esi
  127. addl %eax,%esi
  128. /*
  129. * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
  130. * we improve it to 60 bits or so.
  131. *
  132. * The strategy from now on is to compute new estimates from
  133. * guess := guess + (n - guess^2) / (2 * guess)
  134. */
  135. /* First, find the square of the guess */
  136. movl %esi,%eax
  137. mull %esi
  138. /* guess^2 now in %edx:%eax */
  139. movl FPU_fsqrt_arg_1,%ecx
  140. subl %ecx,%eax
  141. movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
  142. sbbl %ecx,%edx
  143. jnc sqrt_stage_2_positive
  144. /* Subtraction gives a negative result,
  145. negate the result before division. */
  146. notl %edx
  147. notl %eax
  148. addl $1,%eax
  149. adcl $0,%edx
  150. divl %esi
  151. movl %eax,%ecx
  152. movl %edx,%eax
  153. divl %esi
  154. jmp sqrt_stage_2_finish
  155. sqrt_stage_2_positive:
  156. divl %esi
  157. movl %eax,%ecx
  158. movl %edx,%eax
  159. divl %esi
  160. notl %ecx
  161. notl %eax
  162. addl $1,%eax
  163. adcl $0,%ecx
  164. sqrt_stage_2_finish:
  165. sarl $1,%ecx /* divide by 2 */
  166. rcrl $1,%eax
  167. /* Form the new estimate in %esi:%edi */
  168. movl %eax,%edi
  169. addl %ecx,%esi
  170. jnz sqrt_stage_2_done /* result should be [1..2) */
  171. #ifdef PARANOID
  172. /* It should be possible to get here only if the arg is ffff....ffff */
  173. cmp $0xffffffff,FPU_fsqrt_arg_1
  174. jnz sqrt_stage_2_error
  175. #endif /* PARANOID */
  176. /* The best rounded result. */
  177. xorl %eax,%eax
  178. decl %eax
  179. movl %eax,%edi
  180. movl %eax,%esi
  181. movl $0x7fffffff,%eax
  182. jmp sqrt_round_result
  183. #ifdef PARANOID
  184. sqrt_stage_2_error:
  185. pushl EX_INTERNAL|0x213
  186. call EXCEPTION
  187. #endif /* PARANOID */
  188. sqrt_stage_2_done:
  189. /* Now the square root has been computed to better than 60 bits. */
  190. /* Find the square of the guess. */
  191. movl %edi,%eax /* ls word of guess */
  192. mull %edi
  193. movl %edx,FPU_accum_1
  194. movl %esi,%eax
  195. mull %esi
  196. movl %edx,FPU_accum_3
  197. movl %eax,FPU_accum_2
  198. movl %edi,%eax
  199. mull %esi
  200. addl %eax,FPU_accum_1
  201. adcl %edx,FPU_accum_2
  202. adcl $0,FPU_accum_3
  203. /* movl %esi,%eax */
  204. /* mull %edi */
  205. addl %eax,FPU_accum_1
  206. adcl %edx,FPU_accum_2
  207. adcl $0,FPU_accum_3
  208. /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
  209. movl FPU_fsqrt_arg_0,%eax /* get normalized n */
  210. subl %eax,FPU_accum_1
  211. movl FPU_fsqrt_arg_1,%eax
  212. sbbl %eax,FPU_accum_2
  213. movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
  214. sbbl %eax,FPU_accum_3
  215. jnc sqrt_stage_3_positive
  216. /* Subtraction gives a negative result,
  217. negate the result before division */
  218. notl FPU_accum_1
  219. notl FPU_accum_2
  220. notl FPU_accum_3
  221. addl $1,FPU_accum_1
  222. adcl $0,FPU_accum_2
  223. #ifdef PARANOID
  224. adcl $0,FPU_accum_3 /* This must be zero */
  225. jz sqrt_stage_3_no_error
  226. sqrt_stage_3_error:
  227. pushl EX_INTERNAL|0x207
  228. call EXCEPTION
  229. sqrt_stage_3_no_error:
  230. #endif /* PARANOID */
  231. movl FPU_accum_2,%edx
  232. movl FPU_accum_1,%eax
  233. divl %esi
  234. movl %eax,%ecx
  235. movl %edx,%eax
  236. divl %esi
  237. sarl $1,%ecx /* divide by 2 */
  238. rcrl $1,%eax
  239. /* prepare to round the result */
  240. addl %ecx,%edi
  241. adcl $0,%esi
  242. jmp sqrt_stage_3_finished
  243. sqrt_stage_3_positive:
  244. movl FPU_accum_2,%edx
  245. movl FPU_accum_1,%eax
  246. divl %esi
  247. movl %eax,%ecx
  248. movl %edx,%eax
  249. divl %esi
  250. sarl $1,%ecx /* divide by 2 */
  251. rcrl $1,%eax
  252. /* prepare to round the result */
  253. notl %eax /* Negate the correction term */
  254. notl %ecx
  255. addl $1,%eax
  256. adcl $0,%ecx /* carry here ==> correction == 0 */
  257. adcl $0xffffffff,%esi
  258. addl %ecx,%edi
  259. adcl $0,%esi
  260. sqrt_stage_3_finished:
  261. /*
  262. * The result in %esi:%edi:%esi should be good to about 90 bits here,
  263. * and the rounding information here does not have sufficient accuracy
  264. * in a few rare cases.
  265. */
  266. cmpl $0xffffffe0,%eax
  267. ja sqrt_near_exact_x
  268. cmpl $0x00000020,%eax
  269. jb sqrt_near_exact
  270. cmpl $0x7fffffe0,%eax
  271. jb sqrt_round_result
  272. cmpl $0x80000020,%eax
  273. jb sqrt_get_more_precision
  274. sqrt_round_result:
  275. /* Set up for rounding operations */
  276. movl %eax,%edx
  277. movl %esi,%eax
  278. movl %edi,%ebx
  279. movl PARAM1,%edi
  280. movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
  281. jmp fpu_reg_round
  282. sqrt_near_exact_x:
  283. /* First, the estimate must be rounded up. */
  284. addl $1,%edi
  285. adcl $0,%esi
  286. sqrt_near_exact:
  287. /*
  288. * This is an easy case because x^1/2 is monotonic.
  289. * We need just find the square of our estimate, compare it
  290. * with the argument, and deduce whether our estimate is
  291. * above, below, or exact. We use the fact that the estimate
  292. * is known to be accurate to about 90 bits.
  293. */
  294. movl %edi,%eax /* ls word of guess */
  295. mull %edi
  296. movl %edx,%ebx /* 2nd ls word of square */
  297. movl %eax,%ecx /* ls word of square */
  298. movl %edi,%eax
  299. mull %esi
  300. addl %eax,%ebx
  301. addl %eax,%ebx
  302. #ifdef PARANOID
  303. cmp $0xffffffb0,%ebx
  304. jb sqrt_near_exact_ok
  305. cmp $0x00000050,%ebx
  306. ja sqrt_near_exact_ok
  307. pushl EX_INTERNAL|0x214
  308. call EXCEPTION
  309. sqrt_near_exact_ok:
  310. #endif /* PARANOID */
  311. or %ebx,%ebx
  312. js sqrt_near_exact_small
  313. jnz sqrt_near_exact_large
  314. or %ebx,%edx
  315. jnz sqrt_near_exact_large
  316. /* Our estimate is exactly the right answer */
  317. xorl %eax,%eax
  318. jmp sqrt_round_result
  319. sqrt_near_exact_small:
  320. /* Our estimate is too small */
  321. movl $0x000000ff,%eax
  322. jmp sqrt_round_result
  323. sqrt_near_exact_large:
  324. /* Our estimate is too large, we need to decrement it */
  325. subl $1,%edi
  326. sbbl $0,%esi
  327. movl $0xffffff00,%eax
  328. jmp sqrt_round_result
  329. sqrt_get_more_precision:
  330. /* This case is almost the same as the above, except we start
  331. with an extra bit of precision in the estimate. */
  332. stc /* The extra bit. */
  333. rcll $1,%edi /* Shift the estimate left one bit */
  334. rcll $1,%esi
  335. movl %edi,%eax /* ls word of guess */
  336. mull %edi
  337. movl %edx,%ebx /* 2nd ls word of square */
  338. movl %eax,%ecx /* ls word of square */
  339. movl %edi,%eax
  340. mull %esi
  341. addl %eax,%ebx
  342. addl %eax,%ebx
  343. /* Put our estimate back to its original value */
  344. stc /* The ms bit. */
  345. rcrl $1,%esi /* Shift the estimate left one bit */
  346. rcrl $1,%edi
  347. #ifdef PARANOID
  348. cmp $0xffffff60,%ebx
  349. jb sqrt_more_prec_ok
  350. cmp $0x000000a0,%ebx
  351. ja sqrt_more_prec_ok
  352. pushl EX_INTERNAL|0x215
  353. call EXCEPTION
  354. sqrt_more_prec_ok:
  355. #endif /* PARANOID */
  356. or %ebx,%ebx
  357. js sqrt_more_prec_small
  358. jnz sqrt_more_prec_large
  359. or %ebx,%ecx
  360. jnz sqrt_more_prec_large
  361. /* Our estimate is exactly the right answer */
  362. movl $0x80000000,%eax
  363. jmp sqrt_round_result
  364. sqrt_more_prec_small:
  365. /* Our estimate is too small */
  366. movl $0x800000ff,%eax
  367. jmp sqrt_round_result
  368. sqrt_more_prec_large:
  369. /* Our estimate is too large */
  370. movl $0x7fffff00,%eax
  371. jmp sqrt_round_result
  372. ENDPROC(wm_sqrt)