div_Xsig.S 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. .file "div_Xsig.S"
  3. /*---------------------------------------------------------------------------+
  4. | div_Xsig.S |
  5. | |
  6. | Division subroutine for 96 bit quantities |
  7. | |
  8. | Copyright (C) 1994,1995 |
  9. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  10. | Australia. E-mail billm@jacobi.maths.monash.edu.au |
  11. | |
  12. | |
  13. +---------------------------------------------------------------------------*/
  14. /*---------------------------------------------------------------------------+
  15. | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
  16. | put the 96 bit result at the location d. |
  17. | |
  18. | The result may not be accurate to 96 bits. It is intended for use where |
  19. | a result better than 64 bits is required. The result should usually be |
  20. | good to at least 94 bits. |
  21. | The returned result is actually divided by one half. This is done to |
  22. | prevent overflow. |
  23. | |
  24. | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
  25. | |
  26. | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
  27. | |
  28. +---------------------------------------------------------------------------*/
  29. #include "exception.h"
  30. #include "fpu_emu.h"
  31. #define XsigLL(x) (x)
  32. #define XsigL(x) 4(x)
  33. #define XsigH(x) 8(x)
  34. #ifndef NON_REENTRANT_FPU
  35. /*
  36. Local storage on the stack:
  37. Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  38. */
  39. #define FPU_accum_3 -4(%ebp)
  40. #define FPU_accum_2 -8(%ebp)
  41. #define FPU_accum_1 -12(%ebp)
  42. #define FPU_accum_0 -16(%ebp)
  43. #define FPU_result_3 -20(%ebp)
  44. #define FPU_result_2 -24(%ebp)
  45. #define FPU_result_1 -28(%ebp)
  46. #else
  47. .data
  48. /*
  49. Local storage in a static area:
  50. Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  51. */
  52. .align 4,0
  53. FPU_accum_3:
  54. .long 0
  55. FPU_accum_2:
  56. .long 0
  57. FPU_accum_1:
  58. .long 0
  59. FPU_accum_0:
  60. .long 0
  61. FPU_result_3:
  62. .long 0
  63. FPU_result_2:
  64. .long 0
  65. FPU_result_1:
  66. .long 0
  67. #endif /* NON_REENTRANT_FPU */
  68. .text
  69. ENTRY(div_Xsig)
  70. pushl %ebp
  71. movl %esp,%ebp
  72. #ifndef NON_REENTRANT_FPU
  73. subl $28,%esp
  74. #endif /* NON_REENTRANT_FPU */
  75. pushl %esi
  76. pushl %edi
  77. pushl %ebx
  78. movl PARAM1,%esi /* pointer to num */
  79. movl PARAM2,%ebx /* pointer to denom */
  80. #ifdef PARANOID
  81. testl $0x80000000, XsigH(%ebx) /* Divisor */
  82. je L_bugged
  83. #endif /* PARANOID */
  84. /*---------------------------------------------------------------------------+
  85. | Divide: Return arg1/arg2 to arg3. |
  86. | |
  87. | The maximum returned value is (ignoring exponents) |
  88. | .ffffffff ffffffff |
  89. | ------------------ = 1.ffffffff fffffffe |
  90. | .80000000 00000000 |
  91. | and the minimum is |
  92. | .80000000 00000000 |
  93. | ------------------ = .80000000 00000001 (rounded) |
  94. | .ffffffff ffffffff |
  95. | |
  96. +---------------------------------------------------------------------------*/
  97. /* Save extended dividend in local register */
  98. /* Divide by 2 to prevent overflow */
  99. clc
  100. movl XsigH(%esi),%eax
  101. rcrl %eax
  102. movl %eax,FPU_accum_3
  103. movl XsigL(%esi),%eax
  104. rcrl %eax
  105. movl %eax,FPU_accum_2
  106. movl XsigLL(%esi),%eax
  107. rcrl %eax
  108. movl %eax,FPU_accum_1
  109. movl $0,%eax
  110. rcrl %eax
  111. movl %eax,FPU_accum_0
  112. movl FPU_accum_2,%eax /* Get the current num */
  113. movl FPU_accum_3,%edx
  114. /*----------------------------------------------------------------------*/
  115. /* Initialization done.
  116. Do the first 32 bits. */
  117. /* We will divide by a number which is too large */
  118. movl XsigH(%ebx),%ecx
  119. addl $1,%ecx
  120. jnc LFirst_div_not_1
  121. /* here we need to divide by 100000000h,
  122. i.e., no division at all.. */
  123. mov %edx,%eax
  124. jmp LFirst_div_done
  125. LFirst_div_not_1:
  126. divl %ecx /* Divide the numerator by the augmented
  127. denom ms dw */
  128. LFirst_div_done:
  129. movl %eax,FPU_result_3 /* Put the result in the answer */
  130. mull XsigH(%ebx) /* mul by the ms dw of the denom */
  131. subl %eax,FPU_accum_2 /* Subtract from the num local reg */
  132. sbbl %edx,FPU_accum_3
  133. movl FPU_result_3,%eax /* Get the result back */
  134. mull XsigL(%ebx) /* now mul the ls dw of the denom */
  135. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  136. sbbl %edx,FPU_accum_2
  137. sbbl $0,FPU_accum_3
  138. je LDo_2nd_32_bits /* Must check for non-zero result here */
  139. #ifdef PARANOID
  140. jb L_bugged_1
  141. #endif /* PARANOID */
  142. /* need to subtract another once of the denom */
  143. incl FPU_result_3 /* Correct the answer */
  144. movl XsigL(%ebx),%eax
  145. movl XsigH(%ebx),%edx
  146. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  147. sbbl %edx,FPU_accum_2
  148. #ifdef PARANOID
  149. sbbl $0,FPU_accum_3
  150. jne L_bugged_1 /* Must check for non-zero result here */
  151. #endif /* PARANOID */
  152. /*----------------------------------------------------------------------*/
  153. /* Half of the main problem is done, there is just a reduced numerator
  154. to handle now.
  155. Work with the second 32 bits, FPU_accum_0 not used from now on */
  156. LDo_2nd_32_bits:
  157. movl FPU_accum_2,%edx /* get the reduced num */
  158. movl FPU_accum_1,%eax
  159. /* need to check for possible subsequent overflow */
  160. cmpl XsigH(%ebx),%edx
  161. jb LDo_2nd_div
  162. ja LPrevent_2nd_overflow
  163. cmpl XsigL(%ebx),%eax
  164. jb LDo_2nd_div
  165. LPrevent_2nd_overflow:
  166. /* The numerator is greater or equal, would cause overflow */
  167. /* prevent overflow */
  168. subl XsigL(%ebx),%eax
  169. sbbl XsigH(%ebx),%edx
  170. movl %edx,FPU_accum_2
  171. movl %eax,FPU_accum_1
  172. incl FPU_result_3 /* Reflect the subtraction in the answer */
  173. #ifdef PARANOID
  174. je L_bugged_2 /* Can't bump the result to 1.0 */
  175. #endif /* PARANOID */
  176. LDo_2nd_div:
  177. cmpl $0,%ecx /* augmented denom msw */
  178. jnz LSecond_div_not_1
  179. /* %ecx == 0, we are dividing by 1.0 */
  180. mov %edx,%eax
  181. jmp LSecond_div_done
  182. LSecond_div_not_1:
  183. divl %ecx /* Divide the numerator by the denom ms dw */
  184. LSecond_div_done:
  185. movl %eax,FPU_result_2 /* Put the result in the answer */
  186. mull XsigH(%ebx) /* mul by the ms dw of the denom */
  187. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  188. sbbl %edx,FPU_accum_2
  189. #ifdef PARANOID
  190. jc L_bugged_2
  191. #endif /* PARANOID */
  192. movl FPU_result_2,%eax /* Get the result back */
  193. mull XsigL(%ebx) /* now mul the ls dw of the denom */
  194. subl %eax,FPU_accum_0 /* Subtract from the num local reg */
  195. sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
  196. sbbl $0,FPU_accum_2
  197. #ifdef PARANOID
  198. jc L_bugged_2
  199. #endif /* PARANOID */
  200. jz LDo_3rd_32_bits
  201. #ifdef PARANOID
  202. cmpl $1,FPU_accum_2
  203. jne L_bugged_2
  204. #endif /* PARANOID */
  205. /* need to subtract another once of the denom */
  206. movl XsigL(%ebx),%eax
  207. movl XsigH(%ebx),%edx
  208. subl %eax,FPU_accum_0 /* Subtract from the num local reg */
  209. sbbl %edx,FPU_accum_1
  210. sbbl $0,FPU_accum_2
  211. #ifdef PARANOID
  212. jc L_bugged_2
  213. jne L_bugged_2
  214. #endif /* PARANOID */
  215. addl $1,FPU_result_2 /* Correct the answer */
  216. adcl $0,FPU_result_3
  217. #ifdef PARANOID
  218. jc L_bugged_2 /* Must check for non-zero result here */
  219. #endif /* PARANOID */
  220. /*----------------------------------------------------------------------*/
  221. /* The division is essentially finished here, we just need to perform
  222. tidying operations.
  223. Deal with the 3rd 32 bits */
  224. LDo_3rd_32_bits:
  225. /* We use an approximation for the third 32 bits.
  226. To take account of the 3rd 32 bits of the divisor
  227. (call them del), we subtract del * (a/b) */
  228. movl FPU_result_3,%eax /* a/b */
  229. mull XsigLL(%ebx) /* del */
  230. subl %edx,FPU_accum_1
  231. /* A borrow indicates that the result is negative */
  232. jnb LTest_over
  233. movl XsigH(%ebx),%edx
  234. addl %edx,FPU_accum_1
  235. subl $1,FPU_result_2 /* Adjust the answer */
  236. sbbl $0,FPU_result_3
  237. /* The above addition might not have been enough, check again. */
  238. movl FPU_accum_1,%edx /* get the reduced num */
  239. cmpl XsigH(%ebx),%edx /* denom */
  240. jb LDo_3rd_div
  241. movl XsigH(%ebx),%edx
  242. addl %edx,FPU_accum_1
  243. subl $1,FPU_result_2 /* Adjust the answer */
  244. sbbl $0,FPU_result_3
  245. jmp LDo_3rd_div
  246. LTest_over:
  247. movl FPU_accum_1,%edx /* get the reduced num */
  248. /* need to check for possible subsequent overflow */
  249. cmpl XsigH(%ebx),%edx /* denom */
  250. jb LDo_3rd_div
  251. /* prevent overflow */
  252. subl XsigH(%ebx),%edx
  253. movl %edx,FPU_accum_1
  254. addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
  255. adcl $0,FPU_result_3
  256. LDo_3rd_div:
  257. movl FPU_accum_0,%eax
  258. movl FPU_accum_1,%edx
  259. divl XsigH(%ebx)
  260. movl %eax,FPU_result_1 /* Rough estimate of third word */
  261. movl PARAM3,%esi /* pointer to answer */
  262. movl FPU_result_1,%eax
  263. movl %eax,XsigLL(%esi)
  264. movl FPU_result_2,%eax
  265. movl %eax,XsigL(%esi)
  266. movl FPU_result_3,%eax
  267. movl %eax,XsigH(%esi)
  268. L_exit:
  269. popl %ebx
  270. popl %edi
  271. popl %esi
  272. leave
  273. ret
  274. #ifdef PARANOID
  275. /* The logic is wrong if we got here */
  276. L_bugged:
  277. pushl EX_INTERNAL|0x240
  278. call EXCEPTION
  279. pop %ebx
  280. jmp L_exit
  281. L_bugged_1:
  282. pushl EX_INTERNAL|0x241
  283. call EXCEPTION
  284. pop %ebx
  285. jmp L_exit
  286. L_bugged_2:
  287. pushl EX_INTERNAL|0x242
  288. call EXCEPTION
  289. pop %ebx
  290. jmp L_exit
  291. #endif /* PARANOID */
  292. ENDPROC(div_Xsig)