/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 | wm_shrx.S
 |
 | 64 bit right shift functions
 |
 | Copyright (C) 1992,1995
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
 |                  Australia.  E-mail  billm@jacobi.maths.monash.edu.au
 |
 | Call from C as:
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)
 | and
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)
 |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
 | unsigned FPU_shrx(void *arg1, unsigned arg2)
 |
 | Extended shift right function.
 | Fastest for small shifts.
 | Shifts the 64 bit quantity pointed to by the first arg (arg1)
 | right by the number of bits specified by the second arg (arg2).
 | Forms a 96 bit quantity from the 64 bit arg and eax:
 |           [ 64 bit arg ][ eax ]
 |            shift right --------->
 | The eax register is initialized to 0 before the shifting.
 | Results returned in the 64 bit arg and eax.
 +---------------------------------------------------------------------------*/
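/*
 * For reference, a rough C model of the semantics described above.  This is
 * an illustrative sketch only, not code used by the emulator; the name
 * shrx_model and the use of GCC's unsigned __int128 for the 96 bit
 * intermediate value are assumptions made for the sake of the example.
 *
 *	#include <stdint.h>
 *
 *	static unsigned int shrx_model(uint64_t *arg1, unsigned int arg2)
 *	{
 *		// [ 64 bit arg ][ 32 zero bits ]  (the zero-initialized eax)
 *		unsigned __int128 v = (unsigned __int128)*arg1 << 32;
 *
 *		v = (arg2 < 96) ? (v >> arg2) : 0;	// shifts of 96+ clear everything
 *
 *		*arg1 = (uint64_t)(v >> 32);		// upper 64 bits back into arg
 *		return (unsigned int)v;			// lower 32 bits, returned in eax
 *	}
 */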
ENTRY(FPU_shrx)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* less than 32 bits */
	pushl	%ebx
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

L_more_than_31:
	cmpl	$64,%ecx
	jnc	L_more_than_63

	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%esi
	leave
	ret

L_more_than_63:
	cmpl	$96,%ecx
	jnc	L_more_than_95

	subb	$64,%cl
	movl	4(%esi),%eax	/* msl */
	shr	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

L_more_than_95:
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret
ENDPROC(FPU_shrx)

/*---------------------------------------------------------------------------+
 | unsigned FPU_shrxs(void *arg1, unsigned arg2)
 |
 | Extended shift right function (optimized for small floating point
 | integers).
 | Shifts the 64 bit quantity pointed to by the first arg (arg1)
 | right by the number of bits specified by the second arg (arg2).
 | Forms a 96 bit quantity from the 64 bit arg and eax:
 |           [ 64 bit arg ][ eax ]
 |            shift right --------->
 | The eax register is initialized to 0 before the shifting.
 | The lower 8 bits of eax are lost and replaced by a flag which is
 | set (to 0x01) if any bit, apart from the first one, is set in the
 | part which has been shifted out of the arg.
 | Results returned in the 64 bit arg and eax.
 +---------------------------------------------------------------------------*/
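/*
 * For reference, a rough C model of the semantics described above, including
 * the sticky flag placed in the low 8 bits of the return value.  Again an
 * illustrative sketch only, not code used by the emulator; the name
 * shrxs_model and the use of GCC's unsigned __int128 are assumptions made
 * for the sake of the example.
 *
 *	#include <stdint.h>
 *
 *	static unsigned int shrxs_model(uint64_t *arg1, unsigned int arg2)
 *	{
 *		// [ 64 bit arg ][ 32 zero bits ]  (the zero-initialized eax)
 *		unsigned __int128 v = (unsigned __int128)*arg1 << 32;
 *		unsigned int ext, sticky;
 *
 *		if (arg2 >= 96) {			// everything is shifted out
 *			sticky = (*arg1 != 0);		// flag as the assembly does
 *			*arg1 = 0;
 *			return sticky;
 *		}
 *
 *		ext = (unsigned int)(v >> arg2);	// low 32 bits: the extension
 *		*arg1 = (uint64_t)(v >> arg2 >> 32);	// upper 64 bits back into arg
 *
 *		// Flag: any shifted-out bit set, apart from the top bit of
 *		// the extension.
 *		sticky = ((ext & 0x7fffffff) != 0) ||
 *			 ((v & (((unsigned __int128)1 << arg2) - 1)) != 0);
 *
 *		return (ext & 0xffffff00) | sticky;
 *	}
 */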
ENTRY(FPU_shrxs)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$64,%ecx	/* shrd only handles 0..31 bits; 64+ is special-cased */
	jnc	Ls_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jc	Ls_less_than_32

/* We got here without jumps by assuming that the most common
   requirement is for small integers */
/* Shift by [32..63] bits */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%ebx,%ebx
	shrd	%cl,%eax,%ebx
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	orl	%ebx,%ebx	/* test these 32 bits */
	setne	%bl
	test	$0x7fffffff,%eax	/* and 31 bits here */
	setne	%bh
	orw	%bx,%bx		/* Any of the 63 bits set? */
	setne	%al
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [0..31] bits */
Ls_less_than_32:
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [64..95] bits */
Ls_more_than_63:
	cmpl	$96,%ecx
	jnc	Ls_more_than_95

	subb	$64,%cl
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%eax	/* msl */
	xorl	%edx,%edx	/* extension */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%ebx,%edx
	setne	%bl
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%bh
	orw	%bx,%bx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)	/* set to zero */
	movl	%edx,4(%esi)	/* set to zero */
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_more_than_95:
/* Shift by [96..inf) bits */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret
ENDPROC(FPU_shrxs)