__reml.S 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /* $OpenBSD: __reml.S,v 1.1 2007/11/25 18:25:34 deraadt Exp $ */
  2. /* $NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $ */
  3. /*
  4. * Copyright (c) 1994, 1995 Carnegie-Mellon University.
  5. * All rights reserved.
  6. *
  7. * Author: Chris G. Demetriou
  8. *
  9. * Permission to use, copy, modify and distribute this software and
  10. * its documentation is hereby granted, provided that both the copyright
  11. * notice and this permission notice appear in all copies of the
  12. * software, derivative works or modified versions, and any portions
  13. * thereof, and that both notices appear in supporting documentation.
  14. *
  15. * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  16. * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  17. * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  18. *
  19. * Carnegie Mellon requests users of this software to return to
  20. *
  21. * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
  22. * School of Computer Science
  23. * Carnegie Mellon University
  24. * Pittsburgh PA 15213-3890
  25. *
  26. * any improvements or extensions that they make and grant Carnegie the
  27. * rights to redistribute these changes.
  28. */
  29. /*
  30. * Division and remainder.
  31. *
  32. * The use of m4 is modeled after the sparc code, but the algorithm is
  33. * simple binary long division.
  34. *
  35. * Note that the loops could probably benefit from unrolling.
  36. */
  37. /*
  38. * M4 Parameters
  39. * NAME (here: __reml) name of function to generate
  40. * OP (here: rem) OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
  41. * S (here: true) S=true: signed; S=false: unsigned
  42. * WORDSIZE (here: 32) total number of bits
  43. */
  44. #include <machine/asm.h>
  /*
   * __reml: signed 32-bit remainder, t12 = t10 % t11.
   *
   * In:   t10 = dividend, t11 = divisor (only the low 32 bits are used;
   *       the upper 32 bits are cleared before dividing)
   *       t9  = return address
   * Out:  t12 = remainder; its sign follows the dividend (t10)
   *
   * t0-t4, t10 and t11 are saved on entry and restored before returning.
   * A zero divisor raises PAL_gentrap with a0 = -2; if the trap returns,
   * the routine returns 0.  a0 is saved and restored around the trap.
   *
   * Register roles inside the routine:
   *   t0  current quotient bit / bit mask used while scanning operands
   *   t1  shift count applied to the divisor
   *   t2  comparison result (condition scratch)
   *   t3  general temporary
   *   t4  low bit set => negate the result before returning
   *
   * Stack frame (64 bytes):
   *   0..32  saved t0-t4
   *   40,48  saved t10, t11
   *   56     saved a0 (written only on the divide-by-zero path)
   */
  LEAF(__reml, 0) /* XXX */
          lda     sp, -64(sp)
          stq     t0, 0(sp)
          stq     t1, 8(sp)
          stq     t2, 16(sp)
          stq     t3, 24(sp)
          stq     t4, 32(sp)
          stq     t10, 40(sp)
          stq     t11, 48(sp)
          mov     zero, t12               /* Initialize result to zero */

          /* Compute sign of result.  If either is negative, this is easy. */
          or      t10, t11, t4            /* not the sign, but... */
          srl     t4, 32 - 1, t4          /* rather, or of high bits */
          blbc    t4, Ldoit               /* neither negative? do it! */

          mov     t10, t4                 /* sign follows t10. */
          srl     t4, 32 - 1, t4          /* make negation the low bit. */

          srl     t10, 32 - 1, t1         /* is t10 negative? */
          blbc    t1, LnegB               /* no. */
          /* t10 is negative; flip it. */
          /* top 32 bits may be random junk */
          zap     t10, 0xf0, t10
          subq    zero, t10, t10
          srl     t11, 32 - 1, t1         /* is t11 negative? */
          blbc    t1, Ldoit               /* no. */
  LnegB:
          /* t11 is definitely negative, no matter how we got here. */
          /* top 32 bits may be random junk */
          zap     t11, 0xf0, t11
          subq    zero, t11, t11
  Ldoit:
          /*
           * Clear the top 32 bits of each operand, as they may be
           * sign extension (if negated above), or random junk.
           * From here on t10 and t11 are unsigned 32-bit magnitudes.
           */
          zap     t10, 0xf0, t10
          zap     t11, 0xf0, t11

          /* kill the special cases. */
          beq     t11, Ldotrap            /* division by zero! */

          cmpult  t10, t11, t2            /* t10 < t11? */
          /* t12 is already zero, from above.  t10 is untouched. */
          bne     t2, Lret_result

          cmpeq   t10, t11, t2            /* t10 == t11? */
          cmovne  t2, 1, t12              /* quotient 1 ... */
          cmovne  t2, zero, t10           /* ... remainder 0 */
          bne     t2, Lret_result

          /*
           * Find out how many bits of zeros are at the beginning of the divisor.
           */
  LBbits:
          ldiq    t3, 1                   /* t1 = 0; t0 = 1<<32-1 */
          mov     zero, t1
          sll     t3, 32-1, t0
  LBloop:
          and     t11, t0, t2             /* if bit in t11 is set, done. */
          bne     t2, LAbits
          addq    t1, 1, t1               /* increment t1, bit */
          srl     t0, 1, t0
          cmplt   t1, 32-1, t2            /* if t1 leaves one bit, done. */
          bne     t2, LBloop

          /*
           * Reduce the shift count by the number of leading zero bits in
           * the dividend, stopping at zero.
           */
  LAbits:
          beq     t1, Ldodiv              /* If t1 = 0, divide now. */
          ldiq    t3, 1                   /* t0 = 1<<32-1 */
          sll     t3, 32-1, t0
  LAloop:
          and     t10, t0, t2             /* if bit in t10 is set, done. */
          bne     t2, Ldodiv
          subq    t1, 1, t1               /* decrement t1, bit */
          srl     t0, 1, t0
          bne     t1, LAloop              /* If t1 != 0, loop again */

  Ldodiv:
          sll     t11, t1, t11            /* t11 <<= i */
          ldiq    t3, 1
          sll     t3, t1, t0              /* t0 = quotient bit for this shift */

          /*
           * Binary long division: whenever the shifted divisor fits into
           * the running remainder (t10), subtract it and set the matching
           * quotient bit in t12.  Both updates use cmoveq on the same
           * comparison, so they happen together or not at all.
           */
  Ldivloop:
          cmpult  t10, t11, t2            /* t2 != 0: divisor does not fit */
          or      t12, t0, t3
          cmoveq  t2, t3, t12             /* fits: record quotient bit */
          subq    t10, t11, t3
          cmoveq  t2, t3, t10             /* fits: reduce remainder */
          srl     t0, 1, t0
          srl     t11, 1, t11
          beq     t10, Lret_result        /* remainder exhausted: done */
          bne     t0, Ldivloop            /* more quotient bits to try */

  Lret_result:
          /* Remainder variant: the result is t10, not the quotient in t12. */
          mov     t10, t12

          /* Check to see if we should negate it. */
          subqv   zero, t12, t3
          cmovlbs t4, t3, t12

          ldq     t0, 0(sp)
          ldq     t1, 8(sp)
          ldq     t2, 16(sp)
          ldq     t3, 24(sp)
          ldq     t4, 32(sp)
          ldq     t10, 40(sp)
          ldq     t11, 48(sp)
          lda     sp, 64(sp)
          ret     zero, (t9), 1

  Ldotrap:
          /*
           * a0 carries the trap code but belongs to the caller: park it in
           * the otherwise unused frame slot so it survives if the trap
           * returns (for instance when the signal is handled).
           */
          stq     a0, 56(sp)
          ldiq    a0, -2                  /* This is the signal to SIGFPE! */
          call_pal PAL_gentrap
          ldq     a0, 56(sp)
          mov     zero, t10               /* so that zero will be returned */
          br      zero, Lret_result
  END(__reml)