divrem.m4 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /* $OpenBSD: divrem.m4,v 1.3 1996/10/31 00:43:17 niklas Exp $ */
  2. /* $NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $ */
  3. /*
  4. * Copyright (c) 1994, 1995 Carnegie-Mellon University.
  5. * All rights reserved.
  6. *
  7. * Author: Chris G. Demetriou
  8. *
  9. * Permission to use, copy, modify and distribute this software and
  10. * its documentation is hereby granted, provided that both the copyright
  11. * notice and this permission notice appear in all copies of the
  12. * software, derivative works or modified versions, and any portions
  13. * thereof, and that both notices appear in supporting documentation.
  14. *
  15. * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  16. * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  17. * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  18. *
  19. * Carnegie Mellon requests users of this software to return to
  20. *
  21. * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
  22. * School of Computer Science
  23. * Carnegie Mellon University
  24. * Pittsburgh PA 15213-3890
  25. *
  26. * any improvements or extensions that they make and grant Carnegie the
  27. * rights to redistribute these changes.
  28. */
  29. /*
  30. * Division and remainder.
  31. *
  32. * The use of m4 is modeled after the sparc code, but the algorithm is
  33. * simple binary long division.
  34. *
  35. * Note that the loops could probably benefit from unrolling.
  36. */
  37. /*
  38. * M4 Parameters
  39. * NAME name of function to generate
  40. * OP OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
  41. * S S=true: signed; S=false: unsigned
  42. * WORDSIZE total number of bits
  43. */
  /*
   * Symbolic names for the registers the routine works with.  The
   * operand and result registers are the ones named in the parameter
   * list above; the rest are scratch registers for the routine body.
   */
  define(`A', `t10')		/* first operand: the dividend */
  define(`B', `t11')		/* second operand: the divisor */
  define(`RESULT', `t12')		/* value handed back to the caller */
  define(`BIT', `t0')		/* single-bit mask */
  define(`I', `t1')		/* bit-position counter */
  define(`CC', `t2')		/* outcome of the latest comparison */
  define(`T_0', `t3')		/* general temporary */
  ifelse(S, `true', `define(`NEG', `t4')')	/* signed variants only: sign bookkeeping */
  52. #include <machine/asm.h>
  /*
   * NAME -- WORDSIZE-bit OP by binary long division.
   *
   * Linkage, as the code below uses it:
   *	t10	dividend; reloaded from the frame before returning
   *	t11	divisor; reloaded from the frame before returning
   *	t12	result: quotient for the div flavor, remainder for the rem flavor
   *	t9	return address
   * Every other register written here is spilled to a 64-byte frame on
   * entry and reloaded on exit:
   *	 0(sp) t0	 8(sp) t1	16(sp) t2	24(sp) t3
   *	32(sp) t4, signed variants only
   *	40(sp) t10	48(sp) t11
   *	56(sp) a0, written only on the divide-by-zero path
   *
   * A zero divisor raises PAL_gentrap with a0 = -2; if execution resumes
   * after the trap, the routine returns zero.
   *
   * In the signed 64-bit variant the final negation does not trap, so the
   * most negative value divided by -1 wraps around instead of faulting.
   *
   * NOTE(review): for the 32-bit variants a result with bit 31 set is left
   * zero-extended in t12 unless it was negated; confirm that callers do
   * not expect a sign-extended longword.
   */
  LEAF(NAME, 0)					/* XXX */
  	/* Open the frame and spill everything that gets written below. */
  	lda	sp, -64(sp)
  	stq	BIT, 0(sp)
  	stq	I, 8(sp)
  	stq	CC, 16(sp)
  	stq	T_0, 24(sp)
  ifelse(S, `true',
  `	stq	NEG, 32(sp)')
  	stq	A, 40(sp)
  	stq	B, 48(sp)
  	mov	zero, RESULT			/* Initialize result to zero */

  ifelse(S, `true',
  `
  	/*
  	 * Compute sign of result.  If neither operand is negative the low
  	 * bit of NEG ends up clear and the epilogue leaves RESULT alone.
  	 */
  	or	A, B, NEG			/* not the sign, but... */
  	srl	NEG, WORDSIZE - 1, NEG		/* rather, or of high bits */
  	blbc	NEG, Ldoit			/* neither negative? do it! */

  ifelse(OP, `div',
  `	xor	A, B, NEG			/* THIS is the sign! */
  ', `	mov	A, NEG				/* sign follows A. */
  ')
  	srl	NEG, WORDSIZE - 1, NEG		/* make negation the low bit. */

  	srl	A, WORDSIZE - 1, I		/* is A negative? */
  	blbc	I, LnegB			/* no, so B must be. */
  	/* A is negative; flip it. */
  ifelse(WORDSIZE, `32', `
  	/* top 32 bits may be random junk */
  	zap	A, 0xf0, A
  ')
  	subq	zero, A, A
  	srl	B, WORDSIZE - 1, I		/* is B negative? */
  	blbc	I, Ldoit			/* no. */
  LnegB:
  	/* B is definitely negative, no matter how we got here. */
  ifelse(WORDSIZE, `32', `
  	/* top 32 bits may be random junk */
  	zap	B, 0xf0, B
  ')
  	subq	zero, B, B
  Ldoit:
  ')
  ifelse(WORDSIZE, `32', `
  	/*
  	 * Clear the top 32 bits of each operand, as they may be
  	 * sign extension (if negated above), or random junk.
  	 */
  	zap	A, 0xf0, A
  	zap	B, 0xf0, B
  ')

  	/* From here on A and B are treated as unsigned magnitudes. */

  	/* kill the special cases. */
  	beq	B, Ldotrap			/* division by zero! */

  	cmpult	A, B, CC			/* A < B? */
  	/* RESULT is already zero, from above.  A is untouched. */
  	bne	CC, Lret_result

  	cmpeq	A, B, CC			/* A == B? */
  	cmovne	CC, 1, RESULT			/* quotient 1 ... */
  	cmovne	CC, zero, A			/* ... remainder 0 */
  	bne	CC, Lret_result

  	/*
  	 * Find out how many bits of zeros are at the beginning of the divisor.
  	 */
  LBbits:
  	ldiq	T_0, 1				/* I = 0; BIT = 1<<WORDSIZE-1 */
  	mov	zero, I
  	sll	T_0, WORDSIZE-1, BIT
  LBloop:
  	and	B, BIT, CC			/* if bit in B is set, done. */
  	bne	CC, LAbits
  	addq	I, 1, I				/* one more leading zero; move BIT down */
  	srl	BIT, 1, BIT
  	cmplt	I, WORDSIZE-1, CC		/* if I leaves one bit, done. */
  	bne	CC, LBloop

  	/*
  	 * Take back one position for every zero at the beginning of the
  	 * dividend, stopping early if I runs out.
  	 */
  LAbits:
  	beq	I, Ldodiv			/* If I = 0, divide now.  */
  	ldiq	T_0, 1				/* BIT = 1<<WORDSIZE-1 */
  	sll	T_0, WORDSIZE-1, BIT

  LAloop:
  	and	A, BIT, CC			/* if bit in A is set, done. */
  	bne	CC, Ldodiv
  	subq	I, 1, I				/* one position fewer; move BIT down */
  	srl	BIT, 1, BIT
  	bne	I, LAloop			/* If I != 0, loop again */

  	/*
  	 * Line the divisor up under the top of the dividend and start
  	 * with the quotient bit that belongs to that alignment.
  	 */
  Ldodiv:
  	sll	B, I, B				/* B <<= i */
  	ldiq	T_0, 1
  	sll	T_0, I, BIT

  	/*
  	 * One quotient bit per pass: when A >= B, set the bit in RESULT
  	 * and take B out of A.  Both updates are conditional moves keyed
  	 * on the same comparison, so the loop body has no branches.
  	 */
  Ldivloop:
  	cmpult	A, B, CC			/* CC = 1 when B does not fit */
  	or	RESULT, BIT, T_0
  	cmoveq	CC, T_0, RESULT			/* fits: record the quotient bit */
  	subq	A, B, T_0
  	cmoveq	CC, T_0, A			/* fits: keep the difference */
  	srl	BIT, 1, BIT
  	srl	B, 1, B
  	beq	A, Lret_result			/* nothing left to divide */
  	bne	BIT, Ldivloop			/* more quotient bits to try */

  Lret_result:
  	/* What is left in A is the remainder. */
  ifelse(OP, `div',
  `', `	mov	A, RESULT
  ')
  ifelse(S, `true',
  `
  	/*
  	 * Check to see if we should negate it.  The negation is computed
  	 * whether or not it is used, so it has to be the non-trapping
  	 * subtract: the overflow-checking form would fault on any result
  	 * of magnitude 1<<63, even when that value is the right answer.
  	 */
  	subq	zero, RESULT, T_0
  	cmovlbs	NEG, T_0, RESULT
  ')

  	/* Reload everything spilled on entry and drop the frame. */
  	ldq	BIT, 0(sp)
  	ldq	I, 8(sp)
  	ldq	CC, 16(sp)
  	ldq	T_0, 24(sp)
  ifelse(S, `true',
  `	ldq	NEG, 32(sp)')
  	ldq	A, 40(sp)
  	ldq	B, 48(sp)
  	lda	sp, 64(sp)
  	ret	zero, (t9), 1

  Ldotrap:
  	/*
  	 * a0 carries the trap code, so park the incoming value in the
  	 * spare frame slot and put it back afterwards.
  	 * NOTE(review): this assumes callers rely on a0 surviving the
  	 * call, like the other registers saved above; confirm.
  	 */
  	stq	a0, 56(sp)
  	ldiq	a0, -2			/* This is the signal to SIGFPE! */
  	call_pal PAL_gentrap
  	ldq	a0, 56(sp)
  ifelse(OP, `div',
  `', `	mov	zero, A			/* so that zero will be returned */
  ')
  	br	zero, Lret_result

  END(NAME)