123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- /* $OpenBSD: __reml.S,v 1.1 2007/11/25 18:25:34 deraadt Exp $ */
- /* $NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $ */
- /*
- * Copyright (c) 1994, 1995 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Author: Chris G. Demetriou
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
- /*
- * Division and remainder.
- *
- * The use of m4 is modeled after the sparc code, but the algorithm is
- * simple binary long division.
- *
- * Note that the loops could probably benefit from unrolling.
- */
- /*
- * M4 Parameters (shown here after expansion for this file)
- * __reml name of function to generate
- * rem operation: div computes t10 / t11 -> t12; rem computes t10 % t11 -> t12
- * true signedness: true means signed; false means unsigned
- * 32 total number of bits
- */
- #include <machine/asm.h>
/*
 * __reml: signed 32-bit remainder, t12 = t10 % t11.
 *
 * In:	t10 = dividend, t11 = divisor.  Only the low 32 bits are used;
 *	the upper 32 bits are zapped before the division starts.
 * Out:	t12 = remainder; its sign follows the dividend.
 * Ret:	through t9.
 *
 * t0-t4, t10 and t11 are saved in a 64-byte frame and restored before
 * returning.  A zero divisor raises PAL_gentrap with code -2; if
 * execution resumes afterwards, zero is returned.
 *
 * Register roles inside the routine:
 *	t0	current quotient bit mask
 *	t1	shift count
 *	t2	comparison result
 *	t3	scratch
 *	t4	low bit set => negate the result
 *
 * Frame layout (offsets from sp):
 *	0 t0, 8 t1, 16 t2, 24 t3, 32 t4, 40 t10, 48 t11,
 *	56 a0 (written only on the division-by-zero path)
 */
LEAF(__reml, 0)				/* XXX */
	lda	sp, -64(sp)
	stq	t0, 0(sp)
	stq	t1, 8(sp)
	stq	t2, 16(sp)
	stq	t3, 24(sp)
	stq	t4, 32(sp)
	stq	t10, 40(sp)
	stq	t11, 48(sp)
	mov	zero, t12		/* Initialize result to zero */

	/* Compute sign of result.  If either is negative, this is easy. */
	or	t10, t11, t4		/* not the sign, but... */
	srl	t4, 32 - 1, t4		/* rather, or of high bits */
	blbc	t4, Ldoit		/* neither negative? do it! */

	mov	t10, t4			/* sign follows t10. */
	srl	t4, 32 - 1, t4		/* make negation the low bit. */

	srl	t10, 32 - 1, t1		/* is t10 negative? */
	blbc	t1, LnegB		/* no. */
	/* t10 is negative; flip it. */
	/* top 32 bits may be random junk */
	zap	t10, 0xf0, t10
	subq	zero, t10, t10
	srl	t11, 32 - 1, t1		/* is t11 negative? */
	blbc	t1, Ldoit		/* no. */
LnegB:
	/* t11 is definitely negative, no matter how we got here. */
	/* top 32 bits may be random junk */
	zap	t11, 0xf0, t11
	subq	zero, t11, t11
Ldoit:
	/*
	 * Clear the top 32 bits of each operand, as they may contain
	 * sign extension (if negated above), or random junk.
	 */
	zap	t10, 0xf0, t10
	zap	t11, 0xf0, t11

	/* kill the special cases. */
	beq	t11, Ldotrap		/* division by zero! */

	cmpult	t10, t11, t2		/* t10 < t11? */
	/* t12 is already zero, from above.  t10 is untouched. */
	bne	t2, Lret_result

	cmpeq	t10, t11, t2		/* t10 == t11? */
	cmovne	t2, 1, t12
	cmovne	t2, zero, t10		/* equal operands: remainder is 0 */
	bne	t2, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	t3, 1			/* t1 = 0; t0 = 1<<32-1 */
	mov	zero, t1
	sll	t3, 32-1, t0
LBloop:
	and	t11, t0, t2		/* if bit in t11 is set, done. */
	bne	t2, LAbits
	addq	t1, 1, t1		/* increment t1, bit */
	srl	t0, 1, t0
	cmplt	t1, 32-1, t2		/* if t1 leaves one bit, done. */
	bne	t2, LBloop

	/*
	 * Walk down from the top bit of the dividend, reducing the shift
	 * count by one for every leading zero bit found there.
	 */
LAbits:
	beq	t1, Ldodiv		/* If t1 = 0, divide now. */
	ldiq	t3, 1			/* t0 = 1<<32-1 */
	sll	t3, 32-1, t0
LAloop:
	and	t10, t0, t2		/* if bit in t10 is set, done. */
	bne	t2, Ldodiv
	subq	t1, 1, t1		/* decrement t1, bit */
	srl	t0, 1, t0
	bne	t1, LAloop		/* If t1 != 0, loop again */

Ldodiv:
	sll	t11, t1, t11		/* t11 <<= i */
	ldiq	t3, 1
	sll	t3, t1, t0		/* t0 = 1 << i */

	/*
	 * Binary long division.  Whenever t10 >= t11 (t2 == 0) the current
	 * bit is set in t12 and t11 is subtracted from t10; then the bit
	 * mask and the shifted divisor both move right by one.
	 */
Ldivloop:
	cmpult	t10, t11, t2
	or	t12, t0, t3
	cmoveq	t2, t3, t12
	subq	t10, t11, t3
	cmoveq	t2, t3, t10
	srl	t0, 1, t0
	srl	t11, 1, t11
	beq	t10, Lret_result	/* nothing left to divide */
	bne	t0, Ldivloop

Lret_result:
	/* The remainder is what was asked for; quotient bits are dropped. */
	mov	t10, t12

	/* Check to see if we should negate it. */
	subqv	zero, t12, t3
	cmovlbs	t4, t3, t12

	ldq	t0, 0(sp)
	ldq	t1, 8(sp)
	ldq	t2, 16(sp)
	ldq	t3, 24(sp)
	ldq	t4, 32(sp)
	ldq	t10, 40(sp)
	ldq	t11, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	/*
	 * The trap code is passed in a0, which is not one of the registers
	 * saved on entry.  Park the caller's a0 in the spare frame slot so
	 * that it is intact if execution resumes after the trap.
	 */
	stq	a0, 56(sp)
	ldiq	a0, -2			/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)
	mov	zero, t10		/* so that zero will be returned */
	br	zero, Lret_result

END(__reml)
|