12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712 |
- /* SPDX-License-Identifier: GPL-2.0 */
- .file "reg_round.S"
- /*---------------------------------------------------------------------------+
- | reg_round.S |
- | |
- | Rounding/truncation/etc for FPU basic arithmetic functions. |
- | |
- | Copyright (C) 1993,1995,1997 |
- | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
- | Australia. E-mail billm@suburbia.net |
- | |
- | This code has four possible entry points. |
- | The following must be entered by a jmp instruction: |
- | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
- | |
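- | The FPU_round entry point is intended to be used by C code. |
- | From C, call as: |
- | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
- | NOTE: the code below takes the control word from PARAM4 and the sign of |
- | the result from PARAM5, so the caller must supply both of those stack |
- | arguments; the prototype shown above appears to be out of date and |
- | should be checked against the C declaration. |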
- | |
- | Return value is the tag of the answer, or-ed with FPU_Exception if |
- | one was raised, or -1 on internal error. |
- | |
- | For correct "up" and "down" rounding, the argument must have the correct |
- | sign. |
- | |
- +---------------------------------------------------------------------------*/
- /*---------------------------------------------------------------------------+
- | Four entry points. |
- | |
- | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
- | %eax:%ebx 64 bit significand |
- | %edx 32 bit extension of the significand |
- | %edi pointer to an FPU_REG for the result to be stored |
- | stack calling function must have set up a C stack frame and |
- | pushed %esi, %edi, and %ebx |
- | |
- | Needed just for the fpu_reg_round_sqrt entry point: |
- | %cx A control word in the same format as the FPU control word. |
- | Otherwise, PARAM4 must give such a value. |
- | |
- | |
- | The significand and its extension are assumed to be exact in the |
- | following sense: |
- | If the significand by itself is the exact result then the significand |
- | extension (%edx) must contain 0, otherwise the significand extension |
- | must be non-zero. |
- | If the significand extension is non-zero then the significand is |
- | smaller than the magnitude of the correct exact result by an amount |
- | greater than zero and less than one ls bit of the significand. |
- | The significand extension is only required to have three possible |
- | non-zero values: |
- | less than 0x80000000 <=> the significand is less than 1/2 an ls |
- | bit smaller than the magnitude of the |
- | true exact result. |
- | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
- | smaller than the magnitude of the true |
- | exact result. |
- | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
- | bit smaller than the magnitude of the |
- | true exact result. |
- | |
- +---------------------------------------------------------------------------*/
- /*---------------------------------------------------------------------------+
- | The code in this module has become quite complex, but it should handle |
- | all of the FPU flags which are set at this stage of the basic arithmetic |
- | computations. |
- | There are a few rare cases where the results are not set identically to |
- | a real FPU. These require a bit more thought because at this stage the |
- | results of the code here appear to be more consistent... |
- | This may be changed in a future version. |
- +---------------------------------------------------------------------------*/
- #include "fpu_emu.h"
- #include "exception.h"
- #include "control_w.h"
- /* Flags for FPU_bits_lost. The byte is cleared to 0 before rounding starts;
- LOST_DOWN is written when low-order bits are discarded by truncation and
- LOST_UP when the significand is incremented. */
- #define LOST_DOWN $1
- #define LOST_UP $2
- /* Flags for FPU_denormal. 0 means the argument was not de-normalised;
- DENORMAL means it was shifted right and has to be re-examined after
- rounding; UNMASKED_UNDERFLOW means the exponent was too small but the
- underflow exception is unmasked, so no shift was done. */
- #define DENORMAL $1
- #define UNMASKED_UNDERFLOW $2
- #ifndef NON_REENTRANT_FPU
- /* Make the code re-entrant by putting
- local storage on the stack: the two bytes live in the dword pushed at
- fpu_reg_round, so they are only addressable through these macros while
- %esp is back at that level (not between a temporary push and its pop). */
- #define FPU_bits_lost (%esp)
- #define FPU_denormal 1(%esp)
- #else
- /* Not re-entrant, so we can gain speed by putting
- local storage in a static area: */
- .data
- .align 4,0
- FPU_bits_lost:
- .byte 0
- FPU_denormal:
- .byte 0
- #endif /* NON_REENTRANT_FPU */
- .text
- .globl fpu_reg_round
- .globl fpu_Arith_exit
- /* Entry point when called from C: builds the stack frame which the jmp-only
- entry points expect their caller to have made, loads the significand and
- its extension from the arguments, then falls into fpu_reg_round. */
- ENTRY(FPU_round)
- pushl %ebp
- movl %esp,%ebp /* frame pointer, released by "leave" at fpu_Arith_exit */
- pushl %esi /* saved here, popped in reverse order at fpu_Arith_exit */
- pushl %edi
- pushl %ebx
- movl PARAM1,%edi /* %edi = arg; the result is written back through it */
- movl SIGH(%edi),%eax /* %eax:%ebx = 64 bit significand */
- movl SIGL(%edi),%ebx
- movl PARAM2,%edx /* %edx = 32 bit extension of the significand */
- fpu_reg_round: /* Normal entry point */
- movl PARAM4,%ecx /* %ecx = control word */
- #ifndef NON_REENTRANT_FPU
- pushl %ebx /* adjust the stack pointer: this dword holds FPU_bits_lost and FPU_denormal */
- #endif /* NON_REENTRANT_FPU */
- #ifdef PARANOID
- /* Cannot use this here yet */
- /* orl %eax,%eax */
- /* jns L_entry_bugged */
- #endif /* PARANOID */
- cmpw EXP_UNDER,EXP(%edi) /* branch below is taken when exponent <= EXP_UNDER */
- jle L_Make_denorm /* The number is a de-normal */
- movb $0,FPU_denormal /* 0 -> not a de-normal */
- Denorm_done:
- movb $0,FPU_bits_lost /* No bits yet lost in rounding */
- movl %ecx,%esi /* keep the whole control word; %ecx is reused as scratch */
- andl CW_PC,%ecx /* isolate the precision control bits */
- cmpl PR_64_BITS,%ecx
- je LRound_To_64
- cmpl PR_53_BITS,%ecx
- je LRound_To_53
- cmpl PR_24_BITS,%ecx
- je LRound_To_24
- #ifdef PECULIAR_486
- /* With the precision control bits set to 01 "(reserved)", a real 80486
- behaves as if the precision control bits were set to 11 "64 bits" */
- cmpl PR_RESERVED_BITS,%ecx
- je LRound_To_64
- #ifdef PARANOID
- jmp L_bugged_denorm_486
- #endif /* PARANOID */
- #else
- #ifdef PARANOID
- jmp L_bugged_denorm /* There is no bug, just a bad control word */
- #endif /* PARANOID */
- #endif /* PECULIAR_486 */
- /* Round etc to 24 bit precision. The bits to be discarded are the low 8
- bits of %eax together with all of %ebx and %edx. This label is also
- reached by fall-through from the precision test above when no precision
- value matched and PARANOID is not defined. */
- LRound_To_24:
- movl %esi,%ecx /* %esi still holds the whole control word */
- andl CW_RC,%ecx /* isolate the rounding control bits */
- cmpl RC_RND,%ecx
- je LRound_nearest_24
- cmpl RC_CHOP,%ecx
- je LCheck_truncate_24
- cmpl RC_UP,%ecx /* Towards +infinity */
- je LUp_24
- cmpl RC_DOWN,%ecx /* Towards -infinity */
- je LDown_24
- #ifdef PARANOID
- jmp L_bugged_round24
- #endif /* PARANOID */
- LUp_24: /* without PARANOID an unmatched rounding mode falls in here */
- cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
- jne LCheck_truncate_24 /* If negative then up==truncate */
- jmp LCheck_24_round_up
- LDown_24:
- cmpb SIGN_POS,PARAM5
- je LCheck_truncate_24 /* If positive then down==truncate */
- LCheck_24_round_up: /* directed rounding away from zero: increment if anything would be discarded */
- movl %eax,%ecx
- andl $0x000000ff,%ecx
- orl %ebx,%ecx
- orl %edx,%ecx /* ZF set only if every discarded bit is zero */
- jnz LDo_24_round_up
- jmp L_Re_normalise /* already exact at 24 bits */
- LRound_nearest_24:
- /* Do rounding of the 24th bit if needed (nearest or even) */
- movl %eax,%ecx
- andl $0x000000ff,%ecx
- cmpl $0x00000080,%ecx /* compare the discarded byte with one half */
- jc LCheck_truncate_24 /* less than half, no increment needed */
- jne LGreater_Half_24 /* greater than half, increment needed */
- /* Possibly half, we need to check the ls bits */
- orl %ebx,%ebx
- jnz LGreater_Half_24 /* greater than half, increment needed */
- orl %edx,%edx
- jnz LGreater_Half_24 /* greater than half, increment needed */
- /* Exactly half, increment only if 24th bit is 1 (round to even) */
- testl $0x00000100,%eax
- jz LDo_truncate_24
- LGreater_Half_24: /* Rounding: increment at the 24th bit */
- LDo_24_round_up:
- andl $0xffffff00,%eax /* Truncate to 24 bits */
- xorl %ebx,%ebx
- movb LOST_UP,FPU_bits_lost
- addl $0x00000100,%eax /* the carry out of this add is tested at LCheck_Round_Overflow */
- jmp LCheck_Round_Overflow
- LCheck_truncate_24:
- movl %eax,%ecx
- andl $0x000000ff,%ecx
- orl %ebx,%ecx
- orl %edx,%ecx
- jz L_Re_normalise /* No truncation needed */
- LDo_truncate_24:
- andl $0xffffff00,%eax /* Truncate to 24 bits */
- xorl %ebx,%ebx
- movb LOST_DOWN,FPU_bits_lost
- jmp L_Re_normalise
- /* Round etc to 53 bit precision. The bits to be discarded are the low 11
- bits of %ebx together with all of %edx. */
- LRound_To_53:
- movl %esi,%ecx /* %esi still holds the whole control word */
- andl CW_RC,%ecx /* isolate the rounding control bits */
- cmpl RC_RND,%ecx
- je LRound_nearest_53
- cmpl RC_CHOP,%ecx
- je LCheck_truncate_53
- cmpl RC_UP,%ecx /* Towards +infinity */
- je LUp_53
- cmpl RC_DOWN,%ecx /* Towards -infinity */
- je LDown_53
- #ifdef PARANOID
- jmp L_bugged_round53
- #endif /* PARANOID */
- LUp_53: /* without PARANOID an unmatched rounding mode falls in here */
- cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
- jne LCheck_truncate_53 /* If negative then up==truncate */
- jmp LCheck_53_round_up
- LDown_53:
- cmpb SIGN_POS,PARAM5
- je LCheck_truncate_53 /* If positive then down==truncate */
- LCheck_53_round_up: /* directed rounding away from zero: increment if anything would be discarded */
- movl %ebx,%ecx
- andl $0x000007ff,%ecx
- orl %edx,%ecx /* ZF set only if every discarded bit is zero */
- jnz LDo_53_round_up
- jmp L_Re_normalise /* already exact at 53 bits */
- LRound_nearest_53:
- /* Do rounding of the 53rd bit if needed (nearest or even) */
- movl %ebx,%ecx
- andl $0x000007ff,%ecx
- cmpl $0x00000400,%ecx /* compare the discarded 11 bits with one half */
- jc LCheck_truncate_53 /* less than half, no increment needed */
- jnz LGreater_Half_53 /* greater than half, increment needed */
- /* Possibly half, we need to check the ls bits */
- orl %edx,%edx
- jnz LGreater_Half_53 /* greater than half, increment needed */
- /* Exactly half, increment only if 53rd bit is 1 (round to even) */
- testl $0x00000800,%ebx
- jz LTruncate_53
- LGreater_Half_53: /* Rounding: increment at the 53rd bit */
- LDo_53_round_up:
- movb LOST_UP,FPU_bits_lost
- andl $0xfffff800,%ebx /* Truncate to 53 bits */
- addl $0x00000800,%ebx
- adcl $0,%eax /* the carry out of this add is tested at LCheck_Round_Overflow */
- jmp LCheck_Round_Overflow
- LCheck_truncate_53:
- movl %ebx,%ecx
- andl $0x000007ff,%ecx
- orl %edx,%ecx
- jz L_Re_normalise /* No truncation needed */
- LTruncate_53:
- movb LOST_DOWN,FPU_bits_lost
- andl $0xfffff800,%ebx /* Truncate to 53 bits */
- jmp L_Re_normalise
- /* Round etc to 64 bit precision. Only the extension in %edx is discarded.
- This section also holds LCheck_Round_Overflow, which the 24 and 53 bit
- round-up paths jump to with the carry flag from their final add. */
- LRound_To_64:
- movl %esi,%ecx /* %esi still holds the whole control word */
- andl CW_RC,%ecx /* isolate the rounding control bits */
- cmpl RC_RND,%ecx
- je LRound_nearest_64
- cmpl RC_CHOP,%ecx
- je LCheck_truncate_64
- cmpl RC_UP,%ecx /* Towards +infinity */
- je LUp_64
- cmpl RC_DOWN,%ecx /* Towards -infinity */
- je LDown_64
- #ifdef PARANOID
- jmp L_bugged_round64
- #endif /* PARANOID */
- LUp_64: /* without PARANOID an unmatched rounding mode falls in here */
- cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
- jne LCheck_truncate_64 /* If negative then up==truncate */
- orl %edx,%edx /* any extension bits set? */
- jnz LDo_64_round_up
- jmp L_Re_normalise /* already exact at 64 bits */
- LDown_64:
- cmpb SIGN_POS,PARAM5
- je LCheck_truncate_64 /* If positive then down==truncate */
- orl %edx,%edx /* any extension bits set? */
- jnz LDo_64_round_up
- jmp L_Re_normalise /* already exact at 64 bits */
- LRound_nearest_64:
- cmpl $0x80000000,%edx /* compare the extension with one half */
- jc LCheck_truncate_64 /* less than half, no increment needed */
- jne LDo_64_round_up /* greater than half, increment needed */
- /* Now test for round-to-even */
- testb $1,%bl
- jz LCheck_truncate_64 /* ls bit already even: truncate the half away */
- LDo_64_round_up:
- movb LOST_UP,FPU_bits_lost
- addl $1,%ebx
- adcl $0,%eax /* carry out here means the significand wrapped to zero */
- LCheck_Round_Overflow:
- jnc L_Re_normalise /* uses the carry from the add which preceded the jump or fall-through */
- /* Overflow, adjust the result (significand to 1.0) */
- rcrl $1,%eax /* rotates the carry into the ms bit of %eax */
- rcrl $1,%ebx
- incw EXP(%edi) /* compensate for the one bit right shift */
- jmp L_Re_normalise
- LCheck_truncate_64:
- orl %edx,%edx
- jz L_Re_normalise /* No truncation needed */
- LTruncate_64:
- movb LOST_DOWN,FPU_bits_lost /* falls through into L_Re_normalise */
- L_Re_normalise: /* common continuation: %eax:%ebx = rounded significand */
- testb $0xff,FPU_denormal
- jnz Normalise_result /* non-zero: the exponent was at or below EXP_UNDER on entry */
- L_Normalised:
- movl TAG_Valid,%edx /* %edx carries the tag which becomes the return value */
- L_deNormalised: /* entered with the tag already in %edx */
- cmpb LOST_UP,FPU_bits_lost
- je L_precision_lost_up
- cmpb LOST_DOWN,FPU_bits_lost
- je L_precision_lost_down
- L_no_precision_loss: /* the two precision-lost handlers also return here */
- /* store the result */
- L_Store_significand:
- movl %eax,SIGH(%edi)
- movl %ebx,SIGL(%edi)
- cmpw EXP_OVER,EXP(%edi) /* branch below is taken when exponent >= EXP_OVER */
- jge L_overflow
- movl %edx,%eax /* return value = tag */
- /* Convert the exponent to 80x87 form. */
- addw EXTENDED_Ebias,EXP(%edi)
- andw $0x7fff,EXP(%edi) /* clear bit 15, which holds the sign */
- fpu_reg_round_signed_special_exit: /* apply the sign from PARAM5, then leave */
- cmpb SIGN_POS,PARAM5
- je fpu_reg_round_special_exit
- orw $0x8000,EXP(%edi) /* Negative sign for the result. */
- fpu_reg_round_special_exit: /* leave without touching the stored result */
- #ifndef NON_REENTRANT_FPU
- popl %ebx /* adjust the stack pointer: drops the local storage dword */
- #endif /* NON_REENTRANT_FPU */
- fpu_Arith_exit: /* undo the C stack frame and return the value in %eax */
- popl %ebx
- popl %edi
- popl %esi
- leave
- ret
- /*
- * Set the FPU status flags to represent precision loss due to
- * round-up.
- * %edx (tag) and %eax (ms significand word) are saved around the call
- * because they are still needed when the result is stored.
- */
- L_precision_lost_up:
- push %edx
- push %eax
- call set_precision_flag_up
- popl %eax
- popl %edx
- jmp L_no_precision_loss
- /*
- * Set the FPU status flags to represent precision loss due to
- * truncation.
- * %edx (tag) and %eax (ms significand word) are saved around the call
- * because they are still needed when the result is stored.
- */
- L_precision_lost_down:
- push %edx
- push %eax
- call set_precision_flag_down
- popl %eax
- popl %edx
- jmp L_no_precision_loss
- /*
- * The number is a denormal (which might get rounded up to a normal)
- * Shift the number right the required number of bits, which will
- * have to be undone later...
- * On entry %ecx holds the control word; it is saved on the stack while
- * %cx is used as the shift count and restored before Denorm_done.
- * Every path keeps the extension %edx non-zero whenever any non-zero
- * bit was shifted out or was already present in the extension.
- */
- L_Make_denorm:
- /* The action to be taken depends upon whether the underflow
- exception is masked */
- testb CW_Underflow,%cl /* Underflow mask. */
- jz Unmasked_underflow /* Do not make a denormal. */
- movb DENORMAL,FPU_denormal /* written before the push below moves %esp */
- pushl %ecx /* Save */
- movw EXP_UNDER+1,%cx
- subw EXP(%edi),%cx /* %cx = (EXP_UNDER+1) - exponent = shift count */
- cmpw $64,%cx /* a shift of 64 or more bits is handled separately */
- jnc Denorm_shift_more_than_63
- cmpw $32,%cx /* shrd only works for 0..31 bits */
- jnc Denorm_shift_more_than_32
- /*
- * We got here without jumps by assuming that the most common requirement
- * is for a small de-normalising shift.
- * Shift by [1..31] bits
- */
- addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
- orl %edx,%edx /* extension */
- setne %ch /* Save whether %edx is non-zero */
- xorl %edx,%edx
- shrd %cl,%ebx,%edx /* bits leaving %ebx become the new extension */
- shrd %cl,%eax,%ebx
- shr %cl,%eax
- orb %ch,%dl /* fold the old extension into the ls bit of the new one */
- popl %ecx
- jmp Denorm_done
- /* Shift by [32..63] bits */
- Denorm_shift_more_than_32:
- addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
- subb $32,%cl /* the whole-word part of the shift is done by the register moves below */
- orl %edx,%edx
- setne %ch /* %ch = 1 if the old extension was non-zero */
- orb %ch,%bl
- xorl %edx,%edx
- shrd %cl,%ebx,%edx
- shrd %cl,%eax,%ebx
- shr %cl,%eax
- orl %edx,%edx /* test these 32 bits */
- setne %cl /* %cl = 1 if non-zero bits were shifted out of %ebx */
- orb %ch,%bl
- orb %cl,%bl /* the ls bit of %ebx, soon to be the extension, records the lost bits */
- movl %ebx,%edx /* move everything down by one 32 bit word */
- movl %eax,%ebx
- xorl %eax,%eax
- popl %ecx
- jmp Denorm_done
- /* Shift by [64..) bits */
- Denorm_shift_more_than_63:
- cmpw $64,%cx
- jne Denorm_shift_more_than_64
- /* Exactly 64 bit shift */
- addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
- xorl %ecx,%ecx
- orl %edx,%edx
- setne %cl /* %cl = 1 if the old extension was non-zero */
- orl %ebx,%ebx
- setne %ch /* %ch = 1 if the ls significand word was non-zero */
- orb %ch,%cl
- orb %cl,%al /* record the lost bits in the ls bit of the new extension */
- movl %eax,%edx /* the ms word becomes the extension */
- xorl %eax,%eax
- xorl %ebx,%ebx
- popl %ecx
- jmp Denorm_done
- Denorm_shift_more_than_64:
- movw EXP_UNDER+1,EXP(%edi)
- /* This is easy, %eax must be non-zero, so.. */
- movl $1,%edx /* the extension only records that something non-zero was lost */
- xorl %eax,%eax
- xorl %ebx,%ebx
- popl %ecx
- jmp Denorm_done
- Unmasked_underflow: /* significand and exponent are left untouched here */
- movb UNMASKED_UNDERFLOW,FPU_denormal
- jmp Denorm_done
- /* Undo the de-normalisation. Reached from L_Re_normalise when FPU_denormal
- is non-zero; %eax:%ebx hold the rounded significand. */
- Normalise_result:
- cmpb UNMASKED_UNDERFLOW,FPU_denormal
- je Signal_underflow /* nothing was shifted in the unmasked case */
- /* The number must be a denormal if we got here. */
- #ifdef PARANOID
- /* But check it... just in case. */
- cmpw EXP_UNDER+1,EXP(%edi)
- jne L_norm_bugged
- #endif /* PARANOID */
- #ifdef PECULIAR_486
- /*
- * This implements a special feature of 80486 behaviour.
- * Underflow will be signalled even if the number is
- * not a denormal after rounding.
- * This difference occurs only for masked underflow, and not
- * in the unmasked case.
- * Actual 80486 behaviour differs from this in some circumstances.
- */
- orl %eax,%eax /* ms bits */
- js LPseudoDenormal /* Will be masked underflow */
- #else
- orl %eax,%eax /* ms bits */
- js L_Normalised /* No longer a denormal */
- #endif /* PECULIAR_486 */
- jnz LDenormal_adj_exponent /* still uses the flags from the orl above */
- orl %ebx,%ebx
- jz L_underflow_to_zero /* The contents are zero */
- LDenormal_adj_exponent:
- decw EXP(%edi) /* the PECULIAR_486 branch to LPseudoDenormal skips this */
- LPseudoDenormal:
- testb $0xff,FPU_bits_lost /* bits lost == underflow */
- movl TAG_Special,%edx /* mov leaves the flags from the testb intact */
- jz L_deNormalised /* nothing was lost: no underflow to report */
- /* There must be a masked underflow */
- push %eax /* preserve the ms significand word across the call */
- pushl EX_Underflow /* argument for EXCEPTION */
- call EXCEPTION
- popl %eax /* discard the argument */
- popl %eax /* restore the ms significand word */
- movl TAG_Special,%edx /* reload the tag after the call */
- jmp L_deNormalised
- /*
- * The operations resulted in a number too small to represent.
- * Masked response.
- * Reached only when both significand words are zero after rounding.
- */
- L_underflow_to_zero:
- push %eax /* preserve the ms significand word across the call */
- call set_precision_flag_down
- popl %eax
- push %eax /* preserve it again across EXCEPTION */
- pushl EX_Underflow /* argument for EXCEPTION */
- call EXCEPTION
- popl %eax /* discard the argument */
- popl %eax /* restore the ms significand word */
- /* Reduce the exponent to EXP_UNDER */
- movw EXP_UNDER,EXP(%edi)
- movl TAG_Zero,%edx
- jmp L_Store_significand /* skips the FPU_bits_lost checks at L_deNormalised */
- /* The operations resulted in a number too large to represent. */
- L_overflow:
- addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
- push %edi /* argument for arith_overflow, also preserves %edi */
- call arith_overflow
- pop %edi
- jmp fpu_reg_round_signed_special_exit /* %eax is not written again before the ret */
- Signal_underflow:
- /* The number may have been changed to a non-denormal */
- /* by the rounding operations. */
- cmpw EXP_UNDER,EXP(%edi) /* branch below is taken when exponent <= EXP_UNDER */
- jle Do_unmasked_underflow
- jmp L_Normalised
- Do_unmasked_underflow:
- /* Increase the exponent by the magic number */
- addw $(3*(1<<13)),EXP(%edi)
- push %eax /* preserve the ms significand word across the call */
- pushl EX_Underflow /* argument for EXCEPTION */
- call EXCEPTION
- popl %eax /* discard the argument */
- popl %eax /* restore the ms significand word */
- jmp L_Normalised
- #ifdef PARANOID /* internal error stubs: each raises EX_INTERNAL with its own code and returns -1 */
- #ifdef PECULIAR_486
- L_bugged_denorm_486: /* precision control bits matched no known value */
- pushl EX_INTERNAL|0x236
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- #else
- L_bugged_denorm: /* precision control bits matched no known value */
- pushl EX_INTERNAL|0x230
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- #endif /* PECULIAR_486 */
- L_bugged_round24: /* rounding control bits matched no known value (24 bit path) */
- pushl EX_INTERNAL|0x231
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- L_bugged_round53: /* rounding control bits matched no known value (53 bit path) */
- pushl EX_INTERNAL|0x232
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- L_bugged_round64: /* rounding control bits matched no known value (64 bit path) */
- pushl EX_INTERNAL|0x233
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- L_norm_bugged: /* exponent was not EXP_UNDER+1 at Normalise_result */
- pushl EX_INTERNAL|0x234
- call EXCEPTION
- popl %ebx /* discard the argument */
- jmp L_exception_exit
- L_entry_bugged: /* only referenced by the commented-out check at fpu_reg_round */
- pushl EX_INTERNAL|0x235
- call EXCEPTION
- popl %ebx /* discard the argument; falls through to L_exception_exit */
- L_exception_exit:
- mov $-1,%eax /* return value -1 = internal error */
- jmp fpu_reg_round_special_exit /* the saved %ebx is restored on the exit path */
- #endif /* PARANOID */
- ENDPROC(FPU_round)
|