123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150 |
- /* Copyright (C) 2006 Free Software Foundation, Inc.
- This file is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
- In addition to the permissions in the GNU General Public License, the
- Free Software Foundation gives you unlimited permission to link the
- compiled version of this file into combinations with other programs,
- and to distribute those combinations without any restriction coming
- from the use of this file. (The General Public License restrictions
- do apply in other respects; for example, they cover modification of
- the file, and distribution when not linked into a combine
- executable.)
- This file is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING. If not, write to
- the Free Software Foundation, 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
- /* Moderately Space-optimized libgcc routines for the Renesas SH /
- STMicroelectronics ST40 CPUs.
- Contributed by J"orn Rennecke joern.rennecke@st.com. */
- /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
- sh4-200 run times:
- udiv small divisor: 55 cycles
- udiv large divisor: 52 cycles
- sdiv small divisor, positive result: 59 cycles
- sdiv large divisor, positive result: 56 cycles
- sdiv small divisor, negative result: 65 cycles (*)
- sdiv large divisor, negative result: 62 cycles (*)
- (*): r2 is restored in the rts delay slot and has a lingering latency
- of two more cycles. */
- .balign 4
- .global __udivsi3_i4i
- .global __udivsi3_i4
- .set __udivsi3_i4, __udivsi3_i4i
- .type __udivsi3_i4i, @function
- .type __sdivsi3_i4i, @function
- __udivsi3_i4i:
- sts pr,r1
- mov.l r4,@-r15
- extu.w r5,r0
- cmp/eq r5,r0
- swap.w r4,r0
- shlr16 r4
- bf/s large_divisor
- div0u
- mov.l r5,@-r15
- shll16 r5
- sdiv_small_divisor:
- div1 r5,r4
- bsr div6
- div1 r5,r4
- div1 r5,r4
- bsr div6
- div1 r5,r4
- xtrct r4,r0
- xtrct r0,r4
- bsr div7
- swap.w r4,r4
- div1 r5,r4
- bsr div7
- div1 r5,r4
- xtrct r4,r0
- mov.l @r15+,r5
- swap.w r0,r0
- mov.l @r15+,r4
- jmp @r1
- rotcl r0
- div7:
- div1 r5,r4
- div6:
- div1 r5,r4; div1 r5,r4; div1 r5,r4
- div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
- divx3:
- rotcl r0
- div1 r5,r4
- rotcl r0
- div1 r5,r4
- rotcl r0
- rts
- div1 r5,r4
- large_divisor:
- mov.l r5,@-r15
- sdiv_large_divisor:
- xor r4,r0
- .rept 4
- rotcl r0
- bsr divx3
- div1 r5,r4
- .endr
- mov.l @r15+,r5
- mov.l @r15+,r4
- jmp @r1
- rotcl r0
- .global __sdivsi3_i4i
- .global __sdivsi3_i4
- .global __sdivsi3
- .set __sdivsi3_i4, __sdivsi3_i4i
- .set __sdivsi3, __sdivsi3_i4i
- __sdivsi3_i4i:
- mov.l r4,@-r15
- cmp/pz r5
- mov.l r5,@-r15
- bt/s pos_divisor
- cmp/pz r4
- neg r5,r5
- extu.w r5,r0
- bt/s neg_result
- cmp/eq r5,r0
- neg r4,r4
- pos_result:
- swap.w r4,r0
- bra sdiv_check_divisor
- sts pr,r1
- pos_divisor:
- extu.w r5,r0
- bt/s pos_result
- cmp/eq r5,r0
- neg r4,r4
- neg_result:
- mova negate_result,r0
- ;
- mov r0,r1
- swap.w r4,r0
- lds r2,macl
- sts pr,r2
- sdiv_check_divisor:
- shlr16 r4
- bf/s sdiv_large_divisor
- div0u
- bra sdiv_small_divisor
- shll16 r5
- .balign 4
- negate_result:
- neg r0,r0
- jmp @r2
- sts macl,r2
|