udivsi3_i4i-Os.S 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. /* Copyright (C) 2006 Free Software Foundation, Inc.
  2. This file is free software; you can redistribute it and/or modify it
  3. under the terms of the GNU General Public License as published by the
  4. Free Software Foundation; either version 2, or (at your option) any
  5. later version.
  6. In addition to the permissions in the GNU General Public License, the
  7. Free Software Foundation gives you unlimited permission to link the
  8. compiled version of this file into combinations with other programs,
  9. and to distribute those combinations without any restriction coming
  10. from the use of this file. (The General Public License restrictions
  11. do apply in other respects; for example, they cover modification of
  12. the file, and distribution when not linked into a combine
  13. executable.)
  14. This file is distributed in the hope that it will be useful, but
  15. WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program; see the file COPYING. If not, write to
  20. the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  21. Boston, MA 02110-1301, USA. */
  22. /* Moderately Space-optimized libgcc routines for the Renesas SH /
  23. STMicroelectronics ST40 CPUs.
  24. Contributed by J"orn Rennecke joern.rennecke@st.com. */
  25. /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
  26. sh4-200 run times:
  27. udiv small divisor: 55 cycles
  28. udiv large divisor: 52 cycles
  29. sdiv small divisor, positive result: 59 cycles
  30. sdiv large divisor, positive result: 56 cycles
  31. sdiv small divisor, negative result: 65 cycles (*)
  32. sdiv large divisor, negative result: 62 cycles (*)
  33. (*): r2 is restored in the rts delay slot and has a lingering latency
  34. of two more cycles. */
  /* __udivsi3_i4i (alias __udivsi3_i4): 32-bit unsigned division built from
     single-bit div1 steps.
     In:   r4 = dividend, r5 = divisor (r5 is the divisor operand of every
           div1 below).
     Out:  r0 = quotient.
     r4 and r5 are pushed on entry and popped before returning, so the
     caller sees them unchanged.  r1 is overwritten with the return address
     and the T bit is modified.  No test for a zero divisor is made here.  */
  35. .balign 4
  /* Export the entry point and define __udivsi3_i4 as a second name for it.
     Both division entry points of this file are typed as functions here.  */
  36. .global __udivsi3_i4i
  37. .global __udivsi3_i4
  38. .set __udivsi3_i4, __udivsi3_i4i
  39. .type __udivsi3_i4i, @function
  40. .type __sdivsi3_i4i, @function
  41. __udivsi3_i4i:
  /* r1 = return address.  pr cannot be used for the final return because the
     bsr calls further down overwrite it.  */
  42. sts pr,r1
  /* Save the dividend; r4 is the working register of the div1 steps.  */
  43. mov.l r4,@-r15
  /* T = 1 when the divisor equals its own zero-extended low 16 bits, that is
     when it fits in 16 bits.  */
  44. extu.w r5,r0
  45. cmp/eq r5,r0
  /* r0 = dividend with its two 16-bit halves exchanged.
     r4 = high half of the dividend moved down to bits 15..0.
     Neither instruction changes T.  */
  46. swap.w r4,r0
  47. shlr16 r4
  /* T = 0 (divisor wider than 16 bits): go to large_divisor.  The div0u in
     the delay slot executes on both paths and prepares unsigned division by
     clearing M, Q and T.  */
  48. bf/s large_divisor
  49. div0u
  /* Small divisor: save r5, then move the divisor into the upper half of r5
     so that each group of 16 div1 steps consumes 16 dividend bits.  */
  50. mov.l r5,@-r15
  51. shll16 r5
  /* The signed routine joins here with r4, r5 and r0 prepared the same way
     and with r4 and r5 already on the stack in the same order.  */
  52. sdiv_small_divisor:
  /* First group of 16 div1 steps on the high half of the dividend:
     (1 here + 1 in the bsr delay slot + 6 inside div6), done twice.  */
  53. div1 r5,r4
  54. bsr div6
  55. div1 r5,r4
  56. div1 r5,r4
  57. bsr div6
  58. div1 r5,r4
  /* Regroup between the two halves (T is left untouched by these moves):
     r0 = [low half of r4 : low 16 bits of the dividend]
     r4 = [low 16 bits of the dividend : high half of r4]  */
  59. xtrct r4,r0
  60. xtrct r0,r4
  /* Second group of 16 div1 steps.  The swap.w in the delay slot first puts
     r4 into the order [previous high half of r4 : low 16 dividend bits];
     then 7 steps in div7, 1 here, 1 in the next delay slot, 7 in div7.  */
  61. bsr div7
  62. swap.w r4,r4
  63. div1 r5,r4
  64. bsr div7
  65. div1 r5,r4
  /* r0 = [low half of r4 from the second group : bits parked after the
     first group].  */
  66. xtrct r4,r0
  /* Restore the caller's r5.  */
  67. mov.l @r15+,r5
  /* Put the first group's bits in the upper half and the second group's bits
     in the lower half.  */
  68. swap.w r0,r0
  /* Restore the caller's r4.  */
  69. mov.l @r15+,r4
  /* Leave through r1.  The rotcl in the delay slot shifts r0 left by one and
     inserts the T bit produced by the last div1 as the final quotient bit.
     When entered from the signed routine r1 may instead hold the address of
     negate_result.  */
  70. jmp @r1
  71. rotcl r0
  /* Local helpers reached with bsr.  Each one returns with rts and places
     its last div1 in the rts delay slot.  All of them work on r4 with the
     divisor in r5 and continue the T/Q/M state of the surrounding division
     sequence.  */
  /* div7: seven div1 steps in total, one here and then the six of div6 by
     falling through.  */
  72. div7:
  73. div1 r5,r4
  /* div6: six div1 steps.  The semicolons separate several statements on one
     line; the div1 after rts is the delay-slot instruction.  */
  74. div6:
  75. div1 r5,r4; div1 r5,r4; div1 r5,r4
  76. div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
  /* divx3: three rounds of "rotcl r0, div1 r5,r4".  Each rotcl shifts the T
     bit left by the previous div1 into bit 0 of r0 and moves bit 31 of r0
     into T, where the following div1 picks it up as the next dividend bit.
     The third div1 is in the rts delay slot.  */
  77. divx3:
  78. rotcl r0
  79. div1 r5,r4
  80. rotcl r0
  81. div1 r5,r4
  82. rotcl r0
  83. rts
  84. div1 r5,r4
  /* Large-divisor path, taken when the divisor does not fit in 16 bits.
     State on arrival: r4 = dividend shifted right by 16, r0 = dividend with
     its halves exchanged, div0u already executed, caller's r4 on the stack.  */
  85. large_divisor:
  /* Push r5 so the stack matches the signed entry, which has already pushed
     both r4 and r5 before joining at sdiv_large_divisor.  */
  86. mov.l r5,@-r15
  87. sdiv_large_divisor:
  /* The low half of r0 holds the same bits as r4, so the xor clears it and
     leaves only the low 16 dividend bits, positioned in the upper half.  */
  88. xor r4,r0
  /* 16 rounds of "rotcl r0, div1 r5,r4": each repetition contributes one
     rotcl here, one div1 in the bsr delay slot and three more rounds inside
     divx3, and the sequence is assembled four times.  Every rotcl feeds the
     next dividend bit from the top of r0 into T and collects the previous
     result bit at the bottom of r0.  */
  89. .rept 4
  90. rotcl r0
  91. bsr divx3
  92. div1 r5,r4
  93. .endr
  /* Restore the caller's r5 and r4 (pushed in the opposite order).  */
  94. mov.l @r15+,r5
  95. mov.l @r15+,r4
  /* Leave through r1, which holds either the caller's return address or, when
     coming from the signed routine, the address of negate_result.  The rotcl
     in the delay slot shifts the last result bit from T into r0.  */
  96. jmp @r1
  97. rotcl r0
  /* __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): 32-bit signed
     division.  Negative operands are negated, the unsigned small or large
     divisor sequence is reused, and the quotient is negated afterwards when
     exactly one operand was negative.
     In:   r4 = dividend, r5 = divisor.
     Out:  r0 = quotient.
     The caller's r4 and r5 are pushed before any negation and popped by the
     shared exit code, so they come back unchanged.  r1 and T are modified.
     On the negative-result path r2 is kept in macl and restored at
     negate_result, so macl is overwritten on that path.  No test for a zero
     divisor is made here.  */
  98. .global __sdivsi3_i4i
  99. .global __sdivsi3_i4
  100. .global __sdivsi3
  101. .set __sdivsi3_i4, __sdivsi3_i4i
  102. .set __sdivsi3, __sdivsi3_i4i
  103. __sdivsi3_i4i:
  /* Save the caller's dividend.  */
  104. mov.l r4,@-r15
  /* T = 1 when the divisor is zero or positive.  */
  105. cmp/pz r5
  /* Save the caller's divisor (mov.l leaves T alone).  */
  106. mov.l r5,@-r15
  /* Branch on the divisor sign.  The delay slot then reloads T with the sign
     test of the dividend for the next decision on either path.  */
  107. bt/s pos_divisor
  108. cmp/pz r4
  /* Divisor was negative: r5 = -r5, r0 = low 16 bits of the new r5.  */
  109. neg r5,r5
  110. extu.w r5,r0
  /* Dividend zero or positive with a negative divisor: result needs
     negating.  The delay slot sets T = 1 when the divisor magnitude fits in
     16 bits; that T value is consumed at sdiv_check_divisor.  */
  111. bt/s neg_result
  112. cmp/eq r5,r0
  /* Both operands negative: r4 = -r4, and the result keeps its sign.  */
  113. neg r4,r4
  114. pos_result:
  /* Same register preparation as the unsigned entry: r0 = dividend with its
     halves exchanged, r1 = caller's return address (stored in the bra delay
     slot).  */
  115. swap.w r4,r0
  116. bra sdiv_check_divisor
  117. sts pr,r1
  118. pos_divisor:
  /* Divisor zero or positive: r0 = its low 16 bits.  */
  119. extu.w r5,r0
  /* Dividend zero or positive as well: no negation needed.  The delay slot
     sets T = 1 when the divisor fits in 16 bits.  */
  120. bt/s pos_result
  121. cmp/eq r5,r0
  /* Dividend negative with a non-negative divisor: r4 = -r4, then fall into
     the negative-result setup.  */
  122. neg r4,r4
  123. neg_result:
  /* r0 = address of negate_result.  */
  124. mova negate_result,r0
  /* Lone statement separator, no instruction on this line.
     NOTE(review): its purpose is not evident from this file.  */
  125. ;
  /* r1 = address of negate_result, so the "jmp @r1" that ends the shared
     division sequences lands there instead of returning to the caller.  */
  126. mov r0,r1
  /* r0 = dividend magnitude with its halves exchanged.  */
  127. swap.w r4,r0
  /* Keep the current r2 in macl, then use r2 for the real return address.  */
  128. lds r2,macl
  129. sts pr,r2
  130. sdiv_check_divisor:
  /* r4 = high half of the dividend magnitude in bits 15..0 (T unchanged).  */
  131. shlr16 r4
  /* T = 0: divisor wider than 16 bits, join the large-divisor sequence.
     div0u in the delay slot runs on both paths and clears M, Q and T.  */
  132. bf/s sdiv_large_divisor
  133. div0u
  /* Otherwise join the small-divisor sequence; the delay slot moves the
     divisor into the upper half of r5 as that sequence expects.  */
  134. bra sdiv_small_divisor
  135. shll16 r5
  /* negate_result: tail used when the signed quotient must be negative.  It
     is reached through "jmp @r1" after neg_result has loaded this address
     into r1 with mova; the 4-byte alignment is there because mova computes
     longword-aligned addresses.  */
  136. .balign 4
  137. negate_result:
  /* r0 = -r0: turn the unsigned quotient into the negative result.  */
  138. neg r0,r0
  /* Return to the caller through r2, which neg_result loaded from pr.  The
     delay slot restores the r2 value that neg_result parked in macl.  */
  139. jmp @r2
  140. sts macl,r2