div64.S 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. /*
  2. * linux/arch/arm/lib/div64.S
  3. *
  4. * Optimized computation of 64-bit dividend / 32-bit divisor
  5. *
  6. * Author: Nicolas Pitre
  7. * Created: Oct 5, 2003
  8. * Copyright: Monta Vista Software, Inc.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2 as
  12. * published by the Free Software Foundation.
  13. */
  14. #include <linux/linkage.h>
  15. #ifdef __ARMEB__ /* __ARMEB__ (big-endian ARM build, going by the macro name -- confirm): high word of each pair is the lower-numbered register */
  16. #define xh r0
  17. #define xl r1
  18. #define yh r2
  19. #define yl r3
  20. #else /* otherwise the low word of each pair is the lower-numbered register */
  21. #define xl r0
  22. #define xh r1
  23. #define yl r2
  24. #define yh r3
  25. #endif /* xh:xl names the r0/r1 pair, yh:yl the r2/r3 pair; h = high word, l = low word */
  26. /*
  27. * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  28. *
  29. * Note: Calling convention is totally non standard for optimal code.
  30. * This is meant to be used by do_div() from include/asm/div64.h only.
  31. *
  32. * Input parameters:
  33. * xh-xl = dividend (clobbered)
  34. * r4 = divisor (preserved)
  35. *
  36. * Output values:
  37. * yh-yl = result (the 64-bit quotient)
  38. * xh = remainder
  39. *
  40. * Clobbered regs: xl, ip (plus the condition flags; lr as well on the division by 0 path)
  41. */
  42. ENTRY(__do_div64)
  43. @ Test for easy paths first.
  44. subs ip, r4, #1 @ ip = divisor - 1; C clear if divisor was 0, Z set if it was 1
  45. bls 9f @ divisor is 0 or 1
  46. tst ip, r4 @ divisor & (divisor - 1) is zero only for a power of 2
  47. beq 8f @ divisor is power of 2
  48. @ See if we need to handle upper 32-bit result.
  49. cmp xh, r4
  50. mov yh, #0 @ plain mov leaves the cmp flags intact for the blo
  51. blo 3f @ xh < divisor: upper result word stays 0
  52. @ Align divisor with upper part of dividend.
  53. @ The aligned divisor is stored in yl preserving the original.
  54. @ The bit position is stored in ip.
  55. #if __LINUX_ARM_ARCH__ >= 5 /* alignment computed with clz */
  56. clz yl, r4
  57. clz ip, xh
  58. sub yl, yl, ip @ shift count = clz(divisor) - clz(xh); not negative since xh >= divisor here
  59. mov ip, #1
  60. mov ip, ip, lsl yl @ ip = result bit matching that shift
  61. mov yl, r4, lsl yl @ yl = divisor with its top set bit lined up with that of xh
  62. #else /* no clz: align by repeated shifting */
  63. mov yl, r4
  64. mov ip, #1
  65. 1: cmp yl, #0x80000000 @ C clear while bit 31 of yl is still clear
  66. cmpcc yl, xh @ ...and in that case C clear while yl < xh
  67. movcc yl, yl, lsl #1
  68. movcc ip, ip, lsl #1 @ keep the result bit in step with the shifted divisor
  69. bcc 1b @ stop once yl >= xh or bit 31 of yl is set
  70. #endif
  71. @ The division loop for needed upper bit positions.
  72. @ Break out early if dividend reaches 0.
  73. 2: cmp xh, yl
  74. orrcs yh, yh, ip @ xh >= yl: set this bit in the upper result word
  75. subcss xh, xh, yl @ ...and subtract, Z now telling whether xh reached 0
  76. movnes ip, ip, lsr #1 @ unless the subtraction just left xh at 0: next bit, Z set once ip runs out
  77. mov yl, yl, lsr #1 @ no flag update, so the bne below sees the Z from above
  78. bne 2b
  79. @ See if we need to handle lower 32-bit result.
  80. 3: cmp xh, #0
  81. mov yl, #0
  82. cmpeq xl, r4 @ only when xh is 0: is the low word already below the divisor?
  83. movlo xh, xl @ lo can only come from the cmpeq: xl is the remainder, lower result word stays 0
  84. movlo pc, lr @ ...so return
  85. @ The division loop for lower bit positions.
  86. @ Here we shift remainder bits leftwards rather than moving the
  87. @ divisor for comparisons, considering the carry-out bit as well.
  88. mov ip, #0x80000000 @ result bit mask, starting at bit 31 of yl
  89. 4: movs xl, xl, lsl #1 @ top bit of xl goes to C
  90. adcs xh, xh, xh @ xh = 2*xh + C; new C is the bit shifted out of xh
  91. beq 6f @ low 32 bits of the shifted remainder are all zero
  92. cmpcc xh, r4 @ no bit shifted out: C = (xh >= divisor); otherwise C stays set
  93. 5: orrcs yl, yl, ip @ divisor fits: set this result bit
  94. subcs xh, xh, r4 @ ...and take the divisor off the remainder
  95. movs ip, ip, lsr #1 @ next lower bit; Z once all 32 positions are done
  96. bne 4b
  97. mov pc, lr
  98. @ The top part of remainder became zero. If carry is set
  99. @ (the 33rd bit) this is a false positive so resume the loop.
  100. @ Otherwise, if lower part is also null then we are done.
  101. 6: bcs 5b @ C is still set at 5, so the bit is recorded and the divisor subtracted
  102. cmp xl, #0
  103. moveq pc, lr @ nothing left in xl: return with xh (the remainder) = 0
  104. @ We still have remainder bits in the low part. Bring them up.
  105. #if __LINUX_ARM_ARCH__ >= 5 /* skip the zero bits in one step with clz */
  106. clz xh, xl @ we know xh is zero here so it can hold the shift count
  107. add xh, xh, #1 @ count that pushes the top set bit of xl just out
  108. mov xl, xl, lsl xh
  109. mov ip, ip, lsr xh @ skip the same number of result bit positions
  110. #else /* no clz: shift one position at a time */
  111. 7: movs xl, xl, lsl #1
  112. mov ip, ip, lsr #1
  113. bcc 7b @ until a set bit falls out of xl into C
  114. #endif
  115. @ Current remainder is now 1. It is worthless to compare with
  116. @ divisor at this point since divisor can not be smaller than 3 here.
  117. @ If possible, branch for another shift in the division loop.
  118. @ If no bit position left then we are done.
  119. movs ip, ip, lsr #1
  120. mov xh, #1 @ the bit shifted out of xl is the whole remainder; flags untouched for the bne
  121. bne 4b
  122. mov pc, lr
  123. 8: @ Division by a power of 2: determine what that divisor order is
  124. @ then simply shift values around
  125. #if __LINUX_ARM_ARCH__ >= 5
  126. clz ip, r4
  127. rsb ip, ip, #31 @ ip = 31 - clz(divisor) = index of the single set bit
  128. #else /* no clz: find the set bit by halving the search range */
  129. mov yl, r4
  130. cmp r4, #(1 << 16)
  131. mov ip, #0
  132. movhs yl, yl, lsr #16 @ bit is in the upper half: drop 16 bits...
  133. movhs ip, #16 @ ...and count them
  134. cmp yl, #(1 << 8)
  135. movhs yl, yl, lsr #8
  136. addhs ip, ip, #8
  137. cmp yl, #(1 << 4)
  138. movhs yl, yl, lsr #4
  139. addhs ip, ip, #4
  140. cmp yl, #(1 << 2) @ yl is now 1, 2, 4 or 8
  141. addhi ip, ip, #3 @ yl == 8
  142. addls ip, ip, yl, lsr #1 @ yl == 1, 2 or 4 adds 0, 1 or 2
  143. #endif
  144. mov yh, xh, lsr ip @ upper result word = xh >> order
  145. mov yl, xl, lsr ip @ lower result word, still missing the bits coming down from xh
  146. rsb ip, ip, #32 @ ip = 32 - order: used next to move the low bits of xh into yl, then to mask xl
  147. ARM( orr yl, yl, xh, lsl ip )
  148. THUMB( lsl xh, xh, ip )
  149. THUMB( orr yl, yl, xh )
  150. mov xh, xl, lsl ip
  151. mov xh, xh, lsr ip @ remainder = the low (order) bits of xl
  152. mov pc, lr
  153. @ eq -> division by 1: obvious enough...
  154. 9: moveq yl, xl @ Z here is still the one set by the subs at entry
  155. moveq yh, xh
  156. moveq xh, #0
  157. moveq pc, lr
  158. @ Division by 0:
  159. str lr, [sp, #-8]! @ save the return address; sp drops by 8
  160. bl __div0 @ defined elsewhere; bl overwrites lr, hence the save
  161. @ as wrong as it could be...
  162. mov yl, #0
  163. mov yh, #0
  164. mov xh, #0
  165. ldr pc, [sp], #8 @ return through the saved address and put sp back
  166. ENDPROC(__do_div64)