lib1funcs.S 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
  29. #include <linux/linkage.h>
  30. #include <asm/assembler.h>
  31. #include <asm/unwind.h>
  /*
   * ARM_DIV_BODY - core of the unsigned 32-bit division.
   *
   *   \dividend  in:  numerator; every successful subtraction reduces it
   *   \divisor   in:  denominator; shifted up for alignment, then stepped
   *                   back down four bits per loop pass (clobbered)
   *   \result    out: quotient
   *   \curbit    scratch: quotient bit matching the current divisor position
   *
   * Condition flags are clobbered.  Both expansions in this file sit
   * behind checks that branch away for a zero divisor, a divisor of 1,
   * dividend <= divisor and single-bit (power of two) divisors.
   *
   * Labels stay numeric because the macro is expanded more than once.
   */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ shift = clz(divisor) - clz(dividend).  Shifting the divisor left
	@ by that amount lines its top set bit up with the top set bit of
	@ the dividend.  curbit starts at 1 << shift, the quotient starts
	@ at zero.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Variant without clz.  When the top three divisor bits are clear,
	@ start with the divisor shifted up by 3 and curbit = 8, which puts
	@ curbit at the top of a nibble for the 4-step loop below.
	@ Otherwise keep the divisor as it is and start with curbit = 1.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Move up one nibble at a time while the divisor is below both
	@ 0x10000000 and the dividend.  Four bits matches the amount of
	@ unrolling in the division loop.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ Finish one bit at a time while the divisor is below both
	@ 0x80000000 and the dividend, so that a large divisor is never
	@ shifted out of the register.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, against the
	@ divisor shifted right by 0, 1, 2 and 3.  Each step that subtracts
	@ also sets the quotient bit at the same offset from curbit.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit

	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1

	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2

	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3

	@ Stop early once nothing is left of the dividend.  Otherwise move
	@ curbit down a nibble with flags updated: Z becomes set when no
	@ quotient bits remain, which also ends the loop.
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm
  /*
   * ARM_DIV2_ORDER - bit index of the divisor, for shift-based division.
   *
   *   \divisor  in:  value to examine; the call sites in this file only
   *                  get here with a single bit set (power of two)
   *   \order    out: index of that bit, i.e. the right-shift count that
   *                  replaces the division
   *
   * The clz variant leaves \divisor untouched.  The fallback variant
   * shifts \divisor down while searching and clobbers the flags.
   */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search: drop 16, then 8, then 4 bits whenever the value
	@ is at least that large, adding the dropped width to order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ The value is below 16 now.  Above 4 adds 3, otherwise the value
	@ shifted right once (0, 1 or 2 for the values 1, 2 or 4) is added.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
  /*
   * ARM_MOD_BODY - core of the unsigned 32-bit remainder.
   *
   *   \dividend  in:  numerator
   *              out: remainder
   *   \divisor   in:  denominator; shifted up for alignment, then stepped
   *                   back down (clobbered)
   *   \order     scratch: how far the divisor was shifted up; doubles as
   *                       the countdown of compare/subtract steps
   *   \spare     scratch (written only by the clz variant)
   *
   * Condition flags are clobbered.  Both expansions in this file sit
   * behind checks that handle a zero divisor, a divisor of 1,
   * dividend <= divisor and power-of-two divisors without the macro.
   *
   * Labels stay numeric because the macro is expanded more than once.
   */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend), then align the divisor.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Move up one nibble at a time while the divisor is below both
	@ 0x10000000 and the dividend, counting the shift in order.  Four
	@ bits matches the amount of unrolling in the subtraction loop.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ Finish one bit at a time while the divisor is below both
	@ 0x80000000 and the dividend, so that a large divisor is never
	@ shifted out of the register.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Subtract down to the remainder.  A shift of order needs
	@ order + 1 compare/subtract steps, so a full batch of four is
	@ possible only while order - 3 is not negative.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3

	@ Go round again only when something is left of the dividend and
	@ the countdown, reduced by 4, has not gone negative.  The divisor
	@ moves down a nibble either way, and that mov leaves the flags
	@ from the cmp intact.
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if the low two bits of order are clear (no steps
	@ pending) or if the dividend has reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ One, two or three steps are pending: order is -3, -2 or -1.
2:	cmn	\order, #2
	blt	4f				@ order = -3, one step
	beq	3f				@ order = -2, two steps
	cmp	\dividend, \divisor		@ order = -1, three steps
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
  /*
   * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
   *
   *   in:  r0 = dividend, r1 = divisor
   *   out: r0 = quotient
   *   r2, r3 and the flags are scratch; r1 may be modified.
   *
   * A zero divisor is handed to Ldiv0.  The cheap cases (divisor of 1,
   * dividend <= divisor, power-of-two divisor) are answered without the
   * division loop.
   */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ r2 = divisor - 1
	reteq	lr			@ divisor is 1: r0 is already the quotient
	bcc	Ldiv0			@ borrow: divisor is 0
	cmp	r0, r1
	bls	.Ludiv_not_above	@ dividend <= divisor
	tst	r1, r2
	beq	.Ludiv_pow2		@ divisor & (divisor - 1) is 0

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2			@ hand back the quotient
	ret	lr

	@ Flags still come from the cmp of dividend against divisor:
	@ equal gives 1, lower gives 0.
.Ludiv_not_above:
	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: shift right by its bit index.
.Ludiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
  /*
   * __umodsi3 - unsigned 32-bit remainder.
   *
   *   in:  r0 = dividend, r1 = divisor
   *   out: r0 = remainder
   *   r2, r3 and the flags are scratch; r1 may be modified.
   *
   * A zero divisor is handed to Ldiv0.  The conditional sequence below
   * settles every cheap case through the flags and returns early:
   *   divisor is 1, or dividend equals divisor  -> 0
   *   dividend below divisor                    -> dividend unchanged
   *   power-of-two divisor                      -> dividend & (divisor - 1)
   */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ r2 = divisor - 1, Z set if divisor is 1
	bcc	Ldiv0			@ borrow: divisor is 0
	cmpne	r0, r1			@ otherwise compare dividend with divisor
	moveq	r0, #0			@ divisor is 1 or equal operands
	tsthi	r1, r2			@ dividend above: is divisor a power of 2?
	andeq	r0, r0, r2		@ mask with divisor - 1
	retls	lr			@ every cheap case is finished here

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
  /*
   * __divsi3 / __aeabi_idiv - signed 32-bit division.
   *
   *   in:  r0 = dividend, r1 = divisor
   *   out: r0 = quotient
   *   r2, r3, ip and the flags are scratch; r1 may be modified.
   *
   * ip keeps dividend ^ divisor, whose top bit is the sign of the result.
   * The work is done on absolute values (|dividend| in r3, |divisor| in
   * r1) and the quotient is negated at the end when ip is negative.
   * A zero divisor is handed to Ldiv0.
   */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1		@ save the sign of the result
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ r1 = |divisor|, the loops are unsigned
	subs	r2, r1, #1		@ r2 = |divisor| - 1
	beq	.Lidiv_by_one		@ division by 1 or -1
	movs	r3, r0
	rsbmi	r3, r0, #0		@ r3 = |dividend|
	cmp	r3, r1
	bls	.Lidiv_not_above	@ |dividend| <= |divisor|
	tst	r1, r2
	beq	.Lidiv_pow2		@ |divisor| is a power of 2

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0		@ apply the saved sign
	ret	lr

	@ ip ^ r0 carries the sign bit of the original divisor, so the
	@ dividend is negated exactly when the divisor was -1.
.Lidiv_by_one:
	teq	ip, r0
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from the cmp of the two absolute values.
	@ Lower gives 0.  Equal gives +1 or -1: ip asr 31 is 0 or -1, and
	@ setting bit 0 turns that into 1 or -1.
.Lidiv_not_above:
	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift |dividend| right, then apply the
	@ sign.  The mov between cmp and rsbmi does not touch the flags.
.Lidiv_pow2:
	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
  /*
   * __modsi3 - signed 32-bit remainder.
   *
   *   in:  r0 = dividend, r1 = divisor
   *   out: r0 = remainder, negated when the dividend was negative
   *   r2, r3, ip and the flags are scratch; r1 may be modified.
   *
   * The remainder is computed on absolute values.  ip holds the original
   * dividend so that its sign can be applied afterwards.  A zero divisor
   * is handed to Ldiv0.
   */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ r1 = |divisor|, the loops are unsigned
	movs	ip, r0			@ keep the dividend for its sign
	rsbmi	r0, r0, #0		@ r0 = |dividend|

	@ Cheap cases, settled through the flags:
	@   divisor is 1, or equal operands  -> 0
	@   dividend below divisor           -> dividend unchanged
	@   power-of-two divisor             -> dividend & (divisor - 1)
	subs	r2, r1, #1		@ r2 = |divisor| - 1, Z set if it is 1
	cmpne	r0, r1			@ otherwise compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ dividend above: is divisor a power of 2?
	andeq	r0, r0, r2
	bls	.Lmod_fix_sign		@ cheap case done, skip the loop

	ARM_MOD_BODY r0, r1, r2, r3

.Lmod_fix_sign:
	cmp	ip, #0
	rsbmi	r0, r0, #0		@ negative dividend gives negative result
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
  /*
   * __aeabi_uidivmod / __aeabi_idivmod - quotient and remainder together.
   *
   *   in:  r0 = dividend, r1 = divisor
   *   out: r0 = quotient, r1 = remainder
   *   r2 and r3 are clobbered.
   *
   * Each wrapper stacks its arguments, calls the matching divide routine
   * and rebuilds the remainder as dividend - quotient * divisor.  ip is
   * stacked and restored along with them (presumably to keep the stack
   * adjustment at 16 bytes; the source does not say).
   */
#ifdef CONFIG_AEABI

ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif
  /*
   * Ldiv0 - shared divide-by-zero exit.
   *
   * The division and remainder entry points in this file branch here
   * (they do not call), so lr still holds the return address of their
   * caller.  lr is stacked, __div0 (defined elsewhere) is called, and
   * control then returns to that caller with r0 = 0.
   */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!		@ stack lr, moving sp down by 8
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ unstack the saved lr straight into pc

UNWIND(.fnend)
ENDPROC(Ldiv0)