usercopy.S 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /*
  2. * arch/xtensa/lib/usercopy.S
  3. *
  4. * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
  5. *
  6. * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
  7. * It needs to remain separate and distinct. The hal files are part
  8. * of the Xtensa link-time HAL, and those files may differ per
  9. * processor configuration. Patching the kernel for another
  10. * processor configuration includes replacing the hal files, and we
  11. * could lose the special functionality for accessing user-space
  12. * memory during such a patch. We sacrifice a little code space here
  13. * in favor of simpler code maintenance.
  14. *
  15. * This file is subject to the terms and conditions of the GNU General
  16. * Public License. See the file "COPYING" in the main directory of
  17. * this archive for more details.
  18. *
  19. * Copyright (C) 2002 Tensilica Inc.
  20. */
  21. /*
  22. * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
  23. *
  24. * The returned value is the number of bytes not copied. Implies zero
  25. * is success.
  26. *
  27. * The general case algorithm is as follows:
  28. * If the destination and source are both aligned,
  29. * do 16B chunks with a loop, and then finish up with
  30. * 8B, 4B, 2B, and 1B copies conditional on the length.
  31. * If destination is aligned and source unaligned,
  32. * do the same, but use SRC to align the source data.
  33. * If destination is unaligned, align it by conditionally
  34. * copying 1B and 2B and then retest.
  35. * This code tries to use fall-through branches for the common
  36. * case of aligned destinations (except for the branches to
  37. * the alignment label).
  38. *
  39. * Register use:
  40. * a0/ return address
  41. * a1/ stack pointer
  42. * a2/ original dst (kept for the fixup), then return value
  43. * a3/ src
  44. * a4/ length
  45. * a5/ dst
  46. * a6/ tmp
  47. * a7/ tmp
  48. * a8/ tmp
  49. * a9/ tmp
  50. * a10/ tmp
  51. * a11/ original length
  52. */
  53. #include <linux/linkage.h>
  54. #include <variant/core.h>
  55. #include <asm/asmmacro.h>
  56. .text
  57. ENTRY(__xtensa_copy_user)
  58. entry sp, 16 # minimal stack frame
  59. # a2/ dst, a3/ src, a4/ len
  60. mov a5, a2 # a5 = moving dst pointer; a2 keeps the original dst for the fixup
  61. mov a11, a4 # preserve original len for error case
  62. .Lcommon:
  63. bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
  64. bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
  65. .Ldstaligned: # re-entered from .Ldst1mod2/.Ldst2mod4 once dst is word-aligned
  66. srli a7, a4, 4 # number of loop iterations with 16B
  67. # per iteration
  68. movi a8, 3 # if source is also aligned,
  69. bnone a3, a8, .Laligned # then use word copy
  70. __ssa8 a3 # set shift amount from byte offset
  71. bnez a4, .Lsrcunaligned
  72. movi a2, 0 # return success for len==0
  73. retw
  74. /*
  75. * Destination is unaligned
  76. */
  77. .Ldst1mod2: # dst is only byte aligned
  78. bltui a4, 7, .Lbytecopy # do short copies byte by byte
  79. # copy 1 byte
  80. EX(10f) l8ui a6, a3, 0
  81. addi a3, a3, 1
  82. EX(10f) s8i a6, a5, 0
  83. addi a5, a5, 1
  84. addi a4, a4, -1
  85. bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
  86. # return to main algorithm
  87. .Ldst2mod4: # dst 16-bit aligned
  88. # copy 2 bytes
  89. bltui a4, 6, .Lbytecopy # do short copies byte by byte
  90. EX(10f) l8ui a6, a3, 0
  91. EX(10f) l8ui a7, a3, 1
  92. addi a3, a3, 2
  93. EX(10f) s8i a6, a5, 0
  94. EX(10f) s8i a7, a5, 1
  95. addi a5, a5, 2
  96. addi a4, a4, -2
  97. j .Ldstaligned # dst is now aligned, return to main algorithm
  98. /*
  99. * Byte by byte copy
  100. */
  101. .align 4
  102. .byte 0 # 1 mod 4 alignment for LOOPNEZ
  103. # (0 mod 4 alignment for LBEG)
  104. .Lbytecopy:
  105. #if XCHAL_HAVE_LOOPS
  106. loopnez a4, .Lbytecopydone
  107. #else /* !XCHAL_HAVE_LOOPS */
  108. beqz a4, .Lbytecopydone
  109. add a7, a3, a4 # a7 = end address for source
  110. #endif /* !XCHAL_HAVE_LOOPS */
  111. .Lnextbyte:
  112. EX(10f) l8ui a6, a3, 0
  113. addi a3, a3, 1
  114. EX(10f) s8i a6, a5, 0
  115. addi a5, a5, 1
  116. #if !XCHAL_HAVE_LOOPS
  117. bne a3, a7, .Lnextbyte # a3 steps by 1 and hits a7 exactly; a signed blt would exit early if the range crosses 0x80000000
  118. #endif /* !XCHAL_HAVE_LOOPS */
  119. .Lbytecopydone:
  120. movi a2, 0 # return success for len bytes copied
  121. retw
  122. /*
  123. * Destination and source are word-aligned.
  124. */
  125. # copy 16 bytes per iteration for word-aligned dst and word-aligned src
  126. .align 4 # 1 mod 4 alignment for LOOPNEZ
  127. .byte 0 # (0 mod 4 alignment for LBEG)
  128. .Laligned:
  129. #if XCHAL_HAVE_LOOPS
  130. loopnez a7, .Loop1done
  131. #else /* !XCHAL_HAVE_LOOPS */
  132. beqz a7, .Loop1done
  133. slli a8, a7, 4
  134. add a8, a8, a3 # a8 = end of last 16B source chunk
  135. #endif /* !XCHAL_HAVE_LOOPS */
  136. .Loop1:
  137. EX(10f) l32i a6, a3, 0
  138. EX(10f) l32i a7, a3, 4 # a7 is reused as a data temp; the trip count is no longer read from it
  139. EX(10f) s32i a6, a5, 0
  140. EX(10f) l32i a6, a3, 8
  141. EX(10f) s32i a7, a5, 4
  142. EX(10f) l32i a7, a3, 12
  143. EX(10f) s32i a6, a5, 8
  144. addi a3, a3, 16
  145. EX(10f) s32i a7, a5, 12
  146. addi a5, a5, 16
  147. #if !XCHAL_HAVE_LOOPS
  148. bne a3, a8, .Loop1 # a3 steps by 16 and hits a8 exactly; avoids the signed compare of blt on addresses
  149. #endif /* !XCHAL_HAVE_LOOPS */
  150. .Loop1done:
  151. bbci.l a4, 3, .L2 # remaining tail is selected by the low 4 bits of len
  152. # copy 8 bytes
  153. EX(10f) l32i a6, a3, 0
  154. EX(10f) l32i a7, a3, 4
  155. addi a3, a3, 8
  156. EX(10f) s32i a6, a5, 0
  157. EX(10f) s32i a7, a5, 4
  158. addi a5, a5, 8
  159. .L2:
  160. bbci.l a4, 2, .L3
  161. # copy 4 bytes
  162. EX(10f) l32i a6, a3, 0
  163. addi a3, a3, 4
  164. EX(10f) s32i a6, a5, 0
  165. addi a5, a5, 4
  166. .L3:
  167. bbci.l a4, 1, .L4
  168. # copy 2 bytes
  169. EX(10f) l16ui a6, a3, 0
  170. addi a3, a3, 2
  171. EX(10f) s16i a6, a5, 0
  172. addi a5, a5, 2
  173. .L4:
  174. bbci.l a4, 0, .L5
  175. # copy 1 byte
  176. EX(10f) l8ui a6, a3, 0
  177. EX(10f) s8i a6, a5, 0
  178. .L5:
  179. movi a2, 0 # return success for len bytes copied
  180. retw
  181. /*
  182. * Destination is aligned, Source is unaligned
  183. */
  184. .align 4
  185. .byte 0 # 1 mod 4 alignment for LOOPNEZ
  186. # (0 mod 4 alignment for LBEG)
  187. .Lsrcunaligned:
  188. # copy 16 bytes per iteration for word-aligned dst and unaligned src
  189. and a10, a3, a8 # save unalignment offset for below (a8 is still the mask 3)
  190. sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
  191. EX(10f) l32i a6, a3, 0 # load first word
  192. #if XCHAL_HAVE_LOOPS
  193. loopnez a7, .Loop2done
  194. #else /* !XCHAL_HAVE_LOOPS */
  195. beqz a7, .Loop2done
  196. slli a12, a7, 4 # a12 is an extra temp, used only on this path
  197. add a12, a12, a3 # a12 = end of last 16B source chunk
  198. #endif /* !XCHAL_HAVE_LOOPS */
  199. .Loop2:
  200. EX(10f) l32i a7, a3, 4
  201. EX(10f) l32i a8, a3, 8
  202. __src_b a6, a6, a7 # merge two neighbouring source words into one dst word
  203. EX(10f) s32i a6, a5, 0
  204. EX(10f) l32i a9, a3, 12
  205. __src_b a7, a7, a8
  206. EX(10f) s32i a7, a5, 4
  207. EX(10f) l32i a6, a3, 16 # a6 = first word for the next iteration or tail
  208. __src_b a8, a8, a9
  209. EX(10f) s32i a8, a5, 8
  210. addi a3, a3, 16
  211. __src_b a9, a9, a6
  212. EX(10f) s32i a9, a5, 12
  213. addi a5, a5, 16
  214. #if !XCHAL_HAVE_LOOPS
  215. bne a3, a12, .Loop2 # aligned a3 steps by 16 and hits a12 exactly; avoids the signed compare of blt on addresses
  216. #endif /* !XCHAL_HAVE_LOOPS */
  217. .Loop2done:
  218. bbci.l a4, 3, .L12
  219. # copy 8 bytes
  220. EX(10f) l32i a7, a3, 4
  221. EX(10f) l32i a8, a3, 8
  222. __src_b a6, a6, a7
  223. EX(10f) s32i a6, a5, 0
  224. addi a3, a3, 8
  225. __src_b a7, a7, a8
  226. EX(10f) s32i a7, a5, 4
  227. addi a5, a5, 8
  228. mov a6, a8 # keep the last loaded word as the leading word for the next step
  229. .L12:
  230. bbci.l a4, 2, .L13
  231. # copy 4 bytes
  232. EX(10f) l32i a7, a3, 4
  233. addi a3, a3, 4
  234. __src_b a6, a6, a7
  235. EX(10f) s32i a6, a5, 0
  236. addi a5, a5, 4
  237. mov a6, a7 # keep the last loaded word as the leading word for the next step
  238. .L13:
  239. add a3, a3, a10 # readjust a3 with correct misalignment
  240. bbci.l a4, 1, .L14
  241. # copy 2 bytes
  242. EX(10f) l8ui a6, a3, 0
  243. EX(10f) l8ui a7, a3, 1
  244. addi a3, a3, 2
  245. EX(10f) s8i a6, a5, 0
  246. EX(10f) s8i a7, a5, 1
  247. addi a5, a5, 2
  248. .L14:
  249. bbci.l a4, 0, .L15
  250. # copy 1 byte
  251. EX(10f) l8ui a6, a3, 0
  252. EX(10f) s8i a6, a5, 0
  253. .L15:
  254. movi a2, 0 # return success for len bytes copied
  255. retw
  256. ENDPROC(__xtensa_copy_user)
  257. .section .fixup, "ax"
  258. .align 4
  259. /* Landing pad named by the EX(10f) annotations in __xtensa_copy_user.
  260. * On entry: a2 = dst as passed in, a5 = current dst, a11 = len as passed in.
  261. * Return a2 = bytes not copied = a11 - (a5 - a2),
  262. * evaluated below in the equivalent form (a2 + a11) - a5.
  263. */
  264. 10:
  265. add a2, a2, a11 /* a2 <-- original dst + original len */
  266. sub a2, a2, a5 /* a2 <-- minus current dst = bytes not copied */
  267. retw