copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
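
/*
 * Note: these routines are the arch backend of copy_to_user() and
 * copy_from_user().  They all return the number of bytes that could
 * NOT be copied (0 on full success) in eax, which is why every exit
 * path below ends up with that count in eax.  A C-level caller looks
 * roughly like this (sketch only; buf, ubuf and len are illustrative
 * names):
 *
 *	if (copy_from_user(buf, ubuf, len))
 *		return -EFAULT;
 */
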
/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
        ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
                      "jmp copy_user_generic_string",           \
                      X86_FEATURE_REP_GOOD,                     \
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
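
/*
 * _copy_from_user below performs the same segment limit check as
 * _copy_to_user above: pointer + count must neither wrap around (the
 * jc) nor go past the task's addr_limit, otherwise we branch to the
 * bad_*_user fixup code instead of touching memory.  This is, in
 * effect, the access_ok() check done in assembly.
 */
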
/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
        ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
                      "jmp copy_user_generic_string",           \
                      X86_FEATURE_REP_GOOD,                     \
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
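
/*
 * The ALTERNATIVE_2 jumps above are patched at boot: CPUs with
 * X86_FEATURE_ERMS go straight to copy_user_enhanced_fast_string,
 * CPUs with only X86_FEATURE_REP_GOOD use copy_user_generic_string,
 * and everything else falls back to copy_user_generic_unrolled.
 */
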
        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
ENDPROC(bad_from_user)
        .previous
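
/*
 * Rationale for the rep stosb above: when a copy *from* user space is
 * rejected, the kernel destination buffer is cleared so the caller
 * never sees uninitialized kernel memory; a rejected copy *to* user
 * space only has to report the full byte count in eax.
 */
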
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
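
/*
 * Layout of the code below: the main loop (labels 1-16) moves 64 bytes
 * per iteration through r8-r11, labels 18/19 copy the remaining 8-byte
 * words, and labels 21/22 copy the final 0-7 bytes one at a time.
 * Every load and store has an exception-table entry so that a fault
 * lands in the .fixup code instead of oopsing.
 */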
ENTRY(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f  /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret
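
/*
 * Exception fixups for the loops above: label 30 recomputes the bytes
 * left from the number of outstanding 64-byte blocks (ecx << 6 plus
 * the tail in edx), label 40 does the same for 8-byte words
 * (edx + ecx*8), and label 50 covers the byte loop, where ecx is
 * already the remaining count.  All of them then hand that count to
 * copy_user_handle_tail.
 */
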
        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f   /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
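
/*
 * The string variant above simply splits the count into quadwords for
 * rep movsq and a 0-7 byte tail for rep movsb.  On a fault, fixup 11
 * rebuilds the remaining byte count from the outstanding quadwords
 * (edx + ecx*8) and fixup 12 passes ecx on as-is before jumping to
 * copy_user_handle_tail.
 */
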
/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when the CPU
 * advertises them (X86_FEATURE_ERMS).
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
        ASM_STAC
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
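
/*
 * With ERMS, a single rep movsb is expected to be fast for any size
 * and alignment, so the function above needs no ALIGN_DESTINATION or
 * unrolling.  On a fault, rcx already holds the number of bytes not
 * yet copied, so the fixup only moves it into edx for
 * copy_user_handle_tail.
 */
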
/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This version uses non-temporal stores (movnti) to keep the
 * destination out of the cache for more performance.
 */
ENTRY(__copy_user_nocache)
        ASM_STAC
        cmpl $8,%edx
        jb 20f  /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xorl %eax,%eax
        ASM_CLAC
        sfence
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     sfence
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(__copy_user_nocache)
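
/*
 * Note on the sfence in both the success path and the fixup path
 * above: movnti stores are weakly ordered, so the fence is needed to
 * make all non-temporal writes globally visible before the caller can
 * rely on the destination contents.
 */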