memcpy.S

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory. It's also a seriously bad idea on non-dma-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
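/*
 * Illustrative sketch only (not built; the prototypes below are assumptions
 * for clarity, the real declarations live in the string/uaccess headers).
 * Seen from C, the contract above amounts to roughly:
 *
 *	void *memcpy(void *dst, const void *src, size_t len);
 *		returns dst; src and dst must not overlap
 *
 *	size_t __copy_user(void *dst, const void *src, size_t len);
 *		returns 0 on success, otherwise an upper bound on the
 *		number of bytes that could not be copied because a load
 *		or store faulted
 */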
/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
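/*
 * Worked example of the invariants above (numbers made up for illustration):
 * if __copy_user is entered with src_entry = 0x1000, dst_entry = 0x2000 and
 * len = 0x100, then AT = 0x1100 by (1). Should a load fault once the
 * reported bad address is 0x1040, the handler can compute
 *	uncopied bytes = AT - 0x1040 = 0xc0
 * and, thanks to (3), the first destination byte that still needs to be
 * cleared is dst + (0x1040 - src), which is exactly what .Ll_exc below
 * computes.
 */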
/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */
#define EXC(insn, type, reg, addr, handler) \
	.if \mode == LEGACY_MODE; \
9:		insn reg, addr; \
		.section __ex_table,"a"; \
		PTR 9b, handler; \
		.previous; \
	/* This is assembled in EVA mode */ \
	.else; \
		/* If loading from user or storing to user */ \
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN)); \
9:			__BUILD_EVA_INSN(insn##e, reg, addr); \
			.section __ex_table,"a"; \
			PTR 9b, handler; \
			.previous; \
		.else; \
			/* \
			 * Still in EVA, but no need for \
			 * exception handler or EVA insn \
			 */ \
			insn reg, addr; \
		.endif; \
	.endif
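/*
 * Example expansion (illustrative, LEGACY_MODE): an invocation such as
 *	EXC(lw, LD_INSN, t0, 0(src), .Ll_exc)
 * assembles to the load itself plus an exception-table entry,
 *	9:	lw	t0, 0(src)
 *		.section __ex_table,"a"
 *		PTR	9b, .Ll_exc
 *		.previous
 * so a fault on the lw is redirected to the .Ll_exc handler.
 */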
/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)

#define _PREF(hint, addr, type) \
	.if \mode == LEGACY_MODE; \
		PREF(hint, addr); \
	.else; \
		.if ((\from == USEROP) && (type == SRC_PREFETCH)) || \
		    ((\to == USEROP) && (type == DST_PREFETCH)); \
			/* \
			 * PREFE has only 9 bits for the offset \
			 * compared to PREF which has 16, so it may \
			 * need to use the $at register but this \
			 * register should remain intact because it's \
			 * used later on. Therefore use $v1. \
			 */ \
			.set at=v1; \
			PREFE(hint, addr); \
			.set noat; \
		.else; \
			PREF(hint, addr); \
		.endif; \
	.endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
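/*
 * Worked example (assuming a 64-bit kernel, so NBYTES == 8): FIRST(1) is 8,
 * REST(1) is 15 and ADDRMASK is 7. A pair such as
 *	LDFIRST(t1, FIRST(1)(src), handler)
 *	LDREST(t1, REST(1)(src), handler)
 * therefore reads the possibly unaligned doubleword occupying bytes
 * src+8 .. src+15 (ldr/ldl on little-endian, ldl/ldr on big-endian).
 */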
	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif
	.align	5

/*
 * Macro to build the __copy_user common code
 * Arguments:
 * mode : LEGACY_MODE or EVA_MODE
 * from : Source operand. USEROP or KERNELOP
 * to   : Destination operand. USEROP or KERNELOP
 */
	.macro __BUILD_COPY_USER mode, from, to

	/* initialize __memcpy if this is the first time we execute this macro */
	.ifnotdef __memcpy
	.set __memcpy, 1
	.hidden __memcpy /* make sure it does not leak */
	.endif
/*
 * Note: dst & src may be unaligned, len may be 0
 * Temps
 */
#define rem t8

	R10KCBARRIER(0(ra))
/*
 * The "issue break"s below are very approximate.
 * Issue delays for dcache fills will perturb the schedule, as will
 * load queue full replay traps, etc.
 *
 * If len < NBYTES use byte operations.
 */
	PREFS(	0, 0(src) )
	PREFD(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREFS(	0, 1*32(src) )
	PREFD(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen\@
	and	t0, src, ADDRMASK
	PREFS(	0, 2*32(src) )
	PREFD(	1, 2*32(dst) )
#ifndef CONFIG_CPU_MIPSR6
	bnez	t1, .Ldst_unaligned\@
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
#else
	or	t0, t0, t1
	bnez	t0, .Lcopy_unaligned_bytes\@
#endif
/*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
.Lboth_aligned\@:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
	PREFS(	0, 3*32(src) )
	PREFD(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 8*NBYTES
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
	STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
	PREFS(	0, 8*32(src) )
	PREFD(	1, 8*32(dst) )
	bne	len, rem, 1b
	nop

/*
 * len == rem == the number of bytes left to copy < 8*NBYTES
 */
.Lcleanup_both_aligned\@:
	beqz	len, .Ldone\@
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	and	rem, len, (NBYTES-1)		# rem = len % NBYTES
/*
 * len >= 4*NBYTES
 */
	LOAD( t0, UNIT(0)(src), .Ll_exc\@)
	LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
/*
 * rem = len % NBYTES
 */
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, 0(src), .Ll_exc\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

#ifndef CONFIG_CPU_MIPSR6
/*
 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
 * A loop would do only a byte at a time with possible branch
 * mispredicts. Can't do an explicit LOAD dst,mask,or,STORE
 * because can't assume read-access to dst. Instead, use
 * STREST dst, which doesn't require read access to dst.
 *
 * This code should perform better than a simple loop on modern,
 * wide-issue mips processors because the code has fewer branches and
 * more instruction-level parallelism.
 */
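/*
 * Worked example of the trick above (64-bit kernel, illustrative): with
 * len == 3 bytes left, rem = 3*8 = 24 bits are kept and
 * bits = 64 - 24 = 40 bits are discarded. SHIFT_DISCARD shifts the loaded
 * word so that only those 3 bytes remain, and the single STREST at
 * dst+len-1 stores exactly them without ever reading dst.
 */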
#define bits t2
	beqz	len, .Ldone\@
	ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc\@)
	SUB	bits, bits, rem	# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc\@)
	jr	ra
	move	len, zero
.Ldst_unaligned\@:
/*
 * dst is unaligned
 * t0 = src & ADDRMASK
 * t1 = dst & ADDRMASK; t1 > 0
 * len >= NBYTES
 *
 * Copy enough bytes to align dst
 * Set match = (src and dst have same alignment)
 */
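/*
 * Worked example (64-bit kernel, illustrative): if dst & ADDRMASK == 3,
 * then t2 = NBYTES - t1 = 5, so the LDFIRST/LDREST plus STFIRST below
 * copy 5 bytes and leave dst 8-byte aligned. match = t0 ^ t1 is zero
 * only if src had the same misalignment, in which case src is now
 * aligned as well and we can branch back to .Lboth_aligned.
 */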
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
	beq	len, t2, .Ldone\@
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	PREFS(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned\@
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
	PREFD(	1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder
#endif /* !CONFIG_CPU_MIPSR6 */

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N) \
	LOADB(t0, N(src), .Ll_exc\@); \
	SUB	len, len, 1; \
	beqz	len, .Ldone\@; \
	STOREB(t0, N(dst), .Ls_exc_p1\@)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADB(t0, NBYTES-2(src), .Ll_exc\@)
	SUB	len, len, 1
	jr	ra
	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
	jr	ra
	nop

#ifdef CONFIG_CPU_MIPSR6
.Lcopy_unaligned_bytes\@:
1:
	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
	COPY_BYTE(6)
	COPY_BYTE(7)
	ADD	src, src, 8
	b	1b
	ADD	dst, dst, 8
#endif /* CONFIG_CPU_MIPSR6 */

	.if __memcpy == 1
	END(memcpy)
	.set __memcpy, 0
	.hidden __memcpy
	.endif

.Ll_exc_copy\@:
/*
 * Copy bytes from src until faulting load address (or until a
 * lb faults)
 *
 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
 * may be more than a byte beyond the last address.
 * Hence, the lb below may get an exception.
 *
 * Assumes src < THREAD_BUADDR($28)
 */
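/*
 * Illustrative scenario: an LDREST covering src .. src+7 faults and
 * THREAD_BUADDR reports, say, src+6 even though only src .. src+3 are
 * readable. The loop below then recopies bytes one lb at a time towards
 * the reported address; the lb at src+4 might fault in turn, which is why
 * every LOADB here carries its own .Ll_exc handler.
 */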
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADB(t1, 0(src), .Ll_exc\@)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc\@:
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	nop
	SUB	len, AT, t0		# len = number of uncopied bytes
	bnez	t6, .Ldone\@	/* Skip the zeroing part if inatomic */
/*
 * Here's where we rely on src and dst being incremented in tandem,
 * See (3) above.
 * dst += (fault addr - src) to put dst at first byte to clear
 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
/*
 * Clear len bytes starting at dst. Can't call __bzero because it
 * might modify len. An inefficient loop for these rare times...
 */
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone\@
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	SUB	src, src, 1
#else
	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	SUB	src, src, v1
	.set	pop
#endif
	jr	ra
	nop

#define SEXC(n) \
	.set	reorder;	/* DADDI_WAR */ \
.Ls_exc_p ## n ## u\@: \
	ADD	len, len, n*NBYTES; \
	jr	ra; \
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1\@:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc\@:
	jr	ra
	nop
	.endm

	.align	5
LEAF(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, .L__memcpy
	move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder
	jr	ra
	move	a2, zero
	END(__rmemcpy)
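/*
 * Illustrative note on the overlap handling above (numbers made up):
 * with src = 0x100, dst = 0x104 and len = 0x10, both tests in memmove
 * are true, so the regions overlap and we fall through to __rmemcpy
 * instead of branching to memcpy. Because src < dst, __rmemcpy copies
 * a byte at a time from the end of the buffers towards the start, so no
 * source byte is overwritten before it has been read.
 */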
/*
 * t6 is used as a flag to note inatomic mode.
 */
LEAF(__copy_user_inatomic)
	b	__copy_user_common
	li	t6, 1
	END(__copy_user_inatomic)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
	li	t6, 0	/* not inatomic */
__copy_user_common:
	/* Legacy Mode, user <-> user */
	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in
 * user space.
 */

LEAF(__copy_user_inatomic_eva)
	b	__copy_from_user_common
	li	t6, 1
	END(__copy_user_inatomic_eva)

/*
 * __copy_from_user (EVA)
 */
LEAF(__copy_from_user_eva)
	li	t6, 0	/* not inatomic */
__copy_from_user_common:
	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP
	END(__copy_from_user_eva)

/*
 * __copy_to_user (EVA)
 */
LEAF(__copy_to_user_eva)
	__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
	END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */
LEAF(__copy_in_user_eva)
	__BUILD_COPY_USER EVA_MODE USEROP USEROP
	END(__copy_in_user_eva)

#endif