memcpy_32.c 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/string.h>
  3. #include <linux/export.h>
  4. #undef memcpy
  5. #undef memset
  6. __visible void *memcpy(void *to, const void *from, size_t n)
  7. {
  8. #if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
  9. return __memcpy3d(to, from, n);
  10. #else
  11. return __memcpy(to, from, n);
  12. #endif
  13. }
  14. EXPORT_SYMBOL(memcpy);
  15. __visible void *memset(void *s, int c, size_t count)
  16. {
  17. return __memset(s, c, count);
  18. }
  19. EXPORT_SYMBOL(memset);
  20. __visible void *memmove(void *dest, const void *src, size_t n)
  21. {
  22. int d0,d1,d2,d3,d4,d5;
  23. char *ret = dest;
  24. __asm__ __volatile__(
  25. /* Handle more 16 bytes in loop */
  26. "cmp $0x10, %0\n\t"
  27. "jb 1f\n\t"
  28. /* Decide forward/backward copy mode */
  29. "cmp %2, %1\n\t"
  30. "jb 2f\n\t"
  31. /*
  32. * movs instruction have many startup latency
  33. * so we handle small size by general register.
  34. */
  35. "cmp $680, %0\n\t"
  36. "jb 3f\n\t"
  37. /*
  38. * movs instruction is only good for aligned case.
  39. */
  40. "mov %1, %3\n\t"
  41. "xor %2, %3\n\t"
  42. "and $0xff, %3\n\t"
  43. "jz 4f\n\t"
  44. "3:\n\t"
  45. "sub $0x10, %0\n\t"
  46. /*
  47. * We gobble 16 bytes forward in each loop.
  48. */
  49. "3:\n\t"
  50. "sub $0x10, %0\n\t"
  51. "mov 0*4(%1), %3\n\t"
  52. "mov 1*4(%1), %4\n\t"
  53. "mov %3, 0*4(%2)\n\t"
  54. "mov %4, 1*4(%2)\n\t"
  55. "mov 2*4(%1), %3\n\t"
  56. "mov 3*4(%1), %4\n\t"
  57. "mov %3, 2*4(%2)\n\t"
  58. "mov %4, 3*4(%2)\n\t"
  59. "lea 0x10(%1), %1\n\t"
  60. "lea 0x10(%2), %2\n\t"
  61. "jae 3b\n\t"
  62. "add $0x10, %0\n\t"
  63. "jmp 1f\n\t"
  64. /*
  65. * Handle data forward by movs.
  66. */
  67. ".p2align 4\n\t"
  68. "4:\n\t"
  69. "mov -4(%1, %0), %3\n\t"
  70. "lea -4(%2, %0), %4\n\t"
  71. "shr $2, %0\n\t"
  72. "rep movsl\n\t"
  73. "mov %3, (%4)\n\t"
  74. "jmp 11f\n\t"
  75. /*
  76. * Handle data backward by movs.
  77. */
  78. ".p2align 4\n\t"
  79. "6:\n\t"
  80. "mov (%1), %3\n\t"
  81. "mov %2, %4\n\t"
  82. "lea -4(%1, %0), %1\n\t"
  83. "lea -4(%2, %0), %2\n\t"
  84. "shr $2, %0\n\t"
  85. "std\n\t"
  86. "rep movsl\n\t"
  87. "mov %3,(%4)\n\t"
  88. "cld\n\t"
  89. "jmp 11f\n\t"
  90. /*
  91. * Start to prepare for backward copy.
  92. */
  93. ".p2align 4\n\t"
  94. "2:\n\t"
  95. "cmp $680, %0\n\t"
  96. "jb 5f\n\t"
  97. "mov %1, %3\n\t"
  98. "xor %2, %3\n\t"
  99. "and $0xff, %3\n\t"
  100. "jz 6b\n\t"
  101. /*
  102. * Calculate copy position to tail.
  103. */
  104. "5:\n\t"
  105. "add %0, %1\n\t"
  106. "add %0, %2\n\t"
  107. "sub $0x10, %0\n\t"
  108. /*
  109. * We gobble 16 bytes backward in each loop.
  110. */
  111. "7:\n\t"
  112. "sub $0x10, %0\n\t"
  113. "mov -1*4(%1), %3\n\t"
  114. "mov -2*4(%1), %4\n\t"
  115. "mov %3, -1*4(%2)\n\t"
  116. "mov %4, -2*4(%2)\n\t"
  117. "mov -3*4(%1), %3\n\t"
  118. "mov -4*4(%1), %4\n\t"
  119. "mov %3, -3*4(%2)\n\t"
  120. "mov %4, -4*4(%2)\n\t"
  121. "lea -0x10(%1), %1\n\t"
  122. "lea -0x10(%2), %2\n\t"
  123. "jae 7b\n\t"
  124. /*
  125. * Calculate copy position to head.
  126. */
  127. "add $0x10, %0\n\t"
  128. "sub %0, %1\n\t"
  129. "sub %0, %2\n\t"
  130. /*
  131. * Move data from 8 bytes to 15 bytes.
  132. */
  133. ".p2align 4\n\t"
  134. "1:\n\t"
  135. "cmp $8, %0\n\t"
  136. "jb 8f\n\t"
  137. "mov 0*4(%1), %3\n\t"
  138. "mov 1*4(%1), %4\n\t"
  139. "mov -2*4(%1, %0), %5\n\t"
  140. "mov -1*4(%1, %0), %1\n\t"
  141. "mov %3, 0*4(%2)\n\t"
  142. "mov %4, 1*4(%2)\n\t"
  143. "mov %5, -2*4(%2, %0)\n\t"
  144. "mov %1, -1*4(%2, %0)\n\t"
  145. "jmp 11f\n\t"
  146. /*
  147. * Move data from 4 bytes to 7 bytes.
  148. */
  149. ".p2align 4\n\t"
  150. "8:\n\t"
  151. "cmp $4, %0\n\t"
  152. "jb 9f\n\t"
  153. "mov 0*4(%1), %3\n\t"
  154. "mov -1*4(%1, %0), %4\n\t"
  155. "mov %3, 0*4(%2)\n\t"
  156. "mov %4, -1*4(%2, %0)\n\t"
  157. "jmp 11f\n\t"
  158. /*
  159. * Move data from 2 bytes to 3 bytes.
  160. */
  161. ".p2align 4\n\t"
  162. "9:\n\t"
  163. "cmp $2, %0\n\t"
  164. "jb 10f\n\t"
  165. "movw 0*2(%1), %%dx\n\t"
  166. "movw -1*2(%1, %0), %%bx\n\t"
  167. "movw %%dx, 0*2(%2)\n\t"
  168. "movw %%bx, -1*2(%2, %0)\n\t"
  169. "jmp 11f\n\t"
  170. /*
  171. * Move data for 1 byte.
  172. */
  173. ".p2align 4\n\t"
  174. "10:\n\t"
  175. "cmp $1, %0\n\t"
  176. "jb 11f\n\t"
  177. "movb (%1), %%cl\n\t"
  178. "movb %%cl, (%2)\n\t"
  179. ".p2align 4\n\t"
  180. "11:"
  181. : "=&c" (d0), "=&S" (d1), "=&D" (d2),
  182. "=r" (d3),"=r" (d4), "=r"(d5)
  183. :"0" (n),
  184. "1" (src),
  185. "2" (dest)
  186. :"memory");
  187. return ret;
  188. }
  189. EXPORT_SYMBOL(memmove);