memcpy.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /*
  2. * linux/arch/alpha/lib/memcpy.c
  3. *
  4. * Copyright (C) 1995 Linus Torvalds
  5. */
  6. /*
  7. * This is a reasonably optimized memcpy() routine.
  8. */
  /*
   * Note that the C code is written to be optimized into good assembly. However,
   * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
   * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
   * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
   */
  15. #include <linux/types.h>
  16. #include <linux/export.h>
  17. /*
  18. * This should be done in one go with ldq_u*2/mask/stq_u. Do it
  19. * with a macro so that we can fix it up later..
  20. */
  /*
   * ALIGN_DEST_TO8_UP(d, s, n): copy single bytes upwards from address s to
   * address d until d is 8-byte aligned or the byte count n is used up.
   *
   * d and s are integer-typed addresses and n is a signed count; all three
   * must be modifiable lvalues because they are updated in place.
   *
   * The macro never leaves the calling function. If n runs out before d is
   * aligned it simply stops with n == 0, and a count that is already <= 0
   * copies nothing. Arguments are parenthesized and the body is wrapped in
   * do { } while (0) so the macro behaves like one statement.
   */
  #define ALIGN_DEST_TO8_UP(d,s,n) \
      do { \
          while (((d) & 7) && (n) > 0) { \
              (n)--; \
              *(char *) (d) = *(char *) (s); \
              (d)++; (s)++; \
          } \
      } while (0)
  /*
   * ALIGN_DEST_TO8_DN(d, s, n): copy single bytes downwards until d is 8-byte
   * aligned or the byte count n is used up.
   *
   * d and s are integer-typed addresses pointing one past the next byte to
   * copy; each step pre-decrements both and then copies one byte. n is a
   * signed count. All three must be modifiable lvalues.
   *
   * The macro never leaves the calling function. If n runs out before d is
   * aligned it simply stops with n == 0, and a count that is already <= 0
   * copies nothing. Arguments are parenthesized and the body is wrapped in
   * do { } while (0) so the macro behaves like one statement.
   */
  #define ALIGN_DEST_TO8_DN(d,s,n) \
      do { \
          while (((d) & 7) && (n) > 0) { \
              (n)--; \
              (d)--; (s)--; \
              *(char *) (d) = *(char *) (s); \
          } \
      } while (0)
  35. /*
  36. * This should similarly be done with ldq_u*2/mask/stq. The destination
  37. * is aligned, but we don't fill in a full quad-word
  38. */
  /*
   * DO_REST_UP(d, s, n): copy the remaining n bytes one at a time, upwards,
   * from address s to address d.
   *
   * d and s are integer-typed addresses and n is a signed count; all three
   * must be modifiable lvalues and are updated in place (n ends at 0, or is
   * left unchanged if it was already <= 0). Arguments are parenthesized and
   * the body is wrapped in do { } while (0) so the macro behaves like one
   * statement.
   */
  #define DO_REST_UP(d,s,n) \
      do { \
          while ((n) > 0) { \
              (n)--; \
              *(char *) (d) = *(char *) (s); \
              (d)++; (s)++; \
          } \
      } while (0)
  /*
   * DO_REST_DN(d, s, n): copy the remaining n bytes one at a time, downwards.
   *
   * d and s are integer-typed addresses pointing one past the next byte to
   * copy; each step pre-decrements both and then copies one byte. n is a
   * signed count. All three must be modifiable lvalues and are updated in
   * place (n ends at 0, or is left unchanged if it was already <= 0).
   * Arguments are parenthesized and the body is wrapped in do { } while (0)
   * so the macro behaves like one statement.
   */
  #define DO_REST_DN(d,s,n) \
      do { \
          while ((n) > 0) { \
              (n)--; \
              (d)--; (s)--; \
              *(char *) (d) = *(char *) (s); \
          } \
      } while (0)
  51. /*
  52. * This should be done with ldq/mask/stq. The source and destination are
  53. * aligned, but we don't fill in a full quad-word
  54. */
  /*
   * Tail copies for the case where source and destination are both aligned.
   * For now they simply expand to the generic byte-wise tail loops.
   */
  #define DO_REST_ALIGNED_UP(dst,src,cnt) DO_REST_UP(dst,src,cnt)
  #define DO_REST_ALIGNED_DN(dst,src,cnt) DO_REST_DN(dst,src,cnt)
  57. /*
  58. * This does unaligned memory copies. We want to avoid storing to
  59. * an unaligned address, as that would do a read-modify-write cycle.
  60. * We also want to avoid double-reading the unaligned reads.
  61. *
  62. * Note the ordering to try to avoid load (and address generation) latencies.
  63. */
  /*
   * Ascending copy of n bytes from address s to address d for the case where
   * the two addresses differ in their low three bits.
   *
   * The destination is first byte-copied up to an 8-byte boundary so every
   * bulk store is an aligned quadword. Each source quadword is loaded once
   * with ldq_u; the value loaded as "next" in one iteration is reused as
   * "current" in the following one. extql/extqh (both keyed on s) produce
   * the two pieces that are OR-ed together into the quadword stored at d.
   *
   * NOTE(review): the look-ahead load at s+8 relies on s being unaligned once
   * d is aligned, which holds when called from memcpy() -- confirm before
   * calling this from anywhere else.
   */
  static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
                                            long n)
  {
      ALIGN_DEST_TO8_UP(d,s,n);

      n -= 8;    /* bias the count so the loop only has to test the sign */
      if (n >= 0) {
          unsigned long cur_quad, next_quad;

          __asm__("ldq_u %0,%1" : "=r" (cur_quad) : "m" (*(unsigned long *) s));
          for (;;) {
              unsigned long upper;

              __asm__("ldq_u %0,%1" : "=r" (next_quad) : "m" (*(unsigned long *)(s+8)));
              n -= 8;
              __asm__("extql %1,%2,%0" : "=r" (cur_quad) : "r" (cur_quad), "r" (s));
              __asm__("extqh %1,%2,%0" : "=r" (upper) : "r" (next_quad), "r" (s));
              s += 8;
              *(unsigned long *) d = cur_quad | upper;
              d += 8;
              cur_quad = next_quad;
              if (n < 0)
                  break;
          }
      }
      n += 8;    /* undo the bias: fewer than 8 bytes are left */
      DO_REST_UP(d,s,n);
  }
  /*
   * Descending copy of n bytes from address s to address d, one byte at a
   * time, starting with the last byte of each region.
   *
   * A count of zero or less copies nothing, matching the other copy helpers
   * in this file. Previously a negative n made the "while (n--)" loop run
   * away.
   */
  static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
                                            long n)
  {
      /* I don't understand AXP assembler well enough for this. -Tim */
      if (n <= 0)
          return;

      /* Point one past the last byte of each region, then walk downwards. */
      s += n;
      d += n;
      while (n-- > 0)
          * (char *) --d = * (char *) --s;
  }
  100. /*
  101. * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
  102. * for the load-store. I don't know why, but it would seem that using a floating
  103. * point register for the move seems to slow things down (very small difference,
  104. * though).
  105. *
  106. * Note the ordering to try to avoid load (and address generation) latencies.
  107. */
  /*
   * Ascending copy of n bytes from address s to address d, moving whole
   * quadwords once the destination is 8-byte aligned.
   *
   * Leading bytes are copied singly until d is aligned, then 8 bytes per
   * iteration go through an integer register (explicit ldq), and the final
   * bytes (fewer than 8) are finished by the tail macro.
   *
   * NOTE(review): the ldq on s assumes s becomes aligned together with d,
   * i.e. both addresses share their low three bits, as memcpy() checks
   * before calling this -- confirm for any other caller.
   */
  static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
                                          long n)
  {
      ALIGN_DEST_TO8_UP(d,s,n);

      /* n is biased by -8 so the loop condition is a plain sign test */
      for (n -= 8; n >= 0; n -= 8) {
          unsigned long quad;

          __asm__("ldq %0,%1" : "=r" (quad) : "m" (*(unsigned long *) s));
          s += 8;
          *(unsigned long *) d = quad;
          d += 8;
      }
      n += 8;    /* undo the bias */
      DO_REST_ALIGNED_UP(d,s,n);
  }
  /*
   * Descending copy of n bytes from address s to address d, moving whole
   * quadwords once the (end of the) destination is 8-byte aligned.
   *
   * Both addresses are first advanced to one past the end of their regions.
   * Trailing bytes are copied singly until d is aligned, then 8 bytes per
   * iteration go through an integer register (explicit ldq), and the
   * remaining bytes (fewer than 8) are finished by the tail macro.
   *
   * NOTE(review): the ldq on s assumes s and d share their low three address
   * bits -- no caller is visible in this file, so confirm at the call site.
   */
  static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
                                          long n)
  {
      s += n;
      d += n;
      ALIGN_DEST_TO8_DN(d,s,n);

      /* n is biased by -8 so the loop condition is a plain sign test */
      for (n -= 8; n >= 0; n -= 8) {
          unsigned long quad;

          s -= 8;
          __asm__("ldq %0,%1" : "=r" (quad) : "m" (*(unsigned long *) s));
          d -= 8;
          *(unsigned long *) d = quad;
      }
      n += 8;    /* undo the bias */
      DO_REST_ALIGNED_DN(d,s,n);
  }
  /*
   * memcpy - copy n bytes from src to dest, ascending.
   *
   * Picks the quadword-aligned helper when dest and src have identical low
   * three address bits (so aligning one aligns the other), and the
   * unaligned helper otherwise. Returns dest.
   */
  void * memcpy(void * dest, const void *src, size_t n)
  {
      unsigned long to = (unsigned long) dest;
      unsigned long from = (unsigned long) src;

      if ((to ^ from) & 7)
          __memcpy_unaligned_up (to, from, n);
      else
          __memcpy_aligned_up (to, from, n);
      return dest;
  }
  EXPORT_SYMBOL(memcpy);