memmove.S 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. /*
  2. * arch/alpha/lib/memmove.S
  3. *
  4. * Barely optimized memmove routine for Alpha EV5.
  5. *
  6. * This is hand-massaged output from the original memcpy.c. We defer to
  7. * memcpy whenever possible; the backwards copy loops are not unrolled.
  8. */
  9. #include <asm/export.h>
  10. .set noat /* assembler may not use the $at temporary behind our back */
  11. .set noreorder /* keep the hand-scheduled instruction order exactly as written */
  12. .text
  13. .align 4
  14. .globl memmove
  15. .ent memmove
  /*
   * memmove: copy n bytes from src to dest; the two regions may overlap.
   *
   * Register roles, as established by the code and operand comments below:
   *   $16 = dest, $17 = src, $18 = n (byte count, counted down as we copy)
   *   $0  = copy of the original dest, taken before any pointer is moved
   *         (presumably the return value -- confirm against the Alpha ABI)
   *   $4  = working dest pointer, $5 = working src pointer
   *   $1, $2, $3, $6 = scratch
   *   $26 = return address used by every "ret"
   *
   * Strategy:
   *   1. If the regions do not overlap, branch straight to memcpy.
   *   2. If dest <= src, copy upward (ascending addresses); otherwise copy
   *      downward starting from the ends, so source bytes are always read
   *      before they can be overwritten.
   *   3. In either direction, when dest and src share the same low three
   *      address bits: copy single bytes until dest is 8-byte aligned, then
   *      whole quadwords, then any remaining bytes.  When they do not, the
   *      whole copy is done one byte at a time.
   *
   * A single byte is stored by read-modify-write of its enclosing quadword:
   * ldq_u (load quadword) / extbl (pick source byte) / insbl (shift it to
   * the dest byte lane) / mskbl (clear that lane in the old dest quadword) /
   * bis (merge) / stq_u (store quadword).
   *
   * The unop/nop/fnop instructions do no work; presumably they pad issue
   * groups for the EV5 pipeline mentioned in the file header.
   */
  16. memmove:
  17. ldgp $29, 0($27) /* load the global pointer $29, based on $27 */
  18. unop
  19. nop
  20. .prologue 1 /* end of prologue; gp was loaded above */
  21. addq $16,$18,$4 /* $4 = dest + n (one past the end of dest) */
  22. addq $17,$18,$5 /* $5 = src + n (one past the end of src) */
  23. cmpule $4,$17,$1 /* dest + n <= src */
  24. cmpule $5,$16,$2 /* dest >= src + n */
  25. bis $1,$2,$1 /* $1 != 0 when either test holds, i.e. no overlap */
  26. mov $16,$0 /* $0 = original dest, saved before pointers change */
  27. xor $16,$17,$2 /* $2 = dest ^ src; low bits tested for co-alignment below */
  28. bne $1,memcpy !samegp /* no overlap: branch (not call) to memcpy, $26 untouched */
  29. and $2,7,$2 /* Test for src/dest co-alignment. */
  30. and $16,7,$1 /* $1 = dest & 7, consumed by the upward path */
  31. cmpule $16,$17,$3 /* $3 = (dest <= src), unsigned */
  32. bne $3,$memmove_up /* dest <= src: copy upward */
  /* ---- Downward copy: $4 / $5 point one past the end of dest / src. ---- */
  33. and $4,7,$1 /* $1 = (dest + n) & 7: alignment of the end of dest */
  34. bne $2,$misaligned_dn /* not co-aligned: byte-at-a-time only */
  35. unop
  36. beq $1,$skip_aligned_byte_loop_head_dn /* end already 8-byte aligned: no head bytes */
  /* Head loop (down): copy single bytes until $4 is 8-byte aligned. */
  37. $aligned_byte_loop_head_dn:
  38. lda $4,-1($4) /* step dest back to the byte about to be written */
  39. lda $5,-1($5) /* step src back to the byte about to be read */
  40. unop
  41. ble $18,$egress /* n <= 0: nothing left to copy */
  42. ldq_u $3,0($5) /* $3 = quadword containing the source byte */
  43. ldq_u $2,0($4) /* $2 = quadword containing the dest byte */
  44. lda $18,-1($18) /* n-- */
  45. extbl $3,$5,$1 /* $1 = source byte, selected by the low bits of $5 */
  46. insbl $1,$4,$1 /* move it into the byte lane selected by $4 */
  47. mskbl $2,$4,$2 /* clear that byte lane in the old dest quadword */
  48. bis $1,$2,$1 /* merge new byte with the preserved dest bytes */
  49. and $4,7,$6 /* $6 = dest & 7 after this byte */
  50. stq_u $1,0($4) /* write the merged quadword back */
  51. bne $6,$aligned_byte_loop_head_dn /* repeat until dest is 8-byte aligned */
  52. $skip_aligned_byte_loop_head_dn:
  53. lda $18,-8($18) /* bias n by -8 so n >= 0 means a full quadword remains */
  54. blt $18,$skip_aligned_word_loop_dn /* fewer than 8 bytes left: skip the word loop */
  /* Word loop (down): copy one aligned quadword per iteration. */
  55. $aligned_word_loop_dn:
  56. ldq $1,-8($5) /* load the quadword just below src */
  57. nop
  58. lda $5,-8($5) /* src -= 8 */
  59. lda $18,-8($18) /* n -= 8 (still biased) */
  60. stq $1,-8($4) /* store it just below dest */
  61. nop
  62. lda $4,-8($4) /* dest -= 8 */
  63. bge $18,$aligned_word_loop_dn /* another full quadword remains */
  64. $skip_aligned_word_loop_dn:
  65. lda $18,8($18) /* undo the -8 bias: $18 = bytes still to copy */
  66. bgt $18,$byte_loop_tail_dn /* leftover bytes: finish them one at a time */
  67. unop
  68. ret $31,($26),1 /* nothing left: return via $26 */
  69. .align 4
  /* Entry for the non-co-aligned downward copy; the three no-ops below are padding. */
  70. $misaligned_dn:
  71. nop
  72. fnop
  73. unop
  74. beq $18,$egress /* n == 0: nothing to copy */
  /* Tail / byte-only loop (down): one byte per iteration until n reaches 0. */
  75. $byte_loop_tail_dn:
  76. ldq_u $3,-1($5) /* quadword containing the source byte at src - 1 */
  77. ldq_u $2,-1($4) /* quadword containing the dest byte at dest - 1 */
  78. lda $5,-1($5) /* src-- (now addresses the byte being copied) */
  79. lda $4,-1($4) /* dest-- (now addresses the byte being written) */
  80. lda $18,-1($18) /* n-- */
  81. extbl $3,$5,$1 /* extract the source byte */
  82. insbl $1,$4,$1 /* position it in the dest byte lane */
  83. mskbl $2,$4,$2 /* clear that lane in the old dest quadword */
  84. bis $1,$2,$1 /* merge */
  85. stq_u $1,0($4) /* store the merged quadword */
  86. bgt $18,$byte_loop_tail_dn /* more bytes remain */
  87. br $egress /* done: go to the shared return */
  /* ---- Upward copy: $4 / $5 start at dest / src and ascend. ---- */
  88. $memmove_up:
  89. mov $16,$4 /* $4 = dest (replaces the end pointer computed earlier) */
  90. mov $17,$5 /* $5 = src */
  91. bne $2,$misaligned_up /* not co-aligned: byte-at-a-time only */
  92. beq $1,$skip_aligned_byte_loop_head_up /* dest & 7 == 0: no head bytes needed */
  /* Head loop (up): copy single bytes until $4 is 8-byte aligned. */
  93. $aligned_byte_loop_head_up:
  94. unop
  95. ble $18,$egress /* n <= 0: nothing left to copy */
  96. ldq_u $3,0($5) /* quadword containing the source byte */
  97. ldq_u $2,0($4) /* quadword containing the dest byte */
  98. lda $18,-1($18) /* n-- */
  99. extbl $3,$5,$1 /* extract the source byte */
  100. insbl $1,$4,$1 /* position it in the dest byte lane */
  101. mskbl $2,$4,$2 /* clear that lane in the old dest quadword */
  102. bis $1,$2,$1 /* merge */
  103. lda $5,1($5) /* src++ */
  104. stq_u $1,0($4) /* store the merged quadword (dest not yet advanced) */
  105. lda $4,1($4) /* dest++ */
  106. and $4,7,$6 /* $6 = new dest & 7 */
  107. bne $6,$aligned_byte_loop_head_up /* repeat until dest is 8-byte aligned */
  108. $skip_aligned_byte_loop_head_up:
  109. lda $18,-8($18) /* bias n by -8 so n >= 0 means a full quadword remains */
  110. blt $18,$skip_aligned_word_loop_up /* fewer than 8 bytes left: skip the word loop */
  /* Word loop (up): copy one aligned quadword per iteration. */
  111. $aligned_word_loop_up:
  112. ldq $1,0($5) /* load the quadword at src */
  113. nop
  114. lda $5,8($5) /* src += 8 */
  115. lda $18,-8($18) /* n -= 8 (still biased) */
  116. stq $1,0($4) /* store it at dest */
  117. nop
  118. lda $4,8($4) /* dest += 8 */
  119. bge $18,$aligned_word_loop_up /* another full quadword remains */
  120. $skip_aligned_word_loop_up:
  121. lda $18,8($18) /* undo the -8 bias: $18 = bytes still to copy */
  122. bgt $18,$byte_loop_tail_up /* leftover bytes: finish them one at a time */
  123. unop
  124. ret $31,($26),1 /* nothing left: return via $26 */
  125. .align 4
  /* Entry for the non-co-aligned upward copy; the three no-ops below are padding. */
  126. $misaligned_up:
  127. nop
  128. fnop
  129. unop
  130. beq $18,$egress /* n == 0: nothing to copy */
  /* Tail / byte-only loop (up): one byte per iteration until n reaches 0. */
  131. $byte_loop_tail_up:
  132. ldq_u $3,0($5) /* quadword containing the source byte */
  133. ldq_u $2,0($4) /* quadword containing the dest byte */
  134. lda $18,-1($18) /* n-- */
  135. extbl $3,$5,$1 /* extract the source byte */
  136. insbl $1,$4,$1 /* position it in the dest byte lane */
  137. mskbl $2,$4,$2 /* clear that lane in the old dest quadword */
  138. bis $1,$2,$1 /* merge */
  139. stq_u $1,0($4) /* store the merged quadword */
  140. lda $5,1($5) /* src++ */
  141. lda $4,1($4) /* dest++ */
  142. nop
  143. bgt $18,$byte_loop_tail_up /* more bytes remain; otherwise fall through to $egress */
  /* Shared exit; $0 still holds the original dest. */
  144. $egress:
  145. ret $31,($26),1 /* return via $26 */
  146. nop /* trailing no-ops: presumably padding after the final ret */
  147. nop
  148. nop
  149. .end memmove
  150. EXPORT_SYMBOL(memmove) /* export the symbol; macro presumably supplied by <asm/export.h> */