memcpy-archs.S 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /*
  2. * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License version 2 as
  6. * published by the Free Software Foundation.
  7. */
  8. #include <linux/linkage.h>
  /*
   * Endianness helpers for the unaligned-source paths of memcpy below.
   * Each takes (destination register, source register, shift amount).
   *
   *   SHIFT_1 / SHIFT_2    the two opposite shifts applied to every loaded
   *                        word: asl then lsr on little endian, lsr then asl
   *                        otherwise.
   *   MERGE_1 / MERGE_2    shifts used to pack the first halfword and byte
   *                        into one register; MERGE_2 expands to nothing on
   *                        little endian.
   *   EXTRACT_1 / EXTRACT_2  produce the halfword and the byte that are
   *                        stored after the word loop.  The little-endian
   *                        EXTRACT_1 and the other EXTRACT_2 ignore IMM.
   *
   * The trailing "; <<" and "; >>" are part of the macro bodies; they end up
   * as assembler comments after expansion.
   */
  9. #ifdef __LITTLE_ENDIAN__
  10. # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
  11. # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
  12. # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
  13. # define MERGE_2(RX,RY,IMM)
  14. # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
  15. # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
  16. #else /* !__LITTLE_ENDIAN__ */
  17. # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
  18. # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
  19. # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
  20. # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
  21. # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
  22. # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
  23. #endif /* __LITTLE_ENDIAN__ */
  /*
   * Block-copy primitives for the path where source and destination are both
   * word aligned.  With CONFIG_ARC_HAS_LL64 every access moves 8 bytes
   * (ldd/std), otherwise 4 bytes (ld/st); the .ab form advances the address
   * register by that amount.  The unrolled loop in memcpy issues four loads
   * and four stores per iteration, i.e. 32 or 16 bytes, hence:
   *
   *   ZOLSHFT  log2 of the bytes per iteration (length >> ZOLSHFT = iterations)
   *   ZOLAND   bytes per iteration minus one   (length & ZOLAND = leftover)
   *
   * ZOL presumably stands for zero-overhead loop.
   */
  24. #ifdef CONFIG_ARC_HAS_LL64
  25. # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
  26. # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
  27. # define ZOLSHFT 5
  28. # define ZOLAND 0x1F
  29. #else /* !CONFIG_ARC_HAS_LL64 */
  30. # define LOADX(DST,RX) ld.ab DST, [RX, 4]
  31. # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
  32. # define ZOLSHFT 4
  33. # define ZOLAND 0xF
  34. #endif /* CONFIG_ARC_HAS_LL64 */
  ;;; memcpy: copy r2 bytes from the address in r1 to the address in r0.
  ;;;
  ;;; In:   r0 = destination, r1 = source, r2 = length in bytes
  ;;; Out:  r0 is never written, so it still holds the destination on return
  ;;; Uses: r3 as the destination cursor, r4-r10 as scratch, lp_count and the
  ;;;       flags; r1 is advanced and r2 is counted down.  With the 8-byte
  ;;;       load/store forms the odd partner registers are presumably written
  ;;;       as well (TODO confirm against the ISA manual).
  ;;;
  ;;; Strategy: a length of 8 or less is copied bytewise.  Otherwise the
  ;;; destination is byte-copied up to a 32bit boundary, and then, depending on
  ;;; source address modulo 4, either whole blocks are copied (CASE 0) or words
  ;;; are loaded aligned and re-assembled with shifts, with the not yet stored
  ;;; bytes carried in r5 from one store to the next (CASES 1 to 3).
  35. ENTRY_CFI(memcpy)
  36. mov.f 0, r2 ; flags only, the result is discarded: Z is set when the length is zero
  37. ;;; Length is zero: nothing to copy, return
  38. jz.d [blink]
  39. mov r3, r0 ; delay slot, always executed: r3 becomes the store cursor so that r0 (the return value) is not clobbered
  40. ;;; Length <= 8 (unsigned): skip the alignment work and copy bytewise
  41. cmp r2, 8
  42. bls.d @.Lsmallchunk
  43. mov.f lp_count, r2 ; delay slot: lp_count = length with Z clear, so the lpnz at .Lsmallchunk copies every byte
  44. and.f r4, r0, 0x03 ; r4 = destination & 3, Z set when it is already word aligned
  45. rsub lp_count, r4, 4 ; lp_count = 4 - r4 bytes up to the next boundary (flags are left alone)
  46. lpnz @.Laligndestination ; loop skipped when Z is set
  47. ;; LOOP BEGIN: one byte per iteration until the destination is aligned
  48. ldb.ab r5, [r1,1]
  49. sub r2, r2, 1 ; r2 keeps counting the bytes still to copy
  50. stb.ab r5, [r3,1]
  51. .Laligndestination:
  52. ;;; Check the alignment of the source
  53. and.f r4, r1, 0x03 ; r4 = source & 3, used to pick the case below
  54. bnz.d @.Lsourceunaligned ; its delay slot is the lsr.f below, which therefore runs on both paths
  55. ;;; CASE 0: Both source and destination are 32bit aligned
  56. ;;; Convert len to loop iterations, unfold x4 (four loads, four stores)
  57. lsr.f lp_count, r2, ZOLSHFT
  58. lpnz @.Lcopy32_64bytes ; less than one full block: go straight to the tail
  59. ;; LOOP START: all loads are issued before the stores
  60. LOADX (r6, r1)
  61. LOADX (r8, r1)
  62. LOADX (r10, r1)
  63. LOADX (r4, r1)
  64. STOREX (r6, r3)
  65. STOREX (r8, r3)
  66. STOREX (r10, r3)
  67. STOREX (r4, r3)
  68. .Lcopy32_64bytes:
  69. and.f lp_count, r2, ZOLAND ; Tail: bytes left after the block loop (at most 31, or 15 with 4-byte accesses)
  70. .Lsmallchunk:
  71. lpnz @.Lcopyremainingbytes ; Z comes from the and.f above or from the mov.f in the bls.d delay slot
  72. ;; LOOP START: plain bytewise copy
  73. ldb.ab r5, [r1,1]
  74. stb.ab r5, [r3,1]
  75. .Lcopyremainingbytes:
  76. j [blink]
  77. ;;; END CASE 0
  78. .Lsourceunaligned:
  79. cmp r4, 2 ; r4 = source & 3, which is 1, 2 or 3 here
  80. beq.d @.LunalignedOffby2
  81. sub r2, r2, 1 ; delay slot, runs for all three cases: one byte less to copy; no .f, so the cmp flags survive
  82. bhi.d @.LunalignedOffby3
  83. ldb.ab r5, [r1, 1] ; delay slot, runs for off by 1 and off by 3: r5 = first source byte
  84. ;;; CASE 1: The source is unaligned, off by 1
  85. ;; The byte read above brings the source to 16bit alignment,
  86. ;; and 2 more bytes bring it to 32bit alignment
  87. ldh.ab r6, [r1, 2]
  88. sub r2, r2, 2 ; r2 now excludes the three bytes held in r5 and r6
  89. ;; Convert to loop iterations of two words (8 bytes), unfold x2
  90. lsr.f lp_count, r2, 3
  91. MERGE_1 (r6, r6, 8) ; move the halfword to its place next to the byte
  92. MERGE_2 (r5, r5, 24) ; expands to an instruction only when not little endian
  93. or r5, r5, r6 ; r5 = carry: the three bytes read so far but not yet stored
  94. ;; Both src and dst are aligned
  95. lpnz @.Lcopy8bytes_1
  96. ;; LOOP START: two aligned loads, two aligned stores
  97. ld.ab r6, [r1, 4]
  98. ld.ab r8, [r1,4]
  99. SHIFT_1 (r7, r6, 24) ; one byte of the new word, placed beside the carry
  100. or r7, r7, r5 ; r7 = carry + that byte = one full word to store
  101. SHIFT_2 (r5, r6, 8) ; new carry = the other three bytes of the word
  102. SHIFT_1 (r9, r8, 24) ; same again for the second word
  103. or r9, r9, r5
  104. SHIFT_2 (r5, r8, 8)
  105. st.ab r7, [r3, 4]
  106. st.ab r9, [r3, 4]
  107. .Lcopy8bytes_1:
  108. ;; Flush the carry: write back 16 of its 24 bits
  109. EXTRACT_1 (r6, r5, 16)
  110. sth.ab r6, [r3, 2]
  111. ;; Write back the remaining 8bits of the carry
  112. EXTRACT_2 (r5, r5, 16)
  113. stb.ab r5, [r3, 1]
  114. and.f lp_count, r2, 0x07 ; Tail: at most 7 bytes are left after the 8-byte loop
  115. lpnz @.Lcopybytewise_1
  116. ;; LOOP START: bytewise tail
  117. ldb.ab r6, [r1,1]
  118. stb.ab r6, [r3,1]
  119. .Lcopybytewise_1:
  120. j [blink]
  121. .LunalignedOffby2:
  122. ;;; CASE 2: The source is unaligned, off by 2
  123. ldh.ab r5, [r1, 2] ; one halfword aligns the source; r5 = carry of two bytes
  124. sub r2, r2, 1 ; second byte of that halfword (the first was counted in the beq.d delay slot)
  125. ;; Both src and dst are aligned
  126. ;; Convert to loop iterations of two words (8 bytes), unfold x2
  127. lsr.f lp_count, r2, 3
  128. #ifdef __BIG_ENDIAN__
  129. asl.nz r5, r5, 16 ; only when the loop will run: move the carry to the upper half for the or in the loop
  130. #endif
  131. lpnz @.Lcopy8bytes_2
  132. ;; LOOP START: two aligned loads, two aligned stores
  133. ld.ab r6, [r1, 4]
  134. ld.ab r8, [r1,4]
  135. SHIFT_1 (r7, r6, 16) ; two bytes of the new word, placed beside the carry
  136. or r7, r7, r5 ; r7 = carry + those bytes = one full word to store
  137. SHIFT_2 (r5, r6, 16) ; new carry = the other two bytes of the word
  138. SHIFT_1 (r9, r8, 16) ; same again for the second word
  139. or r9, r9, r5
  140. SHIFT_2 (r5, r8, 16)
  141. st.ab r7, [r3, 4]
  142. st.ab r9, [r3, 4]
  143. .Lcopy8bytes_2:
  144. #ifdef __BIG_ENDIAN__
  145. lsr.nz r5, r5, 16 ; undo the pre-loop shift; Z is still the one from the lsr.f since nothing in between sets flags
  146. #endif
  147. sth.ab r5, [r3, 2] ; flush the two carried bytes
  148. and.f lp_count, r2, 0x07 ; Tail: at most 7 bytes are left after the 8-byte loop
  149. lpnz @.Lcopybytewise_2
  150. ;; LOOP START: bytewise tail
  151. ldb.ab r6, [r1,1]
  152. stb.ab r6, [r3,1]
  153. .Lcopybytewise_2:
  154. j [blink]
  155. .LunalignedOffby3:
  156. ;;; CASE 3: The source is unaligned, off by 3
  157. ;;; The single byte read in the bhi.d delay slot already gave 32bit alignment; r5 holds it as the carry
  158. ;; Both src and dst are aligned
  159. ;; Convert to loop iterations of two words (8 bytes), unfold x2
  160. lsr.f lp_count, r2, 3
  161. #ifdef __BIG_ENDIAN__
  162. asl.ne r5, r5, 24 ; only when the loop will run: move the carried byte to the top for the or in the loop
  163. #endif
  164. lpnz @.Lcopy8bytes_3
  165. ;; LOOP START: two aligned loads, two aligned stores
  166. ld.ab r6, [r1, 4]
  167. ld.ab r8, [r1,4]
  168. SHIFT_1 (r7, r6, 8) ; three bytes of the new word, placed beside the carry
  169. or r7, r7, r5 ; r7 = carry + those bytes = one full word to store
  170. SHIFT_2 (r5, r6, 24) ; new carry = the remaining byte of the word
  171. SHIFT_1 (r9, r8, 8) ; same again for the second word
  172. or r9, r9, r5
  173. SHIFT_2 (r5, r8, 24)
  174. st.ab r7, [r3, 4]
  175. st.ab r9, [r3, 4]
  176. .Lcopy8bytes_3:
  177. #ifdef __BIG_ENDIAN__
  178. lsr.nz r5, r5, 24 ; undo the pre-loop shift; Z is still the one from the lsr.f since nothing in between sets flags
  179. #endif
  180. stb.ab r5, [r3, 1] ; flush the carried byte
  181. and.f lp_count, r2, 0x07 ; Tail: at most 7 bytes are left after the 8-byte loop
  182. lpnz @.Lcopybytewise_3
  183. ;; LOOP START: bytewise tail
  184. ldb.ab r6, [r1,1]
  185. stb.ab r6, [r3,1]
  186. .Lcopybytewise_3:
  187. j [blink]
  188. END_CFI(memcpy)