memset.S 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
  2. /*-
  3. * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. The name of the author may not be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <machine/asm.h>
  /*
   * Register roles.
   *
   * REG_PTR  (r0): end pointer (dst + len) in the large-fill path.  The code
   *                applies "tst #imm" to it, a form that only accepts r0.
   *                The small-fill paths use r0 directly as jump target and
   *                as the store source.
   * REG_TMP1 (r1): scratch (comparison constants, masks, dst + 16).
   * REG_DST  (r4): destination pointer as passed in; advanced by some paths.
   * REG_C        : fill value.  For memset it arrives in r5; for bzero it is
   *                r2 and the code loads it with 0.
   * REG_LEN      : byte count; arrives in r6 for memset, r5 for bzero.
   * REG_DST0 (r3): memset only; copy of the original dst, moved to r0 as the
   *                return value.
   */
  28. #define REG_PTR r0
  29. #define REG_TMP1 r1
  30. #ifdef BZERO
  31. # define REG_C r2
  32. # define REG_DST r4
  33. # define REG_LEN r5
  34. #else
  35. # define REG_DST0 r3
  36. # define REG_DST r4
  37. # define REG_C r5
  38. # define REG_LEN r6
  39. #endif
  /*
   * memset(dst, c, len) / bzero(dst, len), selected by BZERO.
   *
   * Dispatch on len (unsigned compares):
   *   len >= 28       -> large  (32-bit stores after aligning both ends)
   *   12 <= len < 28  -> small  (computed jump into unrolled stores)
   *   len < 12        -> byte loop below, filling from the end backwards
   *
   * bt/s and bf/s are delayed branches: the instruction following them is
   * executed whether or not the branch is taken, and the branch decision
   * uses the T bit as it was before that instruction ran.
   */
  40. #ifdef BZERO
  41. ENTRY(bzero)
  42. #else
  43. ENTRY(memset)
  44. mov REG_DST,REG_DST0 /* for return value */
  45. #endif
  46. /* small amount to fill ? */
  47. mov #28,REG_TMP1
  48. cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
  49. bt/s large
  50. mov #12,REG_TMP1 /* (delay slot) if (len >= 12) goto small; */
  51. cmp/hs REG_TMP1,REG_LEN
  52. bt/s small
  /*
   * Delay slot of the branch to small: for bzero it is the mov below, for
   * memset it is the tst that follows the #endif.  small sets T again
   * before using it, so the tst result only matters on the fall-through path.
   */
  53. #ifdef BZERO
  54. mov #0,REG_C
  55. #endif
  56. /* very little fill (0 ~ 11 bytes) */
  57. tst REG_LEN,REG_LEN /* T = (len == 0) */
  58. add REG_DST,REG_LEN /* REG_LEN = dst + len, used as end pointer */
  59. bt/s done /* len == 0: nothing to store */
  60. add #1,REG_DST /* (delay slot) REG_DST = dst + 1 */
  61. /* loop unrolled 4 times; T = (end == dst + 1) means one byte is left */
  62. cmp/eq REG_DST,REG_LEN
  63. 1: mov.b REG_C,@-REG_LEN /* *--end = c; */
  64. bt/s done /* T from before the store: that was the last byte */
  65. cmp/eq REG_DST,REG_LEN /* (delay slot) T for the next store */
  66. mov.b REG_C,@-REG_LEN
  67. bt/s done
  68. cmp/eq REG_DST,REG_LEN /* (delay slot) */
  69. mov.b REG_C,@-REG_LEN
  70. bt/s done
  71. cmp/eq REG_DST,REG_LEN /* (delay slot) */
  72. mov.b REG_C,@-REG_LEN
  73. bf/s 1b /* more bytes left: loop, else fall into done */
  74. cmp/eq REG_DST,REG_LEN /* (delay slot) T for the store at 1: */
  75. done:
  76. #ifdef BZERO
  77. rts
  78. nop /* (delay slot) */
  79. #else
  80. rts
  81. mov REG_DST0,r0 /* (delay slot) return the original dst */
  82. #endif
  /*
   * small: 12 <= len < 28.
   *
   * If dst is even, continue at small_aligned.  Otherwise fill byte by byte
   * using a computed jump into the 32 unrolled slots below.  Every SH
   * instruction is 2 bytes long, so jumping to (1f - 2 * len) executes
   * exactly len byte stores, writing dst[len - 1] down to dst[0].
   *
   * The displacement form of mov.b takes r0 as source and a displacement
   * of 0..15, which is why the fill value is moved to r0 and the upper
   * 16 bytes are addressed through REG_TMP1 = dst + 16.
   */
  83. small:
  84. mov REG_DST,r0
  85. tst #1,r0 /* T = (dst is even) */
  86. bt/s small_aligned
  87. mov REG_DST,REG_TMP1 /* (delay slot) */
  88. shll REG_LEN /* REG_LEN = 2 * len = code bytes to execute */
  89. mova 1f,r0 /* 1f must be 4bytes aligned! */
  90. add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
  91. sub REG_LEN,r0 /* r0 = 1f - 2 * len */
  92. jmp @r0
  93. mov REG_C,r0 /* (delay slot) r0 = fill value for the stores */
  94. .align 2
  95. mov.b r0,@(15,REG_TMP1)
  96. mov.b r0,@(14,REG_TMP1)
  97. mov.b r0,@(13,REG_TMP1)
  98. mov.b r0,@(12,REG_TMP1)
  99. mov.b r0,@(11,REG_TMP1)
  100. mov.b r0,@(10,REG_TMP1)
  101. mov.b r0,@(9,REG_TMP1)
  102. mov.b r0,@(8,REG_TMP1)
  103. mov.b r0,@(7,REG_TMP1)
  104. mov.b r0,@(6,REG_TMP1)
  105. mov.b r0,@(5,REG_TMP1)
  106. mov.b r0,@(4,REG_TMP1)
  107. mov.b r0,@(3,REG_TMP1)
  108. mov.b r0,@(2,REG_TMP1)
  109. mov.b r0,@(1,REG_TMP1)
  110. mov.b r0,@REG_TMP1
  111. mov.b r0,@(15,REG_DST)
  112. mov.b r0,@(14,REG_DST)
  113. mov.b r0,@(13,REG_DST)
  114. mov.b r0,@(12,REG_DST)
  115. mov.b r0,@(11,REG_DST)
  116. mov.b r0,@(10,REG_DST)
  117. mov.b r0,@(9,REG_DST)
  118. mov.b r0,@(8,REG_DST)
  119. mov.b r0,@(7,REG_DST)
  120. mov.b r0,@(6,REG_DST)
  121. mov.b r0,@(5,REG_DST)
  122. mov.b r0,@(4,REG_DST)
  123. mov.b r0,@(3,REG_DST)
  124. mov.b r0,@(2,REG_DST)
  125. mov.b r0,@(1,REG_DST)
  /*
   * bzero: the rts takes the slot just before label 1 and the dst[0] store
   * runs in its delay slot, so a jump to (1f - 2 * len) still performs len
   * stores.  memset: label 1 is the rts itself, after all 32 stores.
   */
  126. #ifdef BZERO
  127. rts
  128. 1: mov.b r0,@REG_DST /* (delay slot) dst[0] */
  129. #else
  130. mov.b r0,@REG_DST
  131. 1: rts
  132. mov REG_DST0,r0 /* (delay slot) return the original dst */
  133. #endif
  134. /* 2 bytes aligned small fill */
  /*
   * Reached from small with an even dst and 12 <= len < 28.  An odd len is
   * made even by storing the last byte separately; the rest is filled with
   * 16-bit stores through a computed jump.  Each mov.w is 2 bytes of code
   * and writes 2 bytes of data, so the jump target is simply (1f - len).
   */
  135. small_aligned:
  136. #ifndef BZERO
  137. extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
  138. shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
  139. or REG_TMP1,REG_C /* REG_C = ????xxxx */
  140. #endif
  141. mov REG_LEN,r0
  142. tst #1,r0 /* len is aligned? */
  143. bt/s 1f
  144. add #-1,r0 /* (delay slot) r0 = len - 1 */
  145. mov.b REG_C,@(r0,REG_DST) /* fill the last byte: dst[len - 1] = c */
  146. mov r0,REG_LEN /* len -= 1, now even */
  147. 1:
  148. mova 1f,r0 /* 1f must be 4bytes aligned! */
  149. sub REG_LEN,r0 /* r0 = 1f - len */
  150. jmp @r0
  151. mov REG_C,r0 /* (delay slot) r0 = fill value for the stores */
  152. .align 2
  153. mov.w r0,@(30,REG_DST)
  154. mov.w r0,@(28,REG_DST)
  155. mov.w r0,@(26,REG_DST)
  156. mov.w r0,@(24,REG_DST)
  157. mov.w r0,@(22,REG_DST)
  158. mov.w r0,@(20,REG_DST)
  159. mov.w r0,@(18,REG_DST)
  160. mov.w r0,@(16,REG_DST)
  161. mov.w r0,@(14,REG_DST)
  162. mov.w r0,@(12,REG_DST)
  163. mov.w r0,@(10,REG_DST)
  164. mov.w r0,@(8,REG_DST)
  165. mov.w r0,@(6,REG_DST)
  166. mov.w r0,@(4,REG_DST)
  167. mov.w r0,@(2,REG_DST)
  /*
   * bzero: the rts takes the slot just before label 1 and the final store
   * runs in its delay slot, so the jump arithmetic is the same as for
   * memset, where label 1 is the rts after all 16 stores.
   */
  168. #ifdef BZERO
  169. rts
  170. 1: mov.w r0,@REG_DST /* (delay slot) first 2 bytes of dst */
  171. #else
  172. mov.w r0,@REG_DST
  173. 1: rts
  174. mov REG_DST0,r0 /* (delay slot) return the original dst */
  175. #endif
  /*
   * large: len >= 28.
   *
   * Replicate the fill byte into all four bytes of REG_C, set
   * REG_PTR = dst + len, and branch to unaligned_dst / unaligned_len when
   * dst or len is not a multiple of 4.  At aligned, REG_DST and REG_PTR are
   * both 4-byte aligned and REG_LEN = REG_PTR - REG_DST; the range is
   * filled backwards from REG_PTR, first 32 bytes per iteration and then
   * 4 bytes per store.
   */
  176. .align 2
  177. large:
  178. #ifdef BZERO
  179. mov #0,REG_C
  180. #else
  181. extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
  182. shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
  183. or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
  184. swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
  185. xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
  186. #endif
  187. mov #3,REG_TMP1
  188. tst REG_TMP1,REG_DST /* T = ((dst & 3) == 0) */
  189. mov REG_DST,REG_PTR
  190. bf/s unaligned_dst
  191. add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
  192. tst REG_TMP1,REG_LEN /* T = ((len & 3) == 0) */
  193. bf/s unaligned_len /* its delay slot is the mov #32 just below */
  194. aligned:
  195. /* fill 32*n bytes */
  196. mov #32,REG_TMP1
  197. cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len) */
  198. bt 9f /* fewer than 32 bytes: skip the block loop */
  199. .align 2
  200. 1: sub REG_TMP1,REG_PTR /* ptr -= 32 */
  201. mov.l REG_C,@REG_PTR
  202. sub REG_TMP1,REG_LEN /* len -= 32 */
  203. mov.l REG_C,@(4,REG_PTR)
  204. cmp/hi REG_LEN,REG_TMP1 /* T = (32 > len), consumed by bf/s below */
  205. mov.l REG_C,@(8,REG_PTR)
  206. mov.l REG_C,@(12,REG_PTR)
  207. mov.l REG_C,@(16,REG_PTR)
  208. mov.l REG_C,@(20,REG_PTR)
  209. mov.l REG_C,@(24,REG_PTR)
  210. bf/s 1b /* another full 32-byte block remains */
  211. mov.l REG_C,@(28,REG_PTR) /* (delay slot) */
  212. 9:
  213. /* fill left 4*n bytes */
  214. cmp/eq REG_DST,REG_PTR
  215. bt 9f /* ptr == dst: nothing left */
  216. add #4,REG_DST /* compare against dst + 4: T = (one word left) */
  217. cmp/eq REG_DST,REG_PTR
  218. 1: mov.l REG_C,@-REG_PTR /* *--(u_int32_t *)ptr = c; */
  219. bt/s 9f /* T from before the store: that was the last word */
  220. cmp/eq REG_DST,REG_PTR /* (delay slot) T for the next store */
  221. mov.l REG_C,@-REG_PTR
  222. bt/s 9f
  223. cmp/eq REG_DST,REG_PTR /* (delay slot) */
  224. mov.l REG_C,@-REG_PTR
  225. bt/s 9f
  226. cmp/eq REG_DST,REG_PTR /* (delay slot) */
  227. mov.l REG_C,@-REG_PTR
  228. bf/s 1b /* more words left: loop, else fall into 9: */
  229. cmp/eq REG_DST,REG_PTR /* (delay slot) T for the store at 1: */
  230. 9:
  231. #ifdef BZERO
  232. rts
  233. nop /* (delay slot) */
  234. #else
  235. rts
  236. mov REG_DST0,r0 /* (delay slot) return the original dst */
  237. #endif
  /*
   * Alignment fixups for the large path.
   *
   * unaligned_dst: store a byte and/or a halfword at the start so that
   * REG_DST becomes 4-byte aligned, then fall into the tail check.
   * unaligned_len: store a byte and/or a halfword at the end so that
   * REG_PTR becomes 4-byte aligned.  Finally recompute
   * REG_LEN = REG_PTR - REG_DST and continue at aligned.
   *
   * unaligned_len is both a branch target and the delay slot of the
   * "bt/s 4f" in front of it.
   */
  238. unaligned_dst:
  239. mov #1,REG_TMP1
  240. tst REG_TMP1,REG_DST /* if (dst & 1) { */
  241. add #1,REG_TMP1 /* REG_TMP1 = 2 */
  242. bt/s 2f
  243. tst REG_TMP1,REG_DST /* (delay slot) T = ((dst & 2) == 0) */
  244. mov.b REG_C,@REG_DST /* *dst++ = c; */
  245. add #1,REG_DST
  246. tst REG_TMP1,REG_DST /* re-test: the increment can change bit 1 */
  247. 2: /* } */
  248. /* if (dst & 2) { */
  249. bt 4f
  250. mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */
  251. add #2,REG_DST
  252. 4: /* } */
  253. tst #3,REG_PTR /* if (ptr & 3) { */
  254. bt/s 4f /* ptr already aligned: skip the tail fixup */
  255. unaligned_len:
  256. tst #1,REG_PTR /* if (ptr & 1) { */
  257. bt/s 2f
  258. tst #2,REG_PTR /* (delay slot) T = ((ptr & 2) == 0) */
  259. mov.b REG_C,@-REG_PTR /* *--ptr = c; ptr was odd, so bit 1 is unchanged */
  260. 2: /* } */
  261. /* if (ptr & 2) { */
  262. bt 4f
  263. mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */
  264. 4: /* } */
  265. /* } */
  266. mov REG_PTR,REG_LEN
  267. bra aligned
  268. sub REG_DST,REG_LEN /* (delay slot) len = ptr - dst */