memmove.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. /* $OpenBSD: memmove.S,v 1.6 2014/11/09 16:41:26 miod Exp $ */
  2. /*
  3. * Copyright (c) 1996
  4. * The President and Fellows of Harvard College. All rights reserved.
  5. * Copyright (c) 1992, 1993
  6. * The Regents of the University of California. All rights reserved.
  7. *
  8. * This software was developed by the Computer Systems Engineering group
  9. * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
  10. * contributed to Berkeley.
  11. *
  12. * All advertising materials mentioning features or use of this software
  13. * must display the following acknowledgement:
  14. * This product includes software developed by the University of
  15. * California, Lawrence Berkeley Laboratory.
  16. * This product includes software developed by Harvard University.
  17. *
  18. * Redistribution and use in source and binary forms, with or without
  19. * modification, are permitted provided that the following conditions
  20. * are met:
  21. * 1. Redistributions of source code must retain the above copyright
  22. * notice, this list of conditions and the following disclaimer.
  23. * 2. Redistributions in binary form must reproduce the above copyright
  24. * notice, this list of conditions and the following disclaimer in the
  25. * documentation and/or other materials provided with the distribution.
  26. * 3. All advertising materials mentioning features or use of this software
  27. * must display the following acknowledgement:
  28. * This product includes software developed by the University of
  29. * California, Berkeley and its contributors.
  30. * This product includes software developed by Harvard University.
  31. * This product includes software developed by Paul Kranenburg.
  32. * 4. Neither the name of the University nor the names of its contributors
  33. * may be used to endorse or promote products derived from this software
  34. * without specific prior written permission.
  35. *
  36. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  37. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  38. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  39. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  40. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  41. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  42. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  43. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  44. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  45. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. */
  48. #include <machine/param.h>
  49. #include <machine/asm.h>
  /*
   * Assembler portability shim.
   *
   * The Sun assembler complains about a label that directly follows a
   * branch unless an `.empty' directive is present; the GNU assembler
   * does not know that directive at all.  EMPTY expands to `.empty' only
   * when building with the Sun assembler (SUN_AS defined) and to nothing
   * otherwise, so the source can keep the directive as documentation of
   * an intentionally shared delay slot.
   */
#if defined(SUN_AS)
#define	EMPTY	.empty
#else
#define	EMPTY	/* .empty */
#endif

/* Longword (4 byte) alignment directive, for use where needed. */
#define	_ALIGN	.align 4

/* Copies shorter than this many bytes are done one byte at a time. */
#define	BCOPY_SMALL	32
  /*
   * memcpy(dst, src, len)
   *
   * Forward copy; the regions are assumed not to overlap.  Returns dst.
   *
   * Register use once the arguments have been swapped:
   *	%o0	src (advances)
   *	%o1	dst (advances)
   *	%o2	len (counts down; compared as a signed value)
   *	%o3	t = src ^ dst while choosing a copy width; holds the
   *		return value during the doubleword loop
   *	%o4	data being moved (%o4/%o5 pair for ldd/std)
   *	%o5	original dst, handed back in %o0 on return
   *
   * Everything from Lbcopy_start onward is shared with bcopy, which
   * branches here when a forward copy is safe.
   */
ENTRY(memcpy)
	/* Put the arguments in bcopy order: %o0 = src, %o1 = dst. */
	mov	%o0, %o5		! remember dst for the return value
	mov	%o1, %o0
	mov	%o5, %o1
Lbcopy_old:
	cmp	%o2, BCOPY_SMALL
Lbcopy_start:
	bge,a	Lbcopy_fancy		! len >= BCOPY_SMALL: take the fast paths
	 btst	7, %o0			! [annulled slot: is src 8 byte aligned?]

	/*
	 * Short copy: move one byte per iteration.  The inc of %o0 sits in
	 * the delay slot of the bl, so it also runs when len is 0; that is
	 * harmless because %o0 is reloaded from %o5 before returning.
	 */
	deccc	%o2			! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	inc	%o0			!	src++;
	ldsb	[%o0 - 1], %o4		!	byte = src[-1];
	stb	%o4, [%o1]		!	*dst = byte;
	deccc	%o2
	bge	0b
	 inc	%o1			!	[delay slot: dst++]
1:
	retl
	 mov	%o5, %o0		! return (dst)
	/* NOTREACHED */

	/*
	 * At least BCOPY_SMALL bytes: use the widest access that the
	 * relative alignment of src and dst allows.
	 */
Lbcopy_fancy:
	! Flags still reflect btst 7, %o0 from the annulled slot above.
	! Common case first: both pointers already 8 byte aligned.
	bne	1f
	 EMPTY
	btst	7, %o1			! (also runs as the delay slot of bne)
	be,a	Lbcopy_doubles
	 dec	8, %o2			! [annulled slot: len -= 8 for the loop]

	! Not both aligned.  Bits that differ between src and dst can never
	! be aligned away, so t = src ^ dst picks the copy width.
1:
	xor	%o0, %o1, %o3		! t = src ^ dst;
	btst	1, %o3			! if ((t & 1) == 0)
	be,a	1f			!	halfwords or better are possible
	 btst	1, %o0			! [annulled slot: test (src & 1)]

	! Bit 0 differs: the whole region has to go byte by byte.
0:
	ldsb	[%o0], %o4		! do {
	inc	%o0			!	byte = *src++;
	inc	%o1			!	dst++;
	deccc	%o2
	bnz	0b			! } while (--len != 0);
	 stb	%o4, [%o1 - 1]		!	[delay slot: dst[-1] = byte]
	retl
	 mov	%o5, %o0		! return (dst)
	/* NOTREACHED */

	! Bit 0 matches.  If both addresses are odd, copy one byte so that
	! both become halfword aligned.
1:
	be,a	1f			! if (src & 1) {
	 btst	2, %o3			! [annulled slot: test (t & 2)]
	ldsb	[%o0], %o4		!	*dst++ = *src++;
	stb	%o4, [%o1]
	inc	%o0
	inc	%o1
	dec	%o2			!	len--;
	btst	2, %o3			! } [test (t & 2)]
1:
	be,a	1f			! if (t & 2) {
	 btst	2, %o0			! [annulled slot: test (src & 2)]

	! Bit 1 differs: halfwords are the widest usable unit.
	dec	2, %o2			!	len -= 2;
0:
	ldsh	[%o0], %o4		!	do {
	sth	%o4, [%o1]		!		*(short *)dst = *(short *)src;
	inc	2, %o0			!		src += 2;
	deccc	2, %o2			!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1			!		[delay slot: dst += 2]
	b	Lbcopy_mopb		!	goto mop_up_byte;
	 btst	1, %o2			! } [delay slot: test (len & 1)]
	/* NOTREACHED */

	! Low two bits match.  If both addresses are 2 mod 4, copy one
	! halfword so that both become word aligned.
1:
	be,a	1f			! if (src & 2) {
	 btst	4, %o3			! [annulled slot: test (t & 4)]
	ldsh	[%o0], %o4		!	*(short *)dst = *(short *)src;
	sth	%o4, [%o1]
	inc	2, %o0			!	src += 2;
	inc	2, %o1			!	dst += 2;
	dec	2, %o2			!	len -= 2;
	btst	4, %o3			! } [test (t & 4)]
1:
	be,a	1f			! if (t & 4) {
	 btst	4, %o0			! [annulled slot: test (src & 4)]

	! Bit 2 differs: words are the widest usable unit.
	dec	4, %o2			!	len -= 4;
0:
	ld	[%o0], %o4		!	do {
	st	%o4, [%o1]		!		*(int *)dst = *(int *)src;
	inc	4, %o0			!		src += 4;
	deccc	4, %o2			!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1			!		[delay slot: dst += 4]
	b	Lbcopy_mopw		!	goto mop_up_word_and_byte;
	 btst	2, %o2			! } [delay slot: test (len & 2)]
	/* NOTREACHED */

	! Low three bits match.  If both addresses are 4 mod 8, copy one
	! word so that both become doubleword aligned.
1:
	be	1f			! if (src & 4) {
	 dec	8, %o2			! [delay slot, always runs: len -= 8]
	ld	[%o0], %o4		!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0			!	src += 4;
	inc	4, %o1			!	dst += 4;
	dec	4, %o2			!	len -= 4;
					! }
1:
	/*
	 * Doubleword loop.  len has already been reduced by 8.  ldd loads
	 * into the %o4/%o5 pair, so the return value is parked in %o3 for
	 * the duration of the loop.
	 */
Lbcopy_doubles:
	mov	%o5, %o3		! save return value
1:
	ldd	[%o0], %o4		! do {
	std	%o4, [%o1]		!	*(double *)dst = *(double *)src;
	inc	8, %o0			!	src += 8;
	deccc	8, %o2			! } while ((len -= 8) >= 0);
	bge	1b
	 inc	8, %o1			!	[delay slot: dst += 8]

	! len is now negative, but its low three bits still give the number
	! of bytes left over.  Nothing left is the usual case.
	btst	7, %o2			! if ((len & 7) == 0)
	be	Lbcopy_done		!	goto bcopy_done;
	 mov	%o3, %o5		! [delay slot, always runs: restore return value]

	btst	4, %o2			! if ((len & 4) == 0)
	be,a	Lbcopy_mopw		!	goto mop_up_word_and_byte;
	 btst	2, %o2			! [annulled slot: test (len & 2)]
	ld	[%o0], %o4		! *(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0			! src += 4;
	inc	4, %o1			! dst += 4;
	btst	2, %o2			! [test (len & 2)]
1:
	/*
	 * Mop up a trailing halfword and/or byte.  Entered with the flags
	 * set from (len & 2).
	 */
Lbcopy_mopw:
	be	Lbcopy_mopb		! no halfword: only a byte may remain
	 btst	1, %o2			! [delay slot, always runs: test (len & 1)]
	ldsh	[%o0], %o4		! *(short *)dst = *(short *)src;
	be	Lbcopy_done		! if ((len & 1) == 0) goto bcopy_done;
	 sth	%o4, [%o1]		! [delay slot: the halfword store]
	ldsb	[%o0 + 2], %o4		! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	retl
	 mov	%o5, %o0		! return (dst)
	/* NOTREACHED */

	/*
	 * Mop up a trailing byte.  Entered with the flags set from
	 * (len & 1).
	 */
Lbcopy_mopb:
	bne,a	1f			! if (len & 1), load the byte and store it below
	 ldsb	[%o0], %o4		! [annulled slot: byte = *src]
Lbcopy_done:
	retl
	 mov	%o5, %o0		! return (dst)
1:
	stb	%o4,[%o1]		! *dst = byte;
	retl
	 mov	%o5, %o0		! return (dst)
  /*
   * memmove(dst, src, len)
   *
   * Copy len bytes from src to dst; the regions may overlap.  Returns dst.
   *
   * Only the argument shuffle lives here: control falls straight through
   * into bcopy, which takes (src, dst, len) and picks the copy direction.
   */
ENTRY(memmove)
	mov	%o0, %o5		! %o5 = dst, kept as the return value
	mov	%o1, %o0		! %o0 = src
	mov	%o5, %o1		! %o1 = dst
  /*
   * bcopy(src, dst, len)
   *
   * Copy len bytes from src to dst; the regions may overlap.
   *
   * When src >= dst (unsigned compare) a forward copy cannot clobber
   * unread source bytes, so control joins the forward code at
   * Lbcopy_start.  Otherwise both pointers are moved to the end of their
   * regions and the copy runs from high addresses to low.
   *
   * Register use:
   *	%o0	src (one past the next byte to read when going backwards)
   *	%o1	dst (one past the next byte to write when going backwards)
   *	%o2	len (counts down; compared as a signed value)
   *	%o3	t = src ^ dst, used to choose the copy width
   *	%o4	data being moved
   *	%o5	copied to %o0 on return; memmove stores dst here before
   *		falling into this code
   */
ENTRY(bcopy)
	cmp	%o0, %o1		! src < dst?
	bgeu	Lbcopy_start		! no: copy forwards
	 cmp	%o2, BCOPY_SMALL	! [delay slot, always runs: flags for the
					!  length test on either path]

	/*
	 * src lies below dst: copy backwards, starting from the end.
	 * The plain add instructions leave the flags from the cmp intact.
	 */
	add	%o2, %o0, %o0		! src += len
	add	%o2, %o1, %o1		! dst += len
	bge,a	Lback_fancy		! len >= BCOPY_SMALL: take the fast paths
	 btst	3, %o0			! [annulled slot: is src word aligned?]

	/*
	 * Short copy: move one byte per iteration.  The dec of %o0 sits in
	 * the delay slot of the bl, so it also runs when len is 0; that is
	 * harmless because %o0 is overwritten before returning.
	 */
	deccc	%o2			! while (--len >= 0)
	bl	1f
	 EMPTY
0:
	dec	%o0			!	byte = *--src;
	ldsb	[%o0], %o4
	dec	%o1			!	--dst;
	deccc	%o2
	bge	0b
	 stb	%o4, [%o1]		!	[delay slot: *dst = byte]
1:
	retl
	 mov	%o5, %o0		! return (dst)

	/*
	 * At least BCOPY_SMALL bytes.  The backward copy only goes as wide
	 * as words; there is no doubleword path here.
	 */
Lback_fancy:
	! Flags still reflect btst 3, %o0 from the annulled slot above.
	bnz	1f			! if ((src & 3) == 0 &&
	 btst	3, %o1			!     (dst & 3) == 0)
	bz,a	Lback_words		!	goto words;
	 dec	4, %o2			! [annulled slot: len -= 4 for the loop]
1:
	/*
	 * Not both word aligned.  Bits that differ between src and dst can
	 * never be aligned away, so t = src ^ dst picks the copy width.
	 */
	xor	%o0, %o1, %o3		! t = src ^ dst;
	btst	1, %o3
	bz,a	3f			! (t & 1) == 0: halfwords or better
	 btst	1, %o0			! [annulled slot: test (src & 1)]

	/*
	 * Bit 0 differs: the whole region has to go byte by byte.
	 */
2:
	dec	%o0			! do {
	ldsb	[%o0], %o4		!	byte = *--src;
	dec	%o1			!	--dst;
	deccc	%o2			! } while (--len != 0);
	bnz	2b
	 stb	%o4, [%o1]		!	[delay slot: *dst = byte]
	retl
	 mov	%o5, %o0		! return (dst)
3:
	/*
	 * Bit 0 matches.  If both end addresses are odd, copy one byte so
	 * that both become halfword aligned.
	 */
	bz,a	4f			! if (src & 1) {
	 btst	2, %o3			! [annulled slot: test (t & 2)]
	dec	%o0			!	*--dst = *--src;
	ldsb	[%o0], %o4
	dec	%o1
	stb	%o4, [%o1]
	dec	%o2			!	len--;
	btst	2, %o3			! } [test (t & 2)]
4:
	/*
	 * Word copies are possible only when bit 1 matches as well.
	 */
	bz,a	6f			! (t & 2) == 0: can copy words
	 btst	2, %o0			! [annulled slot: test (src & 2)]

	/*
	 * Bit 1 differs: halfwords are the widest usable unit.
	 */
	dec	2, %o2			! len -= 2;
5:
	dec	2, %o0			! do {
	ldsh	[%o0], %o4		!	src -= 2; half = *(short *)src;
	dec	2, %o1			!	dst -= 2;
	deccc	2, %o2			! } while ((len -= 2) >= 0);
	bge	5b
	 sth	%o4, [%o1]		!	[delay slot: *(short *)dst = half]
	b	Lback_mopb		! goto mop_up_byte;
	 btst	1, %o2			! [delay slot: test (len & 1)]
6:
	/*
	 * Low two bits match.  If both end addresses are 2 mod 4, copy one
	 * halfword so that both become word aligned.
	 */
	bz	7f			! if (src & 2) {
	 dec	4, %o2			! [delay slot, always runs: len -= 4]
	dec	2, %o0			!	src -= 2;
	ldsh	[%o0], %o4		!	half = *(short *)src;
	dec	2, %o1			!	dst -= 2;
	sth	%o4, [%o1]		!	*(short *)dst = half;
	dec	2, %o2			!	len -= 2;
					! }
7:
Lback_words:
	/*
	 * Word loop, running backwards.  len has already been reduced by 4
	 * on every path that reaches this point.
	 */
0:					! do {
	dec	4, %o0			!	src -= 4;
	dec	4, %o1			!	dst -= 4;
	ld	[%o0], %o4		!	word = *(int *)src;
	deccc	4, %o2			! } while ((len -= 4) >= 0);
	bge	0b
	 st	%o4, [%o1]		!	[delay slot: *(int *)dst = word]

	/*
	 * len is now negative, but its low two bits still give the number
	 * of bytes left over.  Handle a leftover halfword first.
	 */
	btst	2, %o2			! if (len & 2) {
	bz,a	1f
	 btst	1, %o2			! [annulled slot: test (len & 1)]
	dec	2, %o0			!	src -= 2;
	ldsh	[%o0], %o4		!	half = *(short *)src;
	dec	2, %o1			!	dst -= 2;
	sth	%o4, [%o1]		!	*(short *)dst = half;
	btst	1, %o2			! } [test (len & 1)]

	/*
	 * Mop up a trailing byte.  Entered with the flags set from
	 * (len & 1).
	 */
1:
Lback_mopb:
	bnz,a	1f			! if (len & 1) {
	 ldsb	[%o0 - 1], %o4		! [annulled slot: byte = src[-1]]
	retl
	 mov	%o5, %o0		! return (dst)
1:
	stb	%o4, [%o1 - 1]		!	dst[-1] = byte;
	retl				! }
	 mov	%o5, %o0		! return (dst)