123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- /* memcpy.S: optimised assembly memcpy
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
- .text
- .p2align 4 ; align the function entry to 2^4 = 16 bytes
- ###############################################################################
- #
- # void *memcpy(void *to, const char *from, size_t count)
- #
- # Copy count bytes from 'from' to 'to', using the widest transfer unit
- # (16, 8, 4, 2 or 1 bytes per loop iteration) that the combined alignment of
- # to, from and count allows.
- #
- # Registers, as used by the code below:
- #   GR8     - to; never written here, so it still holds 'to' on return
- #   GR9     - from; becomes the running source pointer
- #   GR10    - count; counted down to zero by the selected loop
- #   GR3     - running destination pointer
- #   GR11    - step: index register for the updating load/store insns
- #   GR4-GR7 - data in transit (double-word transfers fill a register pair);
- #             GR4 also holds the alignment test value during dispatch
- #   ICC0, ICC1, ICC3 - condition codes used for dispatch and loop control
- #
- # - NOTE: must not use any stack. exception detection performs function return
- # to caller's fixup routine, aborting the remainder of the copy
- #
- # - NOTE: an insn carrying a ".p" suffix is packed with the insn that follows
- # it, so the instruction order here must not be rearranged casually
- #
- ###############################################################################
- .globl memcpy,__memcpy_end
- .type memcpy,@function
- memcpy:
- or.p gr8,gr9,gr4 ; GR4 = to | from
- orcc gr10,gr0,gr0,icc3 ; set ICC3 from count (the result itself goes to GR0)
- or.p gr10,gr4,gr4 ; GR4 = to | from | count
- beqlr icc3,#0 ; count == 0: nothing to copy, return straight away
- # optimise based on best common alignment for to, from & count
- # (a low bit of GR4 is clear only if it is clear in all three values, so each
- # mask test below checks to, from and count at once; each test is packed with
- # the branch that consumes the previous test's condition code)
- andicc.p gr4,#0x0f,gr0,icc0 ; ICC0: everything a multiple of 16?
- setlos #8,gr11 ; step = 8, used by both memcpy_16 and memcpy_8
- andicc.p gr4,#0x07,gr0,icc1 ; ICC1: everything a multiple of 8?
- beq icc0,#0,memcpy_16
- andicc.p gr4,#0x03,gr0,icc0 ; ICC0: everything a multiple of 4?
- beq icc1,#0,memcpy_8
- andicc.p gr4,#0x01,gr0,icc1 ; ICC1: everything a multiple of 2?
- beq icc0,#0,memcpy_4
- setlos.p #1,gr11 ; step = 1 for the byte loop (memcpy_2 reloads GR11 itself)
- beq icc1,#0,memcpy_2
- # do byte by byte copy
- # (both pointers are first moved back by one step, because the loop's
- # load/store insns address @(pointer,GR11) and then advance the pointer)
- sub.p gr8,gr11,gr3 ; GR3 = to - step
- sub gr9,gr11,gr9 ; GR9 = from - step
- 0: ldubu.p @(gr9,gr11),gr4 ; load the next byte
- subicc gr10,#1,gr10,icc0 ; count -= 1, ICC0 records whether it reached zero
- stbu.p gr4,@(gr3,gr11) ; store the byte
- bne icc0,#2,0b ; loop while count != 0
- bralr ; return to caller
- # do halfword by halfword copy
- memcpy_2:
- setlos #2,gr11 ; step = 2
- sub.p gr8,gr11,gr3 ; GR3 = to - step
- sub gr9,gr11,gr9 ; GR9 = from - step
- 0: lduhu.p @(gr9,gr11),gr4 ; load the next halfword
- subicc gr10,#2,gr10,icc0 ; count -= 2
- sthu.p gr4,@(gr3,gr11) ; store the halfword
- bne icc0,#2,0b ; loop while count != 0
- bralr ; return to caller
- # do word by word copy
- memcpy_4:
- setlos #4,gr11 ; step = 4
- sub.p gr8,gr11,gr3 ; GR3 = to - step
- sub gr9,gr11,gr9 ; GR9 = from - step
- 0: ldu.p @(gr9,gr11),gr4 ; load the next word
- subicc gr10,#4,gr10,icc0 ; count -= 4
- stu.p gr4,@(gr3,gr11) ; store the word
- bne icc0,#2,0b ; loop while count != 0
- bralr ; return to caller
- # do double-word by double-word copy
- # (GR11 is still 8 from the dispatch code above)
- memcpy_8:
- sub.p gr8,gr11,gr3 ; GR3 = to - step
- sub gr9,gr11,gr9 ; GR9 = from - step
- 0: lddu.p @(gr9,gr11),gr4 ; load the next 8 bytes into GR4/GR5
- subicc gr10,#8,gr10,icc0 ; count -= 8
- stdu.p gr4,@(gr3,gr11) ; store GR4/GR5
- bne icc0,#2,0b ; loop while count != 0
- bralr ; return to caller
- # do quad-word by quad-word copy
- # (two 8-byte transfers per iteration; GR11 is still 8 from the dispatch code)
- memcpy_16:
- sub.p gr8,gr11,gr3 ; GR3 = to - step
- sub gr9,gr11,gr9 ; GR9 = from - step
- 0: lddu @(gr9,gr11),gr4 ; first 8 bytes into GR4/GR5
- lddu.p @(gr9,gr11),gr6 ; second 8 bytes into GR6/GR7
- subicc gr10,#16,gr10,icc0 ; count -= 16
- stdu gr4,@(gr3,gr11) ; store the first 8 bytes
- stdu.p gr6,@(gr3,gr11) ; store the second 8 bytes
- bne icc0,#2,0b ; loop while count != 0
- bralr ; return to caller
- # exported end-of-function marker (see .globl above), also used for .size
- # NOTE(review): presumably lets fault handling bound memcpy's address range -
- # confirm against the exception code
- __memcpy_end:
- .size memcpy, __memcpy_end-memcpy
- ###############################################################################
- #
- # copy to/from userspace
- # - return the number of bytes that could not be copied (0 on complete success)
- #
- # long __memcpy_user(void *dst, const void *src, size_t count)
- #
- # Wraps memcpy with an 8-byte stack frame holding:
- #   @(sp,#0) - dst + count (the expected end address of the destination)
- #   @(sp,#4) - the caller's return address (LR on entry)
- # The arguments are passed through to memcpy untouched in GR8/GR9/GR10.
- #
- # NOTE(review): how a fault inside memcpy gets routed to
- # __memcpy_user_error_handler is not visible in this file; presumably the
- # exception code recognises LR == __memcpy_user_error_lr - confirm.
- #
- ###############################################################################
- .globl __memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
- .type __memcpy_user,@function
- __memcpy_user:
- movsg lr,gr7 ; GR7 = return address, about to be overwritten by the call
- subi.p sp,#8,sp ; allocate the 8-byte frame
- add gr8,gr10,gr6 ; calculate expected end address
- stdi gr6,@(sp,#0) ; save the GR6/GR7 pair: end address at #0, return address at #4
- # abuse memcpy to do the dirty work
- call memcpy
- # normal completion: memcpy returns here, so LR inside memcpy is this label
- __memcpy_user_error_lr:
- ldi.p @(sp,#4),gr7 ; reload the saved return address
- setlos #0,gr8 ; return 0: every byte was copied
- jmpl.p @(gr7,gr0) ; return to the caller...
- addi sp,#8,sp ; ...releasing the frame in the same packed pair
- # deal with any exception generated by memcpy
- # GR3 - memcpy's running dest pointer (one step behind its next store address)
- # GR11 - memcpy's step value (index register for store insns)
- # so GR3 + GR11 is the next destination address memcpy would have written, and
- # (dst + count) minus that address is the number of bytes left uncopied
- __memcpy_user_error_handler:
- lddi.p @(sp,#0),gr4 ; load GR4 with dst+count, GR5 with ret addr
- add gr11,gr3,gr7 ; GR7 = next destination address not yet written
- sub.p gr4,gr7,gr8 ; return value = bytes that could not be copied
- addi sp,#8,sp ; release the frame
- jmpl @(gr5,gr0) ; return to __memcpy_user's caller
- .size __memcpy_user, .-__memcpy_user
|