123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- /*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
- /* Numerology:
- * WXYZ
- * W: width in bytes
- * X: Load=0, Store=1
- * Y: Location 0=preamble,8=loop,9=epilog
- * Z: Location=0,handler=9
- *
- * Every numeric label below (8080, 8190, 1000, ...) tags one memory access
- * using the WXYZ scheme above. Labels whose last digit is 9 ("handler") are
- * not defined in this template; presumably the including file provides them
- * together with its fault fix-up code -- TODO confirm against the includer.
- *
- * Template overview:
- * FUNCNAME copies `bytes` bytes from `src` to `dst`. It picks the widest
- * access (8, 4, 2 or 1 bytes) that the alignment of BOTH pointers allows,
- * runs a software-pipelined hardware loop (loop0, predicate p3) for the
- * bulk, and finishes any tail through the byte loop. Both return sites
- * visible here leave r0 = 0.
- *
- * FUNCNAME, bytes, dst, src, loopcount, src_dst_sav, d_dbuf and w_dbuf are
- * not defined in this file; they are expected to be supplied (e.g. as
- * preprocessor macros naming registers) by whatever includes this template.
- * r3, r4, p0, p1 and p3 are used as scratch.
- */
- .text
- .global FUNCNAME
- .type FUNCNAME, @function
- .p2align 5 /* align the entry point to 32 bytes */
- FUNCNAME:
- { /* entry packet: reject zero length, gather alignment information */
- p0 = cmp.gtu(bytes,#0)
- if (!p0.new) jump:nt .Ldone /* bytes == 0: nothing to copy */
- r3 = or(dst,src) /* low bits clear only if BOTH pointers are aligned */
- r4 = xor(dst,src) /* low bits clear if both pointers share the same misalignment */
- }
- {
- p1 = cmp.gtu(bytes,#15) /* p1: at least 16 bytes (two doublewords) */
- p0 = bitsclr(r3,#7) /* p0: dst and src are both 8-byte aligned */
- if (!p0.new) jump:nt .Loop_not_aligned_8
- src_dst_sav = combine(src,dst) /* snapshot of both pointers; never read here - presumably for the fault handlers (confirm) */
- }
- {
- loopcount = lsr(bytes,#3) /* number of whole doublewords */
- if (!p1) jump .Lsmall /* fewer than 16 bytes: let the byte loop handle it */
- }
- p3=sp1loop0(.Loop8,loopcount) /* pipelined loop0: p3 is false during the first pass only */
- .Loop8:
- 8080: /* 8-byte load, loop (same packet as 8180) */
- 8180: /* 8-byte store, loop */
- {
- if (p3) memd(dst++#8) = d_dbuf /* store the doubleword loaded by the previous pass */
- d_dbuf = memd(src++#8) /* load the next doubleword */
- }:endloop0
- 8190: /* 8-byte store, epilog */
- {
- memd(dst++#8) = d_dbuf /* drain the last doubleword still held in d_dbuf */
- bytes -= asl(loopcount,#3) /* bytes left = bytes - 8 * loopcount */
- jump .Lsmall /* copy the remaining 0..7 bytes, if any */
- }
- .Loop_not_aligned_8:
- {
- p0 = bitsclr(r4,#7) /* do dst and src share the same offset within a doubleword? */
- if (p0.new) jump:nt .Lalign /* yes: copy up to 7 leading bytes, then restart */
- }
- {
- p0 = bitsclr(r3,#3) /* p0: dst and src are both 4-byte aligned */
- if (!p0.new) jump:nt .Loop_not_aligned_4
- p1 = cmp.gtu(bytes,#7) /* p1: at least 8 bytes (two words) */
- }
- {
- if (!p1) jump .Lsmall /* too short for the word loop */
- loopcount = lsr(bytes,#2) /* number of whole words */
- }
- p3=sp1loop0(.Loop4,loopcount) /* same pipelined scheme as .Loop8, 4 bytes per pass */
- .Loop4:
- 4080: /* 4-byte load, loop (same packet as 4180) */
- 4180: /* 4-byte store, loop */
- {
- if (p3) memw(dst++#4) = w_dbuf /* store the word loaded by the previous pass */
- w_dbuf = memw(src++#4)
- }:endloop0
- 4190: /* 4-byte store, epilog */
- {
- memw(dst++#4) = w_dbuf /* drain the last word */
- bytes -= asl(loopcount,#2) /* bytes left = bytes - 4 * loopcount */
- jump .Lsmall
- }
- .Loop_not_aligned_4:
- {
- p0 = bitsclr(r3,#1) /* p0: dst and src are both 2-byte aligned */
- if (!p0.new) jump:nt .Loop_not_aligned
- p1 = cmp.gtu(bytes,#3) /* p1: at least 4 bytes (two halfwords) */
- }
- {
- if (!p1) jump .Lsmall /* too short for the halfword loop */
- loopcount = lsr(bytes,#1) /* number of whole halfwords */
- }
- p3=sp1loop0(.Loop2,loopcount) /* same pipelined scheme, 2 bytes per pass */
- .Loop2:
- 2080: /* 2-byte load, loop (same packet as 2180) */
- 2180: /* 2-byte store, loop */
- {
- if (p3) memh(dst++#2) = w_dbuf /* store the halfword loaded by the previous pass */
- w_dbuf = memuh(src++#2)
- }:endloop0
- 2190: /* 2-byte store, epilog */
- {
- memh(dst++#2) = w_dbuf /* drain the last halfword */
- bytes -= asl(loopcount,#1) /* bytes left = bytes - 2 * loopcount */
- jump .Lsmall
- }
- .Loop_not_aligned: /* Works for as small as one byte */
- p3=sp1loop0(.Loop1,bytes) /* one pass per remaining byte */
- .Loop1:
- 1080: /* 1-byte load, loop (same packet as 1180) */
- 1180: /* 1-byte store, loop */
- {
- if (p3) memb(dst++#1) = w_dbuf /* store the byte loaded by the previous pass */
- w_dbuf = memub(src++#1)
- }:endloop0
- /* Done */
- 1190: /* 1-byte store, epilog */
- {
- memb(dst) = w_dbuf /* final byte; dst is not advanced because we return right here */
- jumpr r31 /* return to the caller */
- r0 = #0 /* return value 0 */
- }
- .Lsmall: /* common tail: bytes holds what the wide loops left over */
- {
- p0 = cmp.gtu(bytes,#0)
- if (p0.new) jump:nt .Loop_not_aligned /* leftovers go through the byte loop */
- }
- .Ldone:
- {
- r0 = #0 /* return value 0 */
- jumpr r31
- }
- .falign
- .Lalign: /* reached only when (dst ^ src) & 7 == 0 but the pointers are not 8-byte aligned */
- 1000: /* 1-byte load, preamble */
- {
- if (p0.new) w_dbuf = memub(src) /* load one byte if src is odd (p0 is produced in this same packet) */
- p0 = tstbit(src,#0)
- if (!p1) jump .Lsmall /* p1 is still "bytes > 15" from the entry sequence */
- }
- 1100: /* 1-byte store, preamble */
- {
- if (p0) memb(dst++#1) = w_dbuf
- if (p0) bytes = add(bytes,#-1)
- if (p0) src = add(src,#1) /* src only advances here, alongside the store */
- }
- 2000: /* 2-byte load, preamble */
- {
- if (p0.new) w_dbuf = memuh(src) /* load a halfword if bit 1 of src is set */
- p0 = tstbit(src,#1)
- if (!p1) jump .Lsmall
- }
- 2100: /* 2-byte store, preamble */
- {
- if (p0) memh(dst++#2) = w_dbuf
- if (p0) bytes = add(bytes,#-2)
- if (p0) src = add(src,#2)
- }
- 4000: /* 4-byte load, preamble */
- {
- if (p0.new) w_dbuf = memw(src) /* load a word if bit 2 of src is set */
- p0 = tstbit(src,#2)
- if (!p1) jump .Lsmall
- }
- 4100: /* 4-byte store, preamble */
- {
- if (p0) memw(dst++#4) = w_dbuf
- if (p0) bytes = add(bytes,#-4)
- if (p0) src = add(src,#4)
- jump FUNCNAME /* restart from the top: bits 0..2 of src (and of dst, which shares them) are now clear */
- }
- .size FUNCNAME,.-FUNCNAME
|