|
- .section .text
- .p2align 4
- .global memcpy
- .type memcpy, @function
- memcpy: /* Entry point. ptr_in, ptr_out, len and the other symbolic names are not defined in this view (presumably register aliases defined elsewhere; confirm). */
- { /* NOTE(review): many operands below stop right after ',', '=' or '(' -- the '#'-prefixed operands appear to be missing from this listing, so constants cannot be read from here. */
- p2 = cmp.eq(len,
- align888 = or(ptr_in, ptr_out); /* OR of both pointers; len is ORed in by the next packet and the result is bit-tested after that */
- p0 = cmp.gtu(len,
- p1 = cmp.eq(ptr_in, ptr_out); /* true when source and destination are the same address */
- }
- {
- p1 = or(p2, p1); /* combine the equality test on len (p2) with the same-pointer test */
- p3 = cmp.gtu(len,
- align888 = or(align888, len);
- len8 = lsr(len,
- }
- {
- dcfetch(ptr_in); /* data-cache prefetch at the source pointer */
- p2 = bitsclr(align888,
- if(p1) jumpr r31; /* leave through r31 when p1 is set; r31 has not been rewritten at this point */
- }
- {
- p2 = and(p2,!p3); /* p2 survives only if the bit test passed and the p3 unsigned compare on len did not */
- if (p2.new) len = add(len,
- if (p2.new) jump:NT .Ldwordaligned; /* branch (hinted not-taken) to the doubleword copy loop */
- }
- {
- if(!p0) jump .Lbytes23orless; /* p0 false: take the byte-copy path */
- mask.l =
-
- prolog = sub(
- }
- { /* from here on a stack frame is in use (allocframe below) */
-
- allocframe(
- mask.h =
- ptr_in_p_128 = add(ptr_in,
- back = cl0(len); /* number of leading zero bits in len */
- }
- { /* this packet and the next two each contain one doubleword store to the frame (stored registers not visible) */
- memd(sp+
- r31.l =
- prolog &= lsr(mask, back); /* AND prolog with mask shifted right by back */
- offset = and(ptr_in,
- }
- { /* NOTE(review): r31 is rewritten in two halves (here and in the previous packet) with a value not visible; presumably the address of .Lmemcpy_return so that later jumpr r31 exits release the frame -- confirm */
- memd(sp+
- dalign = sub(ptr_out, ptr_in); /* destination address minus source address */
- r31.h =
- }
- {
-
- over = add(len, ptr_in); /* source address plus length */
- back = add(len, offset); /* back is reused: now len plus offset */
- memd(sp+
- }
- {
- noprolog = bitsclr(prolog,
- prolog = and(prolog,
- dcfetch(ptr_in_p_128); /* prefetch ahead of the source; the prefetch pointer is advanced in the same packet */
- ptr_in_p_128 = add(ptr_in_p_128,
- }
- {
- kernel = sub(len, prolog); /* length remaining once the prolog part is taken off */
- shift = asl(prolog,
- star3 = and(prolog,
- ptr_in = and(ptr_in,
- }
- {
- prolog = lsr(prolog,
- epilog = and(kernel,
- ptr_out_p_32 = add(ptr_out, prolog); /* destination-ahead pointer used by the dczeroa instructions */
- over = and(over,
- }
- {
- p3 = cmp.gtu(back,
- kernel = lsr(kernel,
- dcfetch(ptr_in_p_128);
- ptr_in_p_128 = add(ptr_in_p_128,
- }
- {
- p1 = cmp.eq(prolog,
- if(!p1.new) prolog = add(prolog,
- dcfetch(ptr_in_p_128);
- ptr_in_p_128 = add(ptr_in_p_128,
- }
- {
- nokernel = cmp.eq(kernel,
- dcfetch(ptr_in_p_128);
- ptr_in_p_128 = add(ptr_in_p_128,
- shiftb = and(shift,
- }
- {
- dcfetch(ptr_in_p_128);
- ptr_in_p_128 = add(ptr_in_p_128,
- if(nokernel) jump .Lskip64; /* skips the two packets below (both dczeroa and the dalign masking) */
- p2 = cmp.eq(kernel,
- }
- {
- dczeroa(ptr_out_p_32); /* cache zero-allocate at the destination-ahead pointer */
-
- if(!p2) ptr_out_p_32 = add(ptr_out_p_32,
- }
- {
- dalign = and(dalign,
- dczeroa(ptr_out_p_32);
- }
- .Lskip64: /* first source loads, then up to three conditional partial stores (byte, halfword, word) */
- { /* NOTE(review): operands after '#' appear to be missing in this listing; load offsets and stored values are not visible */
- data70 = memd(ptr_in++
- if(p3) dataF8 = memd(ptr_in+
- if(noprolog) jump .Lnoprolog32; /* skip the conditional byte/halfword/word stores below */
- align = offset;
- }
- {
- ldata0 = valignb(dataF8, data70, align); /* byte-align across the dataF8/data70 pair, controlled by align */
- ifbyte = tstbit(shift,
- offset = add(offset, star3);
- }
- {
- if(ifbyte) memb(ptr_out++
- ldata0 = lsr(ldata0, shiftb); /* shift ldata0 right by shiftb */
- shiftb = and(shift,
- ifhword = tstbit(shift,
- }
- {
- if(ifhword) memh(ptr_out++
- ldata0 = lsr(ldata0, shiftb);
- ifword = tstbit(shift,
- p2 = cmp.gtu(offset,
- }
- {
- if(ifword) memw(ptr_out++
- if(p2) data70 = dataF8; /* when p2: dataF8 becomes the low doubleword and a new dataF8 is loaded */
- if(p2) dataF8 = memd(ptr_in++
- align = offset; /* align takes the offset updated by star3 above */
- }
- .Lnoprolog32: /* doubleword prolog loop; NOTE(review): '#' operands appear to be missing in this listing */
- {
- p3 = sp1loop0(.Ldword_loop_prolog, prolog) /* set up hardware loop 0 with count prolog; sp1loop0 also writes p3 (see Hexagon ISA manual for when it becomes true) */
- rest = sub(len, star3); /* len minus star3 */
- p0 = cmp.gt(over,
- }
- if(p0) rest = add(rest,
- .Ldword_loop_prolog:
- {
- if(p3) memd(ptr_out++
- ldata0 = valignb(dataF8, data70, align); /* next aligned doubleword from the dataF8/data70 pair */
- p0 = cmp.gt(rest,
- }
- {
- data70 = dataF8; /* slide the two-doubleword window forward */
- if(p0) dataF8 = memd(ptr_in++
- rest = add(rest,
- }:endloop0
- .Lkernel: /* main loop selection; NOTE(review): '#' operands appear to be missing in this listing */
- {
-
- p3 = cmp.gtu(kernel,
-
- if(p3.new) kernel = add(kernel,
-
- if(p3.new) epilog = add(epilog,
- }
- {
- nokernel = cmp.eq(kernel,
- if(nokernel.new) jump:NT .Lepilog; /* nothing for the unrolled loops: go straight to the epilog (hinted not-taken) */
- inc = combine(
- p3 = cmp.gtu(dalign,
- }
- {
- if(p3) jump .Lodd_alignment; /* the unsigned compare on dalign selects the other unrolled loop */
- }
- {
- loop0(.Loword_loop_25to31, kernel); /* hardware loop 0, count = kernel */
- kernel1 = cmp.gtu(kernel,
- rest = kernel; /* snapshot of kernel, compared against kernel inside the loop */
- }
- .falign
- .Loword_loop_25to31: /* unrolled body: four doubleword loads and four store slots per pass */
- {
- dcfetch(ptr_in_p_128);
- if(kernel1) ptr_out_p_32 = add(ptr_out_p_32,
- }
- {
- dczeroa(ptr_out_p_32);
- p3 = cmp.eq(kernel, rest); /* true while kernel still equals the pre-loop snapshot; guards the first store of the next packet */
- }
- {
-
- ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); /* word-wise vector add of inc; presumably updates ptr_in_p_128 and kernel together -- confirm the alias */
-
- if(!p3) memd(ptr_out++
- ldata1 = valignb(dataF8, data70, align);
- data70 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata0 = valignb(data70, dataF8, align); /* operand order alternates as data70 and dataF8 swap roles */
- dataF8 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata1 = valignb(dataF8, data70, align);
- data70 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata0 = valignb(data70, dataF8, align);
- dataF8 = memd(ptr_in++
- kernel1 = cmp.gtu(kernel,
- }:endloop0
- { /* one more doubleword store after the loop, then continue at the epilog */
- memd(ptr_out++
- jump .Lepilog;
- }
- .Lodd_alignment: /* second unrolled loop, taken when the dalign compare in .Lkernel is true; NOTE(review): '#' operands appear to be missing in this listing */
- {
- loop0(.Loword_loop_00to24, kernel); /* hardware loop 0, count = kernel */
- kernel1 = cmp.gtu(kernel,
- rest = add(kernel,
- }
- .falign
- .Loword_loop_00to24: /* unrolled body: four doubleword loads and four stores per pass, all stores unconditional */
- {
- dcfetch(ptr_in_p_128);
- ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); /* word-wise vector add of inc; presumably updates ptr_in_p_128 and kernel together -- confirm the alias */
- if(kernel1) ptr_out_p_32 = add(ptr_out_p_32,
- }
- {
- dczeroa(ptr_out_p_32);
- }
- {
- memd(ptr_out++
- ldata0 = valignb(dataF8, data70, align);
- data70 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata0 = valignb(data70, dataF8, align); /* operand order alternates as data70 and dataF8 swap roles */
- dataF8 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata0 = valignb(dataF8, data70, align);
- data70 = memd(ptr_in++
- }
- {
- memd(ptr_out++
- ldata0 = valignb(data70, dataF8, align);
- dataF8 = memd(ptr_in++
- kernel1 = cmp.gtu(kernel,
- }:endloop0
- .Lepilog: /* tail handling: a doubleword loop, then conditional word/halfword/byte stores; NOTE(review): '#' operands appear to be missing in this listing */
- {
- noepilog = cmp.eq(epilog,
- epilogdws = lsr(epilog,
- kernel = and(epilog, /* kernel is reused here as a scratch value derived from epilog */
- }
- {
- if(noepilog) jumpr r31; /* exit through r31 when there is no epilog work */
- if(noepilog) ptr_out = sub(ptr_out, len); /* step ptr_out back by len; presumably restores the original destination as the return value -- confirm */
- p3 = cmp.eq(epilogdws,
- shift2 = asl(epilog,
- }
- {
- shiftb = and(shift2,
- ifword = tstbit(epilog,
- if(p3) jump .Lepilog60; /* skip the doubleword loop */
- if(!p3) epilog = add(epilog,
- }
- {
- loop0(.Ldword_loop_epilog, epilogdws); /* hardware loop 0, count = epilogdws */
-
- p3 = cmp.eq(kernel,
- if(p3.new) kernel=
- p1 = cmp.gt(over,
- }

- if(p1) kernel=
- .Ldword_loop_epilog:
- {
- memd(ptr_out++
- ldata0 = valignb(dataF8, data70, align);
- p3 = cmp.gt(epilog, kernel); /* signed compare; gates the next source load */
- }
- {
- data70 = dataF8; /* slide the two-doubleword window forward */
- if(p3) dataF8 = memd(ptr_in++
- epilog = add(epilog,
- }:endloop0
- .Lepilog60: /* remaining bytes: word, then halfword, then byte, each guarded by a bit test on epilog */
- {
- if(ifword) memw(ptr_out++
- ldata0 = lsr(ldata0, shiftb);
- ifhword = tstbit(epilog,
- shiftb = and(shift2,
- }
- {
- if(ifhword) memh(ptr_out++
- ldata0 = lsr(ldata0, shiftb);
- ifbyte = tstbit(epilog,
- if(ifbyte.new) len = add(len, /* len is adjusted only when the byte store will happen; that store below has no post-increment */
- }
- {
- if(ifbyte) memb(ptr_out) = data0;
- ptr_out = sub(ptr_out, len); /* step ptr_out back by the (possibly adjusted) len */
- jumpr r31;
- }
- .Lbytes23orless: /* byte-at-a-time copy; branched to from the entry code before allocframe, so no frame is active here */
- { /* NOTE(review): '#' operands appear to be missing in this listing; the len adjustment and the stored value in the loop are not visible */
- p3 = sp1loop0(.Lbyte_copy, len); /* hardware loop 0 with count len; sp1loop0 also writes p3, which guards the store in the loop */
- len = add(len,
- }
- .Lbyte_copy:
- {
- data0 = memb(ptr_in++
- if(p3) memb(ptr_out++
- }:endloop0
- {
- memb(ptr_out) = data0; /* store the last loaded byte; no post-increment on this store */
- ptr_out = sub(ptr_out, len); /* step ptr_out back by the adjusted len */
- jumpr r31;
- }
- .Ldwordaligned: /* doubleword-at-a-time copy; branched to from the entry code before allocframe, so no frame is active here */
- { /* NOTE(review): '#' operands appear to be missing in this listing; the stored value in the loop is not visible */
- p3 = sp1loop0(.Ldword_copy, len8); /* hardware loop 0 with count len8; sp1loop0 also writes p3, which guards the store in the loop */
- }
- .Ldword_copy:
- {
- if(p3) memd(ptr_out++
- ldata0 = memd(ptr_in++
- }:endloop0
- {
- memd(ptr_out) = ldata0; /* store the last loaded doubleword; no post-increment on this store */
- ptr_out = sub(ptr_out, len); /* step ptr_out back by len (len was adjusted by the entry code on this path) */
- jumpr r31;
- }
- .Lmemcpy_return: /* NOTE(review): no visible branch names this label; presumably reached through the r31 value built after allocframe -- confirm */
- r21:20 = memd(sp+
- { /* together with the load above: three register pairs reloaded from the frame (offsets not visible in this listing) */
- r25:24 = memd(sp+
- r17:16 = memd(sp+
- }
- deallocframe; /* release the stack frame set up by allocframe */
- jumpr r31
|