/* twofish-amd64.S  -  AMD64 assembly implementation of Twofish cipher
 *
 * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifdef __x86_64
#include <config.h>
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)

#include "asm-common-amd64.h"

.text

/* structure of TWOFISH_context: */
#define s0 0
#define s1 ((s0) + 4 * 256)
#define s2 ((s1) + 4 * 256)
#define s3 ((s2) + 4 * 256)
#define w  ((s3) + 4 * 256)
#define k  ((w) + 4 * 8)
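
/* The s0..s3 offsets above address the four key-dependent S-box tables
 * (256 4-byte entries each), w the 8 whitening subkeys and k the 32 round
 * subkeys; they mirror the leading fields of TWOFISH_context (see
 * twofish.c). */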

/* register macros */
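
/* RA..RD are kept in %rax..%rdx: these are the GPRs whose second byte is
 * addressable as %ah..%dh, which lets the g() macros below extract two
 * S-box indices per register without extra shift instructions. */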
#define CTX	%rdi

#define RA	%rax
#define RB	%rbx
#define RC	%rcx
#define RD	%rdx

#define RAd	%eax
#define RBd	%ebx
#define RCd	%ecx
#define RDd	%edx

#define RAbl	%al
#define RBbl	%bl
#define RCbl	%cl
#define RDbl	%dl

#define RAbh	%ah
#define RBbh	%bh
#define RCbh	%ch
#define RDbh	%dh

#define RX	%r8
#define RY	%r9

#define RXd	%r8d
#define RYd	%r9d

#define RT0	%rsi
#define RT1	%rbp
#define RT2	%r10
#define RT3	%r11

#define RT0d	%esi
#define RT1d	%ebp
#define RT2d	%r10d
#define RT3d	%r11d

/***********************************************************************
 * AMD64 assembly implementation of the Twofish cipher
 ***********************************************************************/
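
/* enc_g1_2 computes X = g(a) and Y = g(b <<< 8) for one round: each byte
 * of the input words indexes one of the key-dependent tables s0..s3 and
 * the four lookups are xored together.  The rorl $16 steps rotate the
 * upper two bytes down into the addressable bl/bh positions, and the a-
 * and b-lookups are interleaved to hide load latency.  dec_g1_2 computes
 * the same values with the instruction schedule rearranged for the
 * decryption round order. */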
#define enc_g1_2(a, b, x, y) \
	movzbl b ## bl, RT3d; \
	movzbl b ## bh, RT1d; \
	movzbl a ## bl, RT2d; \
	movzbl a ## bh, RT0d; \
	rorl $16, b ## d; \
	rorl $16, a ## d; \
	movl s1(CTX, RT3, 4), RYd; \
	movzbl b ## bl, RT3d; \
	movl s0(CTX, RT2, 4), RXd; \
	movzbl a ## bl, RT2d; \
	xorl s2(CTX, RT1, 4), RYd; \
	movzbl b ## bh, RT1d; \
	xorl s1(CTX, RT0, 4), RXd; \
	movzbl a ## bh, RT0d; \
	rorl $16, b ## d; \
	rorl $16, a ## d; \
	xorl s3(CTX, RT3, 4), RYd; \
	xorl s2(CTX, RT2, 4), RXd; \
	xorl s0(CTX, RT1, 4), RYd; \
	xorl s3(CTX, RT0, 4), RXd;

#define dec_g1_2(a, b, x, y) \
	movzbl a ## bl, RT2d; \
	movzbl a ## bh, RT0d; \
	movzbl b ## bl, RT3d; \
	movzbl b ## bh, RT1d; \
	rorl $16, a ## d; \
	rorl $16, b ## d; \
	movl s0(CTX, RT2, 4), RXd; \
	movzbl a ## bl, RT2d; \
	movl s1(CTX, RT3, 4), RYd; \
	movzbl b ## bl, RT3d; \
	xorl s1(CTX, RT0, 4), RXd; \
	movzbl a ## bh, RT0d; \
	xorl s2(CTX, RT1, 4), RYd; \
	movzbl b ## bh, RT1d; \
	rorl $16, a ## d; \
	rorl $16, b ## d; \
	xorl s2(CTX, RT2, 4), RXd; \
	xorl s3(CTX, RT3, 4), RYd; \
	xorl s3(CTX, RT0, 4), RXd; \
	xorl s0(CTX, RT1, 4), RYd;
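
/* One Twofish round: the PHT of (X, Y) plus the round subkeys gives
 *   F0 = X + Y + k[2n]   and   F1 = X + 2*Y + k[2n+1],
 * then c = (c ^ F0) >>> 1 and d = (d <<< 1) ^ F1.  The leal computes
 * X + 2*Y in a single instruction.  decrypt_round is the exact inverse,
 * with the rotates applied on the opposite sides of the xors. */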
#define encrypt_round(ra, rb, rc, rd, n) \
	enc_g1_2(##ra, ##rb, RX, RY); \
	\
	leal (RXd, RYd, 2), RT0d; \
	addl RYd, RXd; \
	addl (k + 8 * (n) + 4)(CTX), RT0d; \
	roll $1, rd ## d; \
	addl (k + 8 * (n))(CTX), RXd; \
	xorl RT0d, rd ## d; \
	xorl RXd, rc ## d; \
	rorl $1, rc ## d;

#define decrypt_round(ra, rb, rc, rd, n) \
	dec_g1_2(##ra, ##rb, RX, RY); \
	\
	leal (RXd, RYd, 2), RT0d; \
	addl RYd, RXd; \
	addl (k + 8 * (n) + 4)(CTX), RT0d; \
	roll $1, rc ## d; \
	addl (k + 8 * (n))(CTX), RXd; \
	xorl RXd, rc ## d; \
	xorl RT0d, rd ## d; \
	rorl $1, rd ## d;

#define encrypt_cycle(a, b, c, d, nc) \
	encrypt_round(##a, ##b, ##c, ##d, (nc) * 2); \
	encrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1);

#define decrypt_cycle(a, b, c, d, nc) \
	decrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); \
	decrypt_round(##a, ##b, ##c, ##d, (nc) * 2);
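
/* inpack/outunpack apply the input/output whitening: word n of the block
 * is xored with subkey w[m].  The swapped register order at the
 * encryption output (and decryption input) implements Twofish's final
 * word swap. */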
#define inpack(in, n, x, m) \
	movl (4 * (n))(in), x; \
	xorl (w + 4 * (m))(CTX), x;

#define outunpack(out, n, x, m) \
	xorl (w + 4 * (m))(CTX), x; \
	movl x, (4 * (n))(out);

.align 16
.globl _gcry_twofish_amd64_encrypt_block
ELF(.type _gcry_twofish_amd64_encrypt_block,@function;)

_gcry_twofish_amd64_encrypt_block:
	/* input:
	 *	%rdi: context, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(3 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(3 * 8);
	movq %rsi, (0 * 8)(%rsp);
	movq %rbp, (1 * 8)(%rsp);
	movq %rbx, (2 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 1 * 8);
	CFI_REL_OFFSET(%rbx, 2 * 8);

	movq %rdx, RX;
	inpack(RX, 0, RAd, 0);
	inpack(RX, 1, RBd, 1);
	inpack(RX, 2, RCd, 2);
	inpack(RX, 3, RDd, 3);

	encrypt_cycle(RA, RB, RC, RD, 0);
	encrypt_cycle(RA, RB, RC, RD, 1);
	encrypt_cycle(RA, RB, RC, RD, 2);
	encrypt_cycle(RA, RB, RC, RD, 3);
	encrypt_cycle(RA, RB, RC, RD, 4);
	encrypt_cycle(RA, RB, RC, RD, 5);
	encrypt_cycle(RA, RB, RC, RD, 6);
	encrypt_cycle(RA, RB, RC, RD, 7);

	movq (0 * 8)(%rsp), RX; /*dst*/
	outunpack(RX, 0, RCd, 4);
	outunpack(RX, 1, RDd, 5);
	outunpack(RX, 2, RAd, 6);
	outunpack(RX, 3, RBd, 7);

	movq (2 * 8)(%rsp), %rbx;
	movq (1 * 8)(%rsp), %rbp;
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%rbp);
	addq $(3 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-3 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)

.align 16
.globl _gcry_twofish_amd64_decrypt_block
ELF(.type _gcry_twofish_amd64_decrypt_block,@function;)

_gcry_twofish_amd64_decrypt_block:
	/* input:
	 *	%rdi: context, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(3 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(3 * 8);
	movq %rsi, (0 * 8)(%rsp);
	movq %rbp, (1 * 8)(%rsp);
	movq %rbx, (2 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 1 * 8);
	CFI_REL_OFFSET(%rbx, 2 * 8);

	movq %rdx, RX;
	inpack(RX, 0, RCd, 4);
	inpack(RX, 1, RDd, 5);
	inpack(RX, 2, RAd, 6);
	inpack(RX, 3, RBd, 7);

	decrypt_cycle(RA, RB, RC, RD, 7);
	decrypt_cycle(RA, RB, RC, RD, 6);
	decrypt_cycle(RA, RB, RC, RD, 5);
	decrypt_cycle(RA, RB, RC, RD, 4);
	decrypt_cycle(RA, RB, RC, RD, 3);
	decrypt_cycle(RA, RB, RC, RD, 2);
	decrypt_cycle(RA, RB, RC, RD, 1);
	decrypt_cycle(RA, RB, RC, RD, 0);

	movq (0 * 8)(%rsp), RX; /*dst*/
	outunpack(RX, 0, RAd, 0);
	outunpack(RX, 1, RBd, 1);
	outunpack(RX, 2, RCd, 2);
	outunpack(RX, 3, RDd, 3);

	movq (2 * 8)(%rsp), %rbx;
	movq (1 * 8)(%rsp), %rbp;
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%rbp);
	addq $(3 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-3 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;)

#undef CTX

#undef RA
#undef RB
#undef RC
#undef RD

#undef RAd
#undef RBd
#undef RCd
#undef RDd

#undef RAbl
#undef RBbl
#undef RCbl
#undef RDbl

#undef RAbh
#undef RBbh
#undef RCbh
#undef RDbh

#undef RX
#undef RY

#undef RXd
#undef RYd

#undef RT0
#undef RT1
#undef RT2
#undef RT3

#undef RT0d
#undef RT1d
#undef RT2d
#undef RT3d

/***********************************************************************
 * AMD64 assembly implementation of the Twofish cipher, 3-way parallel
 ***********************************************************************/
#define CTX %rdi
#define RIO %rdx

#define RAB0 %rax
#define RAB1 %rbx
#define RAB2 %rcx

#define RAB0d %eax
#define RAB1d %ebx
#define RAB2d %ecx

#define RAB0bh %ah
#define RAB1bh %bh
#define RAB2bh %ch

#define RAB0bl %al
#define RAB1bl %bl
#define RAB2bl %cl

#define RCD0 %r8
#define RCD1 %r9
#define RCD2 %r10

#define RCD0d %r8d
#define RCD1d %r9d
#define RCD2d %r10d

#define RX0 %rbp
#define RX1 %r11
#define RX2 %r12

#define RX0d %ebp
#define RX1d %r11d
#define RX2d %r12d

#define RY0 %r13
#define RY1 %r14
#define RY2 %r15

#define RY0d %r13d
#define RY1d %r14d
#define RY2d %r15d

#define RT0 %rdx
#define RT1 %rsi

#define RT0d %edx
#define RT1d %esi
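
/* In the 3-way code each 64-bit register packs a whole word pair: a (or c)
 * in the low 32 bits and b (or d) in the high 32 bits, with three blocks
 * side by side in RAB0..2/RCD0..2.  do16bit_ror performs two table lookups
 * for the two low bytes of such a register, then rotates the quadword so
 * the next byte pair becomes addressable through bl/bh. */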
#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
	movzbl ab ## bl, tmp2 ## d; \
	movzbl ab ## bh, tmp1 ## d; \
	rorq $(rot), ab; \
	op1##l T0(CTX, tmp2, 4), dst ## d; \
	op2##l T1(CTX, tmp1, 4), dst ## d;

/*
 * Combined G1 & G2 function. Reordered with help of rotates to have moves
 * at beginning.
 */
#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
	/* G1,1 && G2,1 */ \
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
	\
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
	\
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
	\
	/* G1,2 && G2,2 */ \
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
	movq ab ## 0, RT0; \
	movq cd ## 0, ab ## 0; \
	movq RT0, cd ## 0; \
	\
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
	movq ab ## 1, RT0; \
	movq cd ## 1, ab ## 1; \
	movq RT0, cd ## 1; \
	\
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
	movq ab ## 2, RT0; \
	movq cd ## 2, ab ## 2; \
	movq RT0, cd ## 2;
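
/* enc_round_end finishes a round on a packed pair (g1g2_3 has already
 * swapped the ab and cd registers): the low half is xored with
 * X + Y + k[2n] and rotated right by 1, while the high half is shifted
 * down, rotated left by 1 and xored with X + 2*Y + k[2n+1]; shlq/orq then
 * repack the two halves.  dec_round_end mirrors this for decryption. */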
#define enc_round_end(ab, x, y, n) \
	addl y ## d, x ## d; \
	addl x ## d, y ## d; \
	addl k+4*(2*(n))(CTX), x ## d; \
	xorl ab ## d, x ## d; \
	addl k+4*(2*(n)+1)(CTX), y ## d; \
	shrq $32, ab; \
	roll $1, ab ## d; \
	xorl y ## d, ab ## d; \
	shlq $32, ab; \
	rorl $1, x ## d; \
	orq x, ab;

#define dec_round_end(ba, x, y, n) \
	addl y ## d, x ## d; \
	addl x ## d, y ## d; \
	addl k+4*(2*(n))(CTX), x ## d; \
	addl k+4*(2*(n)+1)(CTX), y ## d; \
	xorl ba ## d, y ## d; \
	shrq $32, ba; \
	roll $1, ba ## d; \
	xorl x ## d, ba ## d; \
	shlq $32, ba; \
	rorl $1, y ## d; \
	orq y, ba;

#define encrypt_round3(ab, cd, n) \
	g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
	\
	enc_round_end(ab ## 0, RX0, RY0, n); \
	enc_round_end(ab ## 1, RX1, RY1, n); \
	enc_round_end(ab ## 2, RX2, RY2, n);

#define decrypt_round3(ba, dc, n) \
	g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
	\
	dec_round_end(ba ## 0, RX0, RY0, n); \
	dec_round_end(ba ## 1, RX1, RY1, n); \
	dec_round_end(ba ## 2, RX2, RY2, n);

#define encrypt_cycle3(ab, cd, n) \
	encrypt_round3(ab, cd, n*2); \
	encrypt_round3(ab, cd, (n*2)+1);

#define decrypt_cycle3(ba, dc, n) \
	decrypt_round3(ba, dc, (n*2)+1); \
	decrypt_round3(ba, dc, (n*2));
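
/* Since each register packs two 32-bit words, one 64-bit xorq applies two
 * consecutive whitening subkeys at once: inpack3(RAB, 0) xors w[0] into
 * the low (a) halves and w[1] into the high (b) halves of all three
 * blocks. */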
#define inpack3(xy, m) \
	xorq w+4*m(CTX), xy ## 0; \
	xorq w+4*m(CTX), xy ## 1; \
	xorq w+4*m(CTX), xy ## 2;

#define outunpack3(xy, m) \
	xorq w+4*m(CTX), xy ## 0; \
	xorq w+4*m(CTX), xy ## 1; \
	xorq w+4*m(CTX), xy ## 2;

#define inpack_enc3() \
	inpack3(RAB, 0); \
	inpack3(RCD, 2);

#define outunpack_enc3() \
	outunpack3(RAB, 6); \
	outunpack3(RCD, 4);
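
/* Decryption walks the word pairs in swapped order, so the packed halves
 * are exchanged with rorq $32 around the whitening steps. */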
#define inpack_dec3() \
	inpack3(RAB, 4); \
	rorq $32, RAB0; \
	rorq $32, RAB1; \
	rorq $32, RAB2; \
	inpack3(RCD, 6); \
	rorq $32, RCD0; \
	rorq $32, RCD1; \
	rorq $32, RCD2;

#define outunpack_dec3() \
	rorq $32, RCD0; \
	rorq $32, RCD1; \
	rorq $32, RCD2; \
	outunpack3(RCD, 0); \
	rorq $32, RAB0; \
	rorq $32, RAB1; \
	rorq $32, RAB2; \
	outunpack3(RAB, 2);

.align 16
ELF(.type __twofish_enc_blk3,@function;)

__twofish_enc_blk3:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three plaintext blocks
	 * output:
	 *	RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks
	 */
	CFI_STARTPROC();

	inpack_enc3();

	encrypt_cycle3(RAB, RCD, 0);
	encrypt_cycle3(RAB, RCD, 1);
	encrypt_cycle3(RAB, RCD, 2);
	encrypt_cycle3(RAB, RCD, 3);
	encrypt_cycle3(RAB, RCD, 4);
	encrypt_cycle3(RAB, RCD, 5);
	encrypt_cycle3(RAB, RCD, 6);
	encrypt_cycle3(RAB, RCD, 7);

	outunpack_enc3();

	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)

.align 16
ELF(.type __twofish_dec_blk3,@function;)

__twofish_dec_blk3:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three ciphertext blocks
	 * output:
	 *	RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks
	 */
	CFI_STARTPROC();

	inpack_dec3();

	decrypt_cycle3(RAB, RCD, 7);
	decrypt_cycle3(RAB, RCD, 6);
	decrypt_cycle3(RAB, RCD, 5);
	decrypt_cycle3(RAB, RCD, 4);
	decrypt_cycle3(RAB, RCD, 3);
	decrypt_cycle3(RAB, RCD, 2);
	decrypt_cycle3(RAB, RCD, 1);
	decrypt_cycle3(RAB, RCD, 0);

	outunpack_dec3();

	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)

.align 16
.globl _gcry_twofish_amd64_blk3
ELF(.type _gcry_twofish_amd64_blk3,@function;)

_gcry_twofish_amd64_blk3:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%ecx: encrypt (0 or 1)
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	testl %ecx, %ecx;
	movq %rdx, RX0;
	movq %rsi, (6 * 8)(%rsp);

	movq (0 * 8)(RX0), RAB0;
	movq (1 * 8)(RX0), RCD0;
	movq (2 * 8)(RX0), RAB1;
	movq (3 * 8)(RX0), RCD1;
	movq (4 * 8)(RX0), RAB2;
	movq (5 * 8)(RX0), RCD2;

	jz .Lblk1_3_dec;
	call __twofish_enc_blk3;
	jmp .Lblk1_3_end;

.Lblk1_3_dec:
	call __twofish_dec_blk3;

.Lblk1_3_end:
	movq (6 * 8)(%rsp), RX0; /*dst*/
	movq RCD0, (0 * 8)(RX0);
	movq RAB0, (1 * 8)(RX0);
	movq RCD1, (2 * 8)(RX0);
	movq RAB1, (3 * 8)(RX0);
	movq RCD2, (4 * 8)(RX0);
	movq RAB2, (5 * 8)(RX0);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_blk3,.-_gcry_twofish_amd64_blk3;)

.align 16
.globl _gcry_twofish_amd64_ctr_enc
ELF(.type _gcry_twofish_amd64_ctr_enc,@function;)

_gcry_twofish_amd64_ctr_enc:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%rcx: iv (big endian, 128bit)
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rsi, (6 * 8)(%rsp);
	movq %rdx, (7 * 8)(%rsp);
	movq %rcx, RX0;
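
	/* The counter is stored big-endian, so it is byte-swapped to native
	 * little-endian order first; the 128-bit increments are then done
	 * as a 64-bit addq with adcq carrying into the high quadword. */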
	/* load IV and byteswap */
	movq 8(RX0), RT0;
	movq 0(RX0), RT1;
	movq RT0, RCD0;
	movq RT1, RAB0;
	bswapq RT0;
	bswapq RT1;

	/* construct IVs */
	movq RT0, RCD1;
	movq RT1, RAB1;
	movq RT0, RCD2;
	movq RT1, RAB2;
	addq $1, RCD1;
	adcq $0, RAB1;
	bswapq RCD1;
	bswapq RAB1;
	addq $2, RCD2;
	adcq $0, RAB2;
	bswapq RCD2;
	bswapq RAB2;
	addq $3, RT0;
	adcq $0, RT1;
	bswapq RT0;
	bswapq RT1;

	/* store new IV */
	movq RT0, 8(RX0);
	movq RT1, 0(RX0);

	call __twofish_enc_blk3;

	movq (7 * 8)(%rsp), RX0; /*src*/
	movq (6 * 8)(%rsp), RX1; /*dst*/

	/* XOR key-stream with plaintext */
	xorq (0 * 8)(RX0), RCD0;
	xorq (1 * 8)(RX0), RAB0;
	xorq (2 * 8)(RX0), RCD1;
	xorq (3 * 8)(RX0), RAB1;
	xorq (4 * 8)(RX0), RCD2;
	xorq (5 * 8)(RX0), RAB2;
	movq RCD0, (0 * 8)(RX1);
	movq RAB0, (1 * 8)(RX1);
	movq RCD1, (2 * 8)(RX1);
	movq RAB1, (3 * 8)(RX1);
	movq RCD2, (4 * 8)(RX1);
	movq RAB2, (5 * 8)(RX1);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)

.align 16
.globl _gcry_twofish_amd64_cbc_dec
ELF(.type _gcry_twofish_amd64_cbc_dec,@function;)

_gcry_twofish_amd64_cbc_dec:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%rcx: iv (128bit)
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(9 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(9 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rsi, (6 * 8)(%rsp);
	movq %rdx, (7 * 8)(%rsp);
	movq %rcx, (8 * 8)(%rsp);
	movq %rdx, RX0;

	/* load input */
	movq (0 * 8)(RX0), RAB0;
	movq (1 * 8)(RX0), RCD0;
	movq (2 * 8)(RX0), RAB1;
	movq (3 * 8)(RX0), RCD1;
	movq (4 * 8)(RX0), RAB2;
	movq (5 * 8)(RX0), RCD2;

	call __twofish_dec_blk3;

	movq (8 * 8)(%rsp), RT0; /*iv*/
	movq (7 * 8)(%rsp), RX0; /*src*/
	movq (6 * 8)(%rsp), RX1; /*dst*/
	movq (4 * 8)(RX0), RY0;
	movq (5 * 8)(RX0), RY1;
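
	/* RY0/RY1 hold the last ciphertext block, read here before the
	 * stores below can clobber it (dst may overlap src); it becomes
	 * the new IV. */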
	xorq (0 * 8)(RT0), RCD0;
	xorq (1 * 8)(RT0), RAB0;
	xorq (0 * 8)(RX0), RCD1;
	xorq (1 * 8)(RX0), RAB1;
	xorq (2 * 8)(RX0), RCD2;
	xorq (3 * 8)(RX0), RAB2;
	movq RY0, (0 * 8)(RT0);
	movq RY1, (1 * 8)(RT0);

	movq RCD0, (0 * 8)(RX1);
	movq RAB0, (1 * 8)(RX1);
	movq RCD1, (2 * 8)(RX1);
	movq RAB1, (3 * 8)(RX1);
	movq RCD2, (4 * 8)(RX1);
	movq RAB2, (5 * 8)(RX1);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(9 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-9 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)

.align 16
.globl _gcry_twofish_amd64_cfb_dec
ELF(.type _gcry_twofish_amd64_cfb_dec,@function;)

_gcry_twofish_amd64_cfb_dec:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%rcx: iv (128bit)
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_0_4

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rsi, (6 * 8)(%rsp);
	movq %rdx, (7 * 8)(%rsp);
	movq %rdx, RX0;
	movq %rcx, RX1;

	/* load input */
	movq (0 * 8)(RX1), RAB0;
	movq (1 * 8)(RX1), RCD0;
	movq (0 * 8)(RX0), RAB1;
	movq (1 * 8)(RX0), RCD1;
	movq (2 * 8)(RX0), RAB2;
	movq (3 * 8)(RX0), RCD2;

	/* Update IV */
	movq (4 * 8)(RX0), RY0;
	movq (5 * 8)(RX0), RY1;
	movq RY0, (0 * 8)(RX1);
	movq RY1, (1 * 8)(RX1);

	call __twofish_enc_blk3;

	movq (7 * 8)(%rsp), RX0; /*src*/
	movq (6 * 8)(%rsp), RX1; /*dst*/

	xorq (0 * 8)(RX0), RCD0;
	xorq (1 * 8)(RX0), RAB0;
	xorq (2 * 8)(RX0), RCD1;
	xorq (3 * 8)(RX0), RAB1;
	xorq (4 * 8)(RX0), RCD2;
	xorq (5 * 8)(RX0), RAB2;
	movq RCD0, (0 * 8)(RX1);
	movq RAB0, (1 * 8)(RX1);
	movq RCD1, (2 * 8)(RX1);
	movq RAB1, (3 * 8)(RX1);
	movq RCD2, (4 * 8)(RX1);
	movq RAB2, (5 * 8)(RX1);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)

.align 16
.globl _gcry_twofish_amd64_ocb_enc
ELF(.type _gcry_twofish_amd64_ocb_enc,@function;)

_gcry_twofish_amd64_ocb_enc:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%rcx: offset
	 *	%r8 : checksum
	 *	%r9 : L pointers (void *L[3])
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_6

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rsi, (6 * 8)(%rsp);
	movq %rdx, RX0;
	movq %rcx, RX1;
	movq %r8, RX2;
	movq %r9, RY0;
	movq %rsi, RY1;

	/* Load offset */
	movq (0 * 8)(RX1), RT0;
	movq (1 * 8)(RX1), RT1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq (RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (0 * 8)(RX0), RAB0;
	movq (1 * 8)(RX0), RCD0;
	/* Store Offset_i */
	movq RT0, (0 * 8)(RY1);
	movq RT1, (1 * 8)(RY1);
	/* Checksum_i = Checksum_{i-1} xor P_i */
	xorq RAB0, (0 * 8)(RX2);
	xorq RCD0, (1 * 8)(RX2);
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB0;
	xorq RT1, RCD0;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 8(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (2 * 8)(RX0), RAB1;
	movq (3 * 8)(RX0), RCD1;
	/* Store Offset_i */
	movq RT0, (2 * 8)(RY1);
	movq RT1, (3 * 8)(RY1);
	/* Checksum_i = Checksum_{i-1} xor P_i */
	xorq RAB1, (0 * 8)(RX2);
	xorq RCD1, (1 * 8)(RX2);
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB1;
	xorq RT1, RCD1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 16(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (4 * 8)(RX0), RAB2;
	movq (5 * 8)(RX0), RCD2;
	/* Store Offset_i */
	movq RT0, (4 * 8)(RY1);
	movq RT1, (5 * 8)(RY1);
	/* Checksum_i = Checksum_{i-1} xor P_i */
	xorq RAB2, (0 * 8)(RX2);
	xorq RCD2, (1 * 8)(RX2);
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB2;
	xorq RT1, RCD2;

	/* Store offset */
	movq RT0, (0 * 8)(RX1);
	movq RT1, (1 * 8)(RX1);

	/* CX_i = ENCIPHER(K, PX_i) */
	call __twofish_enc_blk3;

	movq (6 * 8)(%rsp), RX1; /*dst*/

	/* C_i = CX_i xor Offset_i */
	xorq RCD0, (0 * 8)(RX1);
	xorq RAB0, (1 * 8)(RX1);
	xorq RCD1, (2 * 8)(RX1);
	xorq RAB1, (3 * 8)(RX1);
	xorq RCD2, (4 * 8)(RX1);
	xorq RAB2, (5 * 8)(RX1);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)

.align 16
.globl _gcry_twofish_amd64_ocb_dec
ELF(.type _gcry_twofish_amd64_ocb_dec,@function;)

_gcry_twofish_amd64_ocb_dec:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 *	%rcx: offset
	 *	%r8 : checksum
	 *	%r9 : L pointers (void *L[3])
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_6

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rsi, (6 * 8)(%rsp);
	movq %r8, (7 * 8)(%rsp);
	movq %rdx, RX0;
	movq %rcx, RX1;
	movq %r9, RY0;
	movq %rsi, RY1;

	/* Load offset */
	movq (0 * 8)(RX1), RT0;
	movq (1 * 8)(RX1), RT1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq (RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (0 * 8)(RX0), RAB0;
	movq (1 * 8)(RX0), RCD0;
	/* Store Offset_i */
	movq RT0, (0 * 8)(RY1);
	movq RT1, (1 * 8)(RY1);
	/* CX_i = C_i xor Offset_i */
	xorq RT0, RAB0;
	xorq RT1, RCD0;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 8(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (2 * 8)(RX0), RAB1;
	movq (3 * 8)(RX0), RCD1;
	/* Store Offset_i */
	movq RT0, (2 * 8)(RY1);
	movq RT1, (3 * 8)(RY1);
	/* CX_i = C_i xor Offset_i */
	xorq RT0, RAB1;
	xorq RT1, RCD1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 16(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (4 * 8)(RX0), RAB2;
	movq (5 * 8)(RX0), RCD2;
	/* Store Offset_i */
	movq RT0, (4 * 8)(RY1);
	movq RT1, (5 * 8)(RY1);
	/* CX_i = C_i xor Offset_i */
	xorq RT0, RAB2;
	xorq RT1, RCD2;

	/* Store offset */
	movq RT0, (0 * 8)(RX1);
	movq RT1, (1 * 8)(RX1);

	/* PX_i = DECIPHER(K, CX_i) */
	call __twofish_dec_blk3;

	movq (7 * 8)(%rsp), RX2; /*checksum*/
	movq (6 * 8)(%rsp), RX1; /*dst*/

	/* Load checksum */
	movq (0 * 8)(RX2), RT0;
	movq (1 * 8)(RX2), RT1;

	/* P_i = PX_i xor Offset_i */
	xorq RCD0, (0 * 8)(RX1);
	xorq RAB0, (1 * 8)(RX1);
	xorq RCD1, (2 * 8)(RX1);
	xorq RAB1, (3 * 8)(RX1);
	xorq RCD2, (4 * 8)(RX1);
	xorq RAB2, (5 * 8)(RX1);

	/* Checksum_i = Checksum_{i-1} xor P_i */
	xorq (0 * 8)(RX1), RT0;
	xorq (1 * 8)(RX1), RT1;
	xorq (2 * 8)(RX1), RT0;
	xorq (3 * 8)(RX1), RT1;
	xorq (4 * 8)(RX1), RT0;
	xorq (5 * 8)(RX1), RT1;

	/* Store checksum */
	movq RT0, (0 * 8)(RX2);
	movq RT1, (1 * 8)(RX2);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)

.align 16
.globl _gcry_twofish_amd64_ocb_auth
ELF(.type _gcry_twofish_amd64_ocb_auth,@function;)

_gcry_twofish_amd64_ocb_auth:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: abuf (3 blocks)
	 *	%rdx: offset
	 *	%rcx: checksum
	 *	%r8 : L pointers (void *L[3])
	 */
	CFI_STARTPROC();
	ENTER_SYSV_FUNC_PARAMS_5

	subq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(8 * 8);
	movq %rbp, (0 * 8)(%rsp);
	movq %rbx, (1 * 8)(%rsp);
	movq %r12, (2 * 8)(%rsp);
	movq %r13, (3 * 8)(%rsp);
	movq %r14, (4 * 8)(%rsp);
	movq %r15, (5 * 8)(%rsp);
	CFI_REL_OFFSET(%rbp, 0 * 8);
	CFI_REL_OFFSET(%rbx, 1 * 8);
	CFI_REL_OFFSET(%r12, 2 * 8);
	CFI_REL_OFFSET(%r13, 3 * 8);
	CFI_REL_OFFSET(%r14, 4 * 8);
	CFI_REL_OFFSET(%r15, 5 * 8);

	movq %rcx, (6 * 8)(%rsp);
	movq %rsi, RX0;
	movq %rdx, RX1;
	movq %r8, RY0;

	/* Load offset */
	movq (0 * 8)(RX1), RT0;
	movq (1 * 8)(RX1), RT1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq (RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (0 * 8)(RX0), RAB0;
	movq (1 * 8)(RX0), RCD0;
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB0;
	xorq RT1, RCD0;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 8(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (2 * 8)(RX0), RAB1;
	movq (3 * 8)(RX0), RCD1;
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB1;
	xorq RT1, RCD1;

	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	movq 16(RY0), RY2;
	xorq (0 * 8)(RY2), RT0;
	xorq (1 * 8)(RY2), RT1;
	movq (4 * 8)(RX0), RAB2;
	movq (5 * 8)(RX0), RCD2;
	/* PX_i = P_i xor Offset_i */
	xorq RT0, RAB2;
	xorq RT1, RCD2;

	/* Store offset */
	movq RT0, (0 * 8)(RX1);
	movq RT1, (1 * 8)(RX1);

	/* C_i = ENCIPHER(K, PX_i) */
	call __twofish_enc_blk3;

	movq (6 * 8)(%rsp), RX1; /*checksum*/
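
	/* The three cipher blocks are folded together first, so the
	 * checksum in memory is updated with a single 128-bit xor. */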
	/* Checksum_i = C_i xor Checksum_i */
	xorq RCD0, RCD1;
	xorq RAB0, RAB1;
	xorq RCD1, RCD2;
	xorq RAB1, RAB2;
	xorq RCD2, (0 * 8)(RX1);
	xorq RAB2, (1 * 8)(RX1);

	movq (0 * 8)(%rsp), %rbp;
	movq (1 * 8)(%rsp), %rbx;
	movq (2 * 8)(%rsp), %r12;
	movq (3 * 8)(%rsp), %r13;
	movq (4 * 8)(%rsp), %r14;
	movq (5 * 8)(%rsp), %r15;
	CFI_RESTORE(%rbp);
	CFI_RESTORE(%rbx);
	CFI_RESTORE(%r12);
	CFI_RESTORE(%r13);
	CFI_RESTORE(%r14);
	CFI_RESTORE(%r15);
	addq $(8 * 8), %rsp;
	CFI_ADJUST_CFA_OFFSET(-8 * 8);

	EXIT_SYSV_FUNC
	ret_spec_stop;
	CFI_ENDPROC();
ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)

#endif /*USE_TWOFISH*/
#endif /*__x86_64*/