12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751 |
- /*
- * AES-NI + SSE2 implementation of AEGIS-128
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
- #include <linux/linkage.h>
- #include <asm/frame.h>
- #define STATE0 %xmm0 /* STATE0-STATE4: the five 128-bit state registers */
- #define STATE1 %xmm1
- #define STATE2 %xmm2
- #define STATE3 %xmm3
- #define STATE4 %xmm4
- #define KEY %xmm5 /* key during init; same register as MSG */
- #define MSG %xmm5 /* current message block; same register as KEY */
- #define T0 %xmm6 /* scratch registers */
- #define T1 %xmm7
- #define STATEP %rdi /* 1st argument: pointer to the 0x50-byte state */
- #define LEN %rsi /* 2nd argument: length in bytes (declared unsigned int in the prototypes) */
- #define SRC %rdx /* 3rd argument: source pointer */
- #define DST %rcx /* 4th argument: destination pointer */
- .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 /* mergeable, 32-byte entries: both constants form one entry */
- .align 16
- .Laegis128_const_0: /* Fibonacci numbers F(0)..F(15), each taken mod 256 */
- .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
- .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
- .Laegis128_const_1: /* the sequence continued: F(16)..F(31) mod 256 */
- .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
- .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
- .section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
- .align 16
- .Laegis128_counter: /* byte i holds the value i; compared against the length in dec_tail */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
- .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
- .text
- /*
- * aegis128_update
- *
- * One state update without the message XOR: each state register goes
- * through one AES round (aesenc) and is XORed with its neighbour. The
- * instruction order matters: every neighbour is read before it is itself
- * overwritten, and the old STATE4 is parked in T0 for the last round.
- * Callers XOR the message block into the register that holds the new
- * first state word afterwards.
- *
- * input:
- * STATE[0-4] - input state
- * output:
- * STATE[0-4] - output state (shifted positions): the new words 0, 1, 2,
- * 3, 4 are left in STATE4, STATE0, STATE1, STATE2, STATE3 respectively
- * changed:
- * T0
- */
- .macro aegis128_update
- movdqa STATE4, T0 /* save old STATE4 before it is overwritten */
- aesenc STATE0, STATE4 /* STATE4 = AESRound(STATE4) ^ STATE0 */
- aesenc STATE1, STATE0 /* STATE0 = AESRound(STATE0) ^ STATE1 */
- aesenc STATE2, STATE1 /* STATE1 = AESRound(STATE1) ^ STATE2 */
- aesenc STATE3, STATE2 /* STATE2 = AESRound(STATE2) ^ STATE3 */
- aesenc T0, STATE3 /* STATE3 = AESRound(STATE3) ^ old STATE4 */
- .endm
- /*
- * __load_partial: internal ABI
- *
- * Builds a zero-padded block from a partial tail: the (LEN & 0xF) bytes
- * starting at SRC + (LEN & 0x10) are placed in the low bytes of MSG and
- * the remaining bytes of MSG are zero. The bytes are gathered with one
- * 1-, 2-, 4- and 8-byte load per set bit of LEN, last piece first, so
- * only bytes belonging to the tail are read. Only bits 0-4 of LEN are
- * examined.
- *
- * input:
- * LEN - bytes
- * SRC - src
- * output:
- * MSG - message block
- * changed:
- * T0
- * %r8
- * %r9
- */
- __load_partial:
- xor %r9d, %r9d /* %r9 accumulates the low (up to) 7 bytes */
- pxor MSG, MSG /* result is all-zero when no bit of LEN & 0xF is set */
- mov LEN, %r8
- and $0x1, %r8
- jz .Lld_partial_1
- mov LEN, %r8
- and $0x1E, %r8 /* offset of the odd trailing byte */
- add SRC, %r8
- mov (%r8), %r9b
- .Lld_partial_1:
- mov LEN, %r8
- and $0x2, %r8
- jz .Lld_partial_2
- mov LEN, %r8
- and $0x1C, %r8 /* offset of the trailing 2-byte piece */
- add SRC, %r8
- shl $0x10, %r9 /* make room below the byte loaded so far */
- mov (%r8), %r9w
- .Lld_partial_2:
- mov LEN, %r8
- and $0x4, %r8
- jz .Lld_partial_4
- mov LEN, %r8
- and $0x18, %r8 /* offset of the trailing 4-byte piece */
- add SRC, %r8
- shl $32, %r9 /* make room below the bytes loaded so far */
- mov (%r8), %r8d /* 32-bit load zero-extends into %r8 */
- xor %r8, %r9
- .Lld_partial_4:
- movq %r9, MSG /* low 64 bits = gathered bytes, high 64 bits = 0 */
- mov LEN, %r8
- and $0x8, %r8
- jz .Lld_partial_8
- mov LEN, %r8
- and $0x10, %r8 /* offset of the leading 8-byte piece */
- add SRC, %r8
- pslldq $8, MSG /* move the gathered bytes to the upper half */
- movq (%r8), T0
- pxor T0, MSG /* first 8 bytes go into the lower half */
- .Lld_partial_8:
- ret
- ENDPROC(__load_partial)
- /*
- * __store_partial: internal ABI
- *
- * Writes the low bytes of T0 to DST, using at most one 8-, 4-, 2- and
- * 1-byte store (so at most 15 bytes are written). Nothing is written
- * when LEN is 0.
- *
- * input:
- * LEN - bytes (only the low 32 bits are used)
- * DST - dst
- * T0 - message block
- * changed:
- * T0
- * %r8
- * %r9
- * %r10
- */
- __store_partial:
- mov LEN, %r8
- /*
- * The length originates from an 'unsigned int' argument, so the upper
- * 32 bits of the register are not guaranteed to be zero. Drop them
- * before the 64-bit compares below.
- */
- mov %r8d, %r8d
- mov DST, %r9
- movq T0, %r10 /* %r10 = low 8 bytes still to be written */
- cmp $8, %r8
- jb .Lst_partial_8
- mov %r10, (%r9)
- psrldq $8, T0 /* continue with the upper 8 bytes of the block */
- movq T0, %r10
- sub $8, %r8
- add $8, %r9
- .Lst_partial_8:
- cmp $4, %r8
- jb .Lst_partial_4
- mov %r10d, (%r9)
- shr $32, %r10
- sub $4, %r8
- add $4, %r9
- .Lst_partial_4:
- cmp $2, %r8
- jb .Lst_partial_2
- mov %r10w, (%r9)
- shr $0x10, %r10
- sub $2, %r8
- add $2, %r9
- .Lst_partial_2:
- cmp $1, %r8
- jb .Lst_partial_1
- mov %r10b, (%r9)
- .Lst_partial_1:
- ret
- ENDPROC(__store_partial)
- /*
- * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
- *
- * Builds the initial state from the key and IV and writes it to *state:
- * STATE0 = key ^ iv, STATE1 = const_1, STATE2 = const_0,
- * STATE3 = key ^ const_0, STATE4 = key ^ const_1,
- * followed by ten state updates that alternately absorb key and key ^ iv.
- *
- * The key is loaded with movdqa and must therefore be 16-byte aligned;
- * the IV and the state may be unaligned (movdqu).
- */
- ENTRY(crypto_aegis128_aesni_init)
- FRAME_BEGIN
- /* load IV: */
- movdqu (%rdx), T1
- /* load key: */
- movdqa (%rsi), KEY
- pxor KEY, T1 /* T1 = key ^ iv for the rest of the function */
- movdqa T1, STATE0
- movdqa KEY, STATE3
- movdqa KEY, STATE4
- /* load the constants (RIP-relative, so no absolute relocation is needed): */
- movdqa .Laegis128_const_0(%rip), STATE2
- movdqa .Laegis128_const_1(%rip), STATE1
- pxor STATE2, STATE3
- pxor STATE1, STATE4
- /*
- * update 10 times with KEY / KEY xor IV; the pxor target rotates
- * because each update leaves the new first state word one register
- * further down (STATE4, STATE3, ..., STATE0, STATE4, ...):
- */
- aegis128_update; pxor KEY, STATE4
- aegis128_update; pxor T1, STATE3
- aegis128_update; pxor KEY, STATE2
- aegis128_update; pxor T1, STATE1
- aegis128_update; pxor KEY, STATE0
- aegis128_update; pxor T1, STATE4
- aegis128_update; pxor KEY, STATE3
- aegis128_update; pxor T1, STATE2
- aegis128_update; pxor KEY, STATE1
- aegis128_update; pxor T1, STATE0
- /* store the state (after 10 updates the registers are back in order): */
- movdqu STATE0, 0x00(STATEP)
- movdqu STATE1, 0x10(STATEP)
- movdqu STATE2, 0x20(STATEP)
- movdqu STATE3, 0x30(STATEP)
- movdqu STATE4, 0x40(STATEP)
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_init)
- /*
- * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
- * const void *data);
- *
- * Absorbs every complete 16-byte block of data into the state; a trailing
- * partial block (length % 16 bytes) is not touched. If length < 16 the
- * state is neither loaded nor stored.
- *
- * The loop is unrolled five times because each state update leaves the
- * state words rotated by one register; after five blocks the registers
- * are back in their original order. The .Lad_out_N exits store the
- * registers in the order matching the number of blocks processed in the
- * current pass.
- */
- ENTRY(crypto_aegis128_aesni_ad)
- FRAME_BEGIN
- /*
- * 'length' is an unsigned int, so the upper 32 bits of LEN (%rsi) are
- * not guaranteed to be zero on entry. Zero-extend it before it is used
- * in 64-bit compares and subtractions.
- */
- mov %esi, %esi
- cmp $0x10, LEN
- jb .Lad_out
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* pick the aligned-load loop only when SRC is 16-byte aligned: */
- mov SRC, %r8
- and $0xF, %r8
- jnz .Lad_u_loop
- .align 8
- .Lad_a_loop:
- movdqa 0x00(SRC), MSG
- aegis128_update
- pxor MSG, STATE4
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_1
- movdqa 0x10(SRC), MSG
- aegis128_update
- pxor MSG, STATE3
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_2
- movdqa 0x20(SRC), MSG
- aegis128_update
- pxor MSG, STATE2
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_3
- movdqa 0x30(SRC), MSG
- aegis128_update
- pxor MSG, STATE1
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_4
- movdqa 0x40(SRC), MSG
- aegis128_update
- pxor MSG, STATE0
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_0
- add $0x50, SRC
- jmp .Lad_a_loop
- .align 8
- .Lad_u_loop:
- movdqu 0x00(SRC), MSG
- aegis128_update
- pxor MSG, STATE4
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_1
- movdqu 0x10(SRC), MSG
- aegis128_update
- pxor MSG, STATE3
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_2
- movdqu 0x20(SRC), MSG
- aegis128_update
- pxor MSG, STATE2
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_3
- movdqu 0x30(SRC), MSG
- aegis128_update
- pxor MSG, STATE1
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_4
- movdqu 0x40(SRC), MSG
- aegis128_update
- pxor MSG, STATE0
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lad_out_0
- add $0x50, SRC
- jmp .Lad_u_loop
- /* store the state (five blocks done: registers are in order): */
- .Lad_out_0:
- movdqu STATE0, 0x00(STATEP)
- movdqu STATE1, 0x10(STATEP)
- movdqu STATE2, 0x20(STATEP)
- movdqu STATE3, 0x30(STATEP)
- movdqu STATE4, 0x40(STATEP)
- FRAME_END
- ret
- /* one block done in this pass: first state word is in STATE4 */
- .Lad_out_1:
- movdqu STATE4, 0x00(STATEP)
- movdqu STATE0, 0x10(STATEP)
- movdqu STATE1, 0x20(STATEP)
- movdqu STATE2, 0x30(STATEP)
- movdqu STATE3, 0x40(STATEP)
- FRAME_END
- ret
- /* two blocks done in this pass: first state word is in STATE3 */
- .Lad_out_2:
- movdqu STATE3, 0x00(STATEP)
- movdqu STATE4, 0x10(STATEP)
- movdqu STATE0, 0x20(STATEP)
- movdqu STATE1, 0x30(STATEP)
- movdqu STATE2, 0x40(STATEP)
- FRAME_END
- ret
- /* three blocks done in this pass: first state word is in STATE2 */
- .Lad_out_3:
- movdqu STATE2, 0x00(STATEP)
- movdqu STATE3, 0x10(STATEP)
- movdqu STATE4, 0x20(STATEP)
- movdqu STATE0, 0x30(STATEP)
- movdqu STATE1, 0x40(STATEP)
- FRAME_END
- ret
- /* four blocks done in this pass: first state word is in STATE1 */
- .Lad_out_4:
- movdqu STATE1, 0x00(STATEP)
- movdqu STATE2, 0x10(STATEP)
- movdqu STATE3, 0x20(STATEP)
- movdqu STATE4, 0x30(STATEP)
- movdqu STATE0, 0x40(STATEP)
- FRAME_END
- ret
- /* nothing processed: leave the stored state untouched */
- .Lad_out:
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_ad)
- /*
- * encrypt_block: encrypt one 16-byte block and absorb it into the state.
- *
- * a - 'a' or 'u': aligned or unaligned movdq for the load and store
- * s0..s4 - registers currently holding state words 0..4 (s0 is only
- * listed for readability; the body does not reference it)
- * i - index of the block within the unrolled loop; selects the
- * SRC/DST offset and the .Lenc_out_<i> exit label
- *
- * ciphertext = MSG ^ s1 ^ s4 ^ (s2 & s3). After the update the register
- * that was s4 holds the new first state word, which is where the
- * plaintext is XORed in. Leaves through .Lenc_out_<i> once fewer than
- * 16 bytes remain. Clobbers MSG, T0, T1 and the flags.
- */
- .macro encrypt_block a s0 s1 s2 s3 s4 i
- movdq\a (\i * 0x10)(SRC), MSG
- movdqa MSG, T0
- pxor \s1, T0
- pxor \s4, T0
- movdqa \s2, T1
- pand \s3, T1
- pxor T1, T0
- movdq\a T0, (\i * 0x10)(DST)
- aegis128_update
- pxor MSG, \s4
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Lenc_out_\i
- .endm
- /*
- * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
- * const void *src, void *dst);
- *
- * Encrypts every complete 16-byte block from src to dst and updates the
- * state; a trailing partial block is left alone. If length < 16 the
- * state is neither loaded nor stored.
- */
- ENTRY(crypto_aegis128_aesni_enc)
- FRAME_BEGIN
- /*
- * 'length' is an unsigned int, so the upper 32 bits of LEN (%rsi) are
- * not guaranteed to be zero on entry. Zero-extend it before it is used
- * in 64-bit compares and subtractions.
- */
- mov %esi, %esi
- cmp $0x10, LEN
- jb .Lenc_out
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* aligned loop only if both SRC and DST are 16-byte aligned: */
- mov SRC, %r8
- or DST, %r8
- and $0xF, %r8
- jnz .Lenc_u_loop
- .align 8
- .Lenc_a_loop:
- encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
- encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
- encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
- encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
- encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
- add $0x50, SRC
- add $0x50, DST
- jmp .Lenc_a_loop
- .align 8
- .Lenc_u_loop:
- encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
- encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
- encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
- encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
- encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
- add $0x50, SRC
- add $0x50, DST
- jmp .Lenc_u_loop
- /*
- * store the state: .Lenc_out_<i> is taken after block i of the current
- * pass, i.e. after i + 1 updates, so each exit stores the registers in
- * the correspondingly rotated order.
- */
- .Lenc_out_0:
- movdqu STATE4, 0x00(STATEP)
- movdqu STATE0, 0x10(STATEP)
- movdqu STATE1, 0x20(STATEP)
- movdqu STATE2, 0x30(STATEP)
- movdqu STATE3, 0x40(STATEP)
- FRAME_END
- ret
- .Lenc_out_1:
- movdqu STATE3, 0x00(STATEP)
- movdqu STATE4, 0x10(STATEP)
- movdqu STATE0, 0x20(STATEP)
- movdqu STATE1, 0x30(STATEP)
- movdqu STATE2, 0x40(STATEP)
- FRAME_END
- ret
- .Lenc_out_2:
- movdqu STATE2, 0x00(STATEP)
- movdqu STATE3, 0x10(STATEP)
- movdqu STATE4, 0x20(STATEP)
- movdqu STATE0, 0x30(STATEP)
- movdqu STATE1, 0x40(STATEP)
- FRAME_END
- ret
- .Lenc_out_3:
- movdqu STATE1, 0x00(STATEP)
- movdqu STATE2, 0x10(STATEP)
- movdqu STATE3, 0x20(STATEP)
- movdqu STATE4, 0x30(STATEP)
- movdqu STATE0, 0x40(STATEP)
- FRAME_END
- ret
- .Lenc_out_4:
- movdqu STATE0, 0x00(STATEP)
- movdqu STATE1, 0x10(STATEP)
- movdqu STATE2, 0x20(STATEP)
- movdqu STATE3, 0x30(STATEP)
- movdqu STATE4, 0x40(STATEP)
- FRAME_END
- ret
- /* nothing processed: leave the stored state untouched */
- .Lenc_out:
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_enc)
- /*
- * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
- * const void *src, void *dst);
- *
- * Encrypts one partial block: the tail bytes are loaded zero-padded by
- * __load_partial, the ciphertext block is computed in T0 and written by
- * __store_partial, and the zero-padded plaintext is absorbed into the
- * state with a single update.
- */
- ENTRY(crypto_aegis128_aesni_enc_tail)
- FRAME_BEGIN
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* encrypt message: */
- call __load_partial /* MSG = zero-padded plaintext tail */
- movdqa MSG, T0
- pxor STATE1, T0
- pxor STATE4, T0
- movdqa STATE2, T1
- pand STATE3, T1
- pxor T1, T0 /* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
- call __store_partial /* writes the low bytes of T0 to DST; clobbers T0 */
- aegis128_update
- pxor MSG, STATE4 /* absorb the padded plaintext into the new first word */
- /* store the state (one update done, so STATE4 comes first): */
- movdqu STATE4, 0x00(STATEP)
- movdqu STATE0, 0x10(STATEP)
- movdqu STATE1, 0x20(STATEP)
- movdqu STATE2, 0x30(STATEP)
- movdqu STATE3, 0x40(STATEP)
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_enc_tail)
- /*
- * decrypt_block: decrypt one 16-byte block and absorb it into the state.
- *
- * a - 'a' or 'u': aligned or unaligned movdq for the load and store
- * s0..s4 - registers currently holding state words 0..4 (s0 is only
- * listed for readability; the body does not reference it)
- * i - index of the block within the unrolled loop; selects the
- * SRC/DST offset and the .Ldec_out_<i> exit label
- *
- * plaintext = ciphertext ^ s1 ^ s4 ^ (s2 & s3), computed in place in MSG.
- * After the update the register that was s4 holds the new first state
- * word, which is where the plaintext is XORed in. Leaves through
- * .Ldec_out_<i> once fewer than 16 bytes remain. Clobbers MSG, T0, T1
- * and the flags.
- */
- .macro decrypt_block a s0 s1 s2 s3 s4 i
- movdq\a (\i * 0x10)(SRC), MSG
- pxor \s1, MSG
- pxor \s4, MSG
- movdqa \s2, T1
- pand \s3, T1
- pxor T1, MSG
- movdq\a MSG, (\i * 0x10)(DST)
- aegis128_update
- pxor MSG, \s4
- sub $0x10, LEN
- cmp $0x10, LEN
- jb .Ldec_out_\i
- .endm
- /*
- * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
- * const void *src, void *dst);
- *
- * Decrypts every complete 16-byte block from src to dst and updates the
- * state; a trailing partial block is left alone. If length < 16 the
- * state is neither loaded nor stored.
- */
- ENTRY(crypto_aegis128_aesni_dec)
- FRAME_BEGIN
- /*
- * 'length' is an unsigned int, so the upper 32 bits of LEN (%rsi) are
- * not guaranteed to be zero on entry. Zero-extend it before it is used
- * in 64-bit compares and subtractions.
- */
- mov %esi, %esi
- cmp $0x10, LEN
- jb .Ldec_out
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* aligned loop only if both SRC and DST are 16-byte aligned: */
- mov SRC, %r8
- or DST, %r8
- and $0xF, %r8
- jnz .Ldec_u_loop
- .align 8
- .Ldec_a_loop:
- decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
- decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
- decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
- decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
- decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
- add $0x50, SRC
- add $0x50, DST
- jmp .Ldec_a_loop
- .align 8
- .Ldec_u_loop:
- decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
- decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
- decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
- decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
- decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
- add $0x50, SRC
- add $0x50, DST
- jmp .Ldec_u_loop
- /*
- * store the state: .Ldec_out_<i> is taken after block i of the current
- * pass, i.e. after i + 1 updates, so each exit stores the registers in
- * the correspondingly rotated order.
- */
- .Ldec_out_0:
- movdqu STATE4, 0x00(STATEP)
- movdqu STATE0, 0x10(STATEP)
- movdqu STATE1, 0x20(STATEP)
- movdqu STATE2, 0x30(STATEP)
- movdqu STATE3, 0x40(STATEP)
- FRAME_END
- ret
- .Ldec_out_1:
- movdqu STATE3, 0x00(STATEP)
- movdqu STATE4, 0x10(STATEP)
- movdqu STATE0, 0x20(STATEP)
- movdqu STATE1, 0x30(STATEP)
- movdqu STATE2, 0x40(STATEP)
- FRAME_END
- ret
- .Ldec_out_2:
- movdqu STATE2, 0x00(STATEP)
- movdqu STATE3, 0x10(STATEP)
- movdqu STATE4, 0x20(STATEP)
- movdqu STATE0, 0x30(STATEP)
- movdqu STATE1, 0x40(STATEP)
- FRAME_END
- ret
- .Ldec_out_3:
- movdqu STATE1, 0x00(STATEP)
- movdqu STATE2, 0x10(STATEP)
- movdqu STATE3, 0x20(STATEP)
- movdqu STATE4, 0x30(STATEP)
- movdqu STATE0, 0x40(STATEP)
- FRAME_END
- ret
- .Ldec_out_4:
- movdqu STATE0, 0x00(STATEP)
- movdqu STATE1, 0x10(STATEP)
- movdqu STATE2, 0x20(STATEP)
- movdqu STATE3, 0x30(STATEP)
- movdqu STATE4, 0x40(STATEP)
- FRAME_END
- ret
- /* nothing processed: leave the stored state untouched */
- .Ldec_out:
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_dec)
- /*
- * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
- * const void *src, void *dst);
- *
- * Decrypts one partial block: the ciphertext tail is loaded zero-padded
- * by __load_partial, decrypted in MSG and written by __store_partial.
- * Because the padding bytes of the decrypted block are not zero, they
- * are masked off before the block is absorbed into the state.
- */
- ENTRY(crypto_aegis128_aesni_dec_tail)
- FRAME_BEGIN
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* decrypt message: */
- call __load_partial /* MSG = zero-padded ciphertext tail */
- pxor STATE1, MSG
- pxor STATE4, MSG
- movdqa STATE2, T1
- pand STATE3, T1
- pxor T1, MSG /* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
- movdqa MSG, T0
- call __store_partial /* writes the low bytes of T0 to DST; clobbers T0 */
- /*
- * mask with byte count: the four punpcklbw steps replicate the low
- * byte of LEN into all 16 bytes of T0; pcmpgtb then yields 0xff in
- * every byte whose index (from the counter constant) is below that
- * value, and 0x00 elsewhere.
- */
- movq LEN, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- /* RIP-relative load, so no absolute relocation is needed: */
- movdqa .Laegis128_counter(%rip), T1
- pcmpgtb T1, T0
- pand T0, MSG /* clear the bytes beyond the message length */
- aegis128_update
- pxor MSG, STATE4 /* absorb the masked plaintext into the new first word */
- /* store the state (one update done, so STATE4 comes first): */
- movdqu STATE4, 0x00(STATEP)
- movdqu STATE0, 0x10(STATEP)
- movdqu STATE1, 0x20(STATEP)
- movdqu STATE2, 0x30(STATEP)
- movdqu STATE3, 0x40(STATEP)
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_dec_tail)
- /*
- * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
- * u64 assoclen, u64 cryptlen);
- *
- * Runs the finalization: a length block built from assoclen and cryptlen
- * (both converted to bit counts) is XORed with the fourth state word and
- * absorbed over seven state updates. The XOR of all five resulting state
- * registers is then XORed into the 16 bytes at tag_xor. The state in
- * memory is only read, never written back.
- */
- ENTRY(crypto_aegis128_aesni_final)
- FRAME_BEGIN
- /* load the state: */
- movdqu 0x00(STATEP), STATE0
- movdqu 0x10(STATEP), STATE1
- movdqu 0x20(STATEP), STATE2
- movdqu 0x30(STATEP), STATE3
- movdqu 0x40(STATEP), STATE4
- /* prepare length block: */
- movq %rdx, MSG /* low 64 bits = assoclen */
- movq %rcx, T0
- pslldq $8, T0 /* move cryptlen to the high 64 bits */
- pxor T0, MSG
- psllq $3, MSG /* multiply by 8 (to get bit count) */
- pxor STATE3, MSG
- /* update state (the pxor target follows the rotating first state word): */
- aegis128_update; pxor MSG, STATE4
- aegis128_update; pxor MSG, STATE3
- aegis128_update; pxor MSG, STATE2
- aegis128_update; pxor MSG, STATE1
- aegis128_update; pxor MSG, STATE0
- aegis128_update; pxor MSG, STATE4
- aegis128_update; pxor MSG, STATE3
- /* xor tag: */
- movdqu (%rsi), MSG /* %rsi is tag_xor here, not a length */
- pxor STATE0, MSG
- pxor STATE1, MSG
- pxor STATE2, MSG
- pxor STATE3, MSG
- pxor STATE4, MSG
- movdqu MSG, (%rsi)
- FRAME_END
- ret
- ENDPROC(crypto_aegis128_aesni_final)
|