aegis128-aesni-asm.S 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. /*
  2. * AES-NI + SSE2 implementation of AEGIS-128
  3. *
  4. * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  5. * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
  6. *
  7. * This program is free software; you can redistribute it and/or modify it
  8. * under the terms of the GNU General Public License version 2 as published
  9. * by the Free Software Foundation.
  10. */
  11. #include <linux/linkage.h>
  12. #include <asm/frame.h>
  /* The five 128-bit AEGIS-128 state blocks. */
  #define STATE0  %xmm0
  #define STATE1  %xmm1
  #define STATE2  %xmm2
  #define STATE3  %xmm3
  #define STATE4  %xmm4

  /*
   * KEY and MSG are two names for the same register (%xmm5): loading a
   * message block into MSG overwrites whatever KEY held.
   */
  #define KEY     %xmm5
  #define MSG     %xmm5

  /* Scratch vector registers. */
  #define T0      %xmm6
  #define T1      %xmm7

  /*
   * General-purpose registers used by the routines below: state pointer,
   * byte count, source pointer and destination pointer.
   */
  #define STATEP  %rdi
  #define LEN     %rsi
  #define SRC     %rdx
  #define DST     %rcx
  /*
   * Initialisation constants.  Read back to back, const_0 followed by
   * const_1 is the Fibonacci sequence reduced modulo 256.
   */
  .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
  .align 16
  .Laegis128_const_0:
          .byte 0x00, 0x01, 0x01, 0x02
          .byte 0x03, 0x05, 0x08, 0x0d
          .byte 0x15, 0x22, 0x37, 0x59
          .byte 0x90, 0xe9, 0x79, 0x62
  .Laegis128_const_1:
          .byte 0xdb, 0x3d, 0x18, 0x55
          .byte 0x6d, 0xc2, 0x2f, 0xf1
          .byte 0x20, 0x11, 0x31, 0x42
          .byte 0x73, 0xb5, 0x28, 0xdd

  /*
   * Byte i of this block holds the value i.  The tail-decryption routine
   * compares it against a broadcast byte count to build a byte mask.
   */
  .section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
  .align 16
  .Laegis128_counter:
          .byte 0x00, 0x01, 0x02, 0x03
          .byte 0x04, 0x05, 0x06, 0x07
          .byte 0x08, 0x09, 0x0a, 0x0b
          .byte 0x0c, 0x0d, 0x0e, 0x0f
  .text

  /*
   * aegis128_update - one state-update step, without the message XOR
   *
   * Each register receives one AES round of itself XORed with the previous
   * value of its neighbour (aesenc src, dst computes dst = AESRound(dst) ^ src):
   *
   *   STATE4 = AESRound(STATE4) ^ STATE0
   *   STATE0 = AESRound(STATE0) ^ STATE1
   *   STATE1 = AESRound(STATE1) ^ STATE2
   *   STATE2 = AESRound(STATE2) ^ STATE3
   *   STATE3 = AESRound(STATE3) ^ old STATE4
   *
   * The instruction order matters: every aesenc must read its source before
   * the following aesenc overwrites that register.
   *
   * input:
   *   STATE[0-4] - input state
   * output:
   *   STATE[0-4] - output state (shifted positions: the block that belongs
   *                in slot 0 is now in STATE4, slot 1 in STATE0, and so on)
   * changed:
   *   T0
   */
  .macro aegis128_update
          movdqa  STATE4, T0              /* STATE4 is overwritten first; keep a copy */
          aesenc  STATE0, STATE4
          aesenc  STATE1, STATE0
          aesenc  STATE2, STATE1
          aesenc  STATE3, STATE2
          aesenc  T0, STATE3              /* uses the saved, pre-update STATE4 */
  .endm
  /*
   * __load_partial: internal ABI
   *
   * Builds a zero-padded message block from a short input.  Bits 0..3 of LEN
   * select a 1-, 2-, 4- and 8-byte piece respectively; each piece is read at
   * SRC + (LEN & mask) with mask = 0x1E, 0x1C, 0x18, 0x10.  Pieces are
   * collected from the highest offset downwards, shifting what was already
   * gathered up, so the bytes end up in MSG in memory order.
   *
   * input:
   *   LEN - bytes
   *   SRC - src
   * output:
   *   MSG - message block
   * changed:
   *   T0
   *   %r8
   *   %r9
   */
  __load_partial:
          pxor    MSG, MSG                /* result defaults to all zeroes */
          xor     %r9d, %r9d              /* %r9 accumulates the low 8 bytes */

          /* 1-byte piece */
          mov     LEN, %r8
          and     $1, %r8
          jz      .Lload_no_byte
          mov     LEN, %r8
          and     $0x1E, %r8
          add     SRC, %r8
          mov     (%r8), %r9b
  .Lload_no_byte:

          /* 2-byte piece */
          mov     LEN, %r8
          and     $2, %r8
          jz      .Lload_no_word
          mov     LEN, %r8
          and     $0x1C, %r8
          add     SRC, %r8
          shl     $16, %r9                /* make room below the byte */
          mov     (%r8), %r9w
  .Lload_no_word:

          /* 4-byte piece */
          mov     LEN, %r8
          and     $4, %r8
          jz      .Lload_no_dword
          mov     LEN, %r8
          and     $0x18, %r8
          add     SRC, %r8
          shl     $32, %r9
          mov     (%r8), %r8d             /* 32-bit load zero-extends into %r8 */
          xor     %r8, %r9
  .Lload_no_dword:
          movq    %r9, MSG

          /* 8-byte piece */
          mov     LEN, %r8
          and     $8, %r8
          jz      .Lload_done
          mov     LEN, %r8
          and     $0x10, %r8
          add     SRC, %r8
          pslldq  $8, MSG                 /* move gathered bytes to the high half */
          movq    (%r8), T0
          pxor    T0, MSG
  .Lload_done:
          ret
  ENDPROC(__load_partial)
  /*
   * __store_partial: internal ABI
   *
   * Writes the low LEN bytes of T0 to DST as an 8-, 4-, 2- and 1-byte store,
   * each one issued only while at least that many bytes remain.  The
   * remaining-count checks are signed 64-bit compares on a copy of LEN.
   *
   * input:
   *   LEN - bytes
   *   DST - dst
   *   T0  - message block
   * changed:
   *   T0 (shifted right by 8 bytes when LEN >= 8)
   *   %r8
   *   %r9
   *   %r10
   */
  __store_partial:
          movq    T0, %r10                /* %r10 = bytes still to be written */
          mov     DST, %r9                /* %r9  = write cursor */
          mov     LEN, %r8                /* %r8  = bytes remaining */

          cmp     $8, %r8
          jl      .Lstore_lt8
          mov     %r10, (%r9)
          psrldq  $8, T0                  /* bring the high half down */
          movq    T0, %r10
          sub     $8, %r8
          add     $8, %r9
  .Lstore_lt8:

          cmp     $4, %r8
          jl      .Lstore_lt4
          mov     %r10d, (%r9)
          shr     $32, %r10
          sub     $4, %r8
          add     $4, %r9
  .Lstore_lt4:

          cmp     $2, %r8
          jl      .Lstore_lt2
          mov     %r10w, (%r9)
          shr     $16, %r10
          sub     $2, %r8
          add     $2, %r9
  .Lstore_lt2:

          cmp     $1, %r8
          jl      .Lstore_done
          mov     %r10b, (%r9)
  .Lstore_done:
          ret
  ENDPROC(__store_partial)
  /*
   * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
   *
   * Builds the initial state from the key and IV and writes it to *state:
   *
   *   STATE0 = key ^ iv
   *   STATE1 = const_1
   *   STATE2 = const_0
   *   STATE3 = key ^ const_0
   *   STATE4 = key ^ const_1
   *
   * followed by ten update steps that alternately absorb key and key ^ iv.
   *
   * in:  %rdi = state (STATEP), %rsi = key, %rdx = iv
   * The key is loaded with movdqa and must therefore be 16-byte aligned;
   * the IV and the state buffer are accessed with unaligned moves.
   *
   * The constants are addressed RIP-relative so that the code does not depend
   * on absolute relocations.
   */
  ENTRY(crypto_aegis128_aesni_init)
          FRAME_BEGIN

          /* load IV: */
          movdqu  (%rdx), T1

          /* load key: */
          movdqa  (%rsi), KEY

          pxor    KEY, T1                 /* T1 = key ^ iv for the rest of the routine */
          movdqa  T1, STATE0
          movdqa  KEY, STATE3
          movdqa  KEY, STATE4

          /* load the constants: */
          movdqa  .Laegis128_const_0(%rip), STATE2
          movdqa  .Laegis128_const_1(%rip), STATE1
          pxor    STATE2, STATE3
          pxor    STATE1, STATE4

          /*
           * update 10 times with KEY / KEY xor IV.  The XOR target walks
           * STATE4, STATE3, ... because each update shifts the register
           * that holds slot 0 by one position.
           */
          aegis128_update
          pxor    KEY, STATE4
          aegis128_update
          pxor    T1, STATE3
          aegis128_update
          pxor    KEY, STATE2
          aegis128_update
          pxor    T1, STATE1
          aegis128_update
          pxor    KEY, STATE0
          aegis128_update
          pxor    T1, STATE4
          aegis128_update
          pxor    KEY, STATE3
          aegis128_update
          pxor    T1, STATE2
          aegis128_update
          pxor    KEY, STATE1
          aegis128_update
          pxor    T1, STATE0

          /* store the state (ten shifts bring the slots back in register order): */
          movdqu  STATE0, 0x00(STATEP)
          movdqu  STATE1, 0x10(STATEP)
          movdqu  STATE2, 0x20(STATEP)
          movdqu  STATE3, 0x30(STATEP)
          movdqu  STATE4, 0x40(STATEP)

          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_init)
  /*
   * aegis128_ad_block - absorb one 16-byte block of associated data
   *   a     a or u: selects movdqa / movdqu for the load from SRC
   *   s     register that holds state slot 0 after the update
   *   i     block index inside the five-block unrolled loop
   *   next  suffix of the .Lad_out_* label to leave through when fewer than
   *         16 bytes remain after this block
   */
  .macro aegis128_ad_block a s i next
          movdq\a (\i * 0x10)(SRC), MSG
          aegis128_update
          pxor    MSG, \s
          sub     $0x10, LEN
          cmp     $0x10, LEN
          jl      .Lad_out_\next
  .endm

  /*
   * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
   *                               const void *data);
   *
   * Absorbs every complete 16-byte block of data into the state.  Returns
   * without touching the state when length < 16; a trailing partial block is
   * not consumed.
   *
   * in:  STATEP = state, LEN = length, SRC = data
   * clobbers: STATE0-STATE4, MSG, T0, %r8, LEN, SRC, flags
   *
   * The loop is unrolled five times because each update shifts the registers
   * by one slot; the five exits store the registers back in the order that
   * matches the number of blocks processed modulo 5.
   */
  ENTRY(crypto_aegis128_aesni_ad)
          FRAME_BEGIN

          /*
           * length is a 32-bit argument, so only %esi is defined on entry.
           * LEN is the full %rsi; clear its upper half before any 64-bit use.
           */
          mov     %esi, %esi

          cmp     $0x10, LEN
          jb      .Lad_out

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /* pick the aligned loop only when SRC is 16-byte aligned */
          mov     SRC, %r8
          and     $0xF, %r8
          jnz     .Lad_u_loop

  .align 8
  .Lad_a_loop:
          aegis128_ad_block a STATE4 0 1
          aegis128_ad_block a STATE3 1 2
          aegis128_ad_block a STATE2 2 3
          aegis128_ad_block a STATE1 3 4
          aegis128_ad_block a STATE0 4 0
          add     $0x50, SRC
          jmp     .Lad_a_loop

  .align 8
  .Lad_u_loop:
          aegis128_ad_block u STATE4 0 1
          aegis128_ad_block u STATE3 1 2
          aegis128_ad_block u STATE2 2 3
          aegis128_ad_block u STATE1 3 4
          aegis128_ad_block u STATE0 4 0
          add     $0x50, SRC
          jmp     .Lad_u_loop

          /* store the state: */
  .Lad_out_0:
          movdqu  STATE0, 0x00(STATEP)
          movdqu  STATE1, 0x10(STATEP)
          movdqu  STATE2, 0x20(STATEP)
          movdqu  STATE3, 0x30(STATEP)
          movdqu  STATE4, 0x40(STATEP)
          FRAME_END
          ret

  .Lad_out_1:
          movdqu  STATE4, 0x00(STATEP)
          movdqu  STATE0, 0x10(STATEP)
          movdqu  STATE1, 0x20(STATEP)
          movdqu  STATE2, 0x30(STATEP)
          movdqu  STATE3, 0x40(STATEP)
          FRAME_END
          ret

  .Lad_out_2:
          movdqu  STATE3, 0x00(STATEP)
          movdqu  STATE4, 0x10(STATEP)
          movdqu  STATE0, 0x20(STATEP)
          movdqu  STATE1, 0x30(STATEP)
          movdqu  STATE2, 0x40(STATEP)
          FRAME_END
          ret

  .Lad_out_3:
          movdqu  STATE2, 0x00(STATEP)
          movdqu  STATE3, 0x10(STATEP)
          movdqu  STATE4, 0x20(STATEP)
          movdqu  STATE0, 0x30(STATEP)
          movdqu  STATE1, 0x40(STATEP)
          FRAME_END
          ret

  .Lad_out_4:
          movdqu  STATE1, 0x00(STATEP)
          movdqu  STATE2, 0x10(STATEP)
          movdqu  STATE3, 0x20(STATEP)
          movdqu  STATE4, 0x30(STATEP)
          movdqu  STATE0, 0x40(STATEP)
          FRAME_END
          ret

          /* fewer than 16 bytes on entry: nothing was loaded or changed */
  .Lad_out:
          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_ad)
  /*
   * encrypt_block - encrypt one 16-byte block and absorb the plaintext
   *   a       a or u: selects movdqa / movdqu for the SRC load and DST store
   *   s0-s4   registers currently holding state slots 0..4 (s0 is not used)
   *   i       block index inside the unrolled loop; also the suffix of the
   *           .Lenc_out_* label taken when fewer than 16 bytes remain
   *
   * ciphertext = plaintext ^ s1 ^ s4 ^ (s2 & s3)
   *
   * changed: MSG (left holding the plaintext), T0, T1, LEN, flags, and the
   * state registers through aegis128_update
   */
  .macro encrypt_block a s0 s1 s2 s3 s4 i
          movdq\a (\i * 0x10)(SRC), MSG

          /* T1 = s2 & s3 */
          movdqa  \s2, T1
          pand    \s3, T1

          /* T0 = ciphertext; MSG is kept intact for the state update */
          movdqa  MSG, T0
          pxor    \s1, T0
          pxor    \s4, T0
          pxor    T1, T0
          movdq\a T0, (\i * 0x10)(DST)

          aegis128_update
          pxor    MSG, \s4

          sub     $0x10, LEN
          cmp     $0x10, LEN
          jl      .Lenc_out_\i
  .endm
  /*
   * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
   *                                const void *src, void *dst);
   *
   * Encrypts every complete 16-byte block from src to dst and updates the
   * state.  Returns without touching anything when length < 16; a trailing
   * partial block is not consumed.
   *
   * in:  STATEP = state, LEN = length, SRC = src, DST = dst
   * clobbers: STATE0-STATE4, MSG, T0, T1, %r8, LEN, SRC, DST, flags
   *
   * The .Lenc_out_N labels are the targets of the exit branch inside
   * encrypt_block; N is the index of the last block processed in the
   * current five-block group and selects the matching register order.
   */
  ENTRY(crypto_aegis128_aesni_enc)
          FRAME_BEGIN

          /*
           * length is a 32-bit argument, so only %esi is defined on entry.
           * LEN is the full %rsi; clear its upper half before any 64-bit use.
           */
          mov     %esi, %esi

          cmp     $0x10, LEN
          jb      .Lenc_out

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /* the aligned loop needs both SRC and DST 16-byte aligned */
          mov     SRC, %r8
          or      DST, %r8
          and     $0xF, %r8
          jnz     .Lenc_u_loop

  .align 8
  .Lenc_a_loop:
          encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
          encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
          encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
          encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
          encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
          add     $0x50, SRC
          add     $0x50, DST
          jmp     .Lenc_a_loop

  .align 8
  .Lenc_u_loop:
          encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
          encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
          encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
          encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
          encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
          add     $0x50, SRC
          add     $0x50, DST
          jmp     .Lenc_u_loop

          /* store the state: */
  .Lenc_out_0:
          movdqu  STATE4, 0x00(STATEP)
          movdqu  STATE0, 0x10(STATEP)
          movdqu  STATE1, 0x20(STATEP)
          movdqu  STATE2, 0x30(STATEP)
          movdqu  STATE3, 0x40(STATEP)
          FRAME_END
          ret

  .Lenc_out_1:
          movdqu  STATE3, 0x00(STATEP)
          movdqu  STATE4, 0x10(STATEP)
          movdqu  STATE0, 0x20(STATEP)
          movdqu  STATE1, 0x30(STATEP)
          movdqu  STATE2, 0x40(STATEP)
          FRAME_END
          ret

  .Lenc_out_2:
          movdqu  STATE2, 0x00(STATEP)
          movdqu  STATE3, 0x10(STATEP)
          movdqu  STATE4, 0x20(STATEP)
          movdqu  STATE0, 0x30(STATEP)
          movdqu  STATE1, 0x40(STATEP)
          FRAME_END
          ret

  .Lenc_out_3:
          movdqu  STATE1, 0x00(STATEP)
          movdqu  STATE2, 0x10(STATEP)
          movdqu  STATE3, 0x20(STATEP)
          movdqu  STATE4, 0x30(STATEP)
          movdqu  STATE0, 0x40(STATEP)
          FRAME_END
          ret

  .Lenc_out_4:
          movdqu  STATE0, 0x00(STATEP)
          movdqu  STATE1, 0x10(STATEP)
          movdqu  STATE2, 0x20(STATEP)
          movdqu  STATE3, 0x30(STATEP)
          movdqu  STATE4, 0x40(STATEP)
          FRAME_END
          ret

          /* fewer than 16 bytes on entry: nothing was loaded or changed */
  .Lenc_out:
          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_enc)
  /*
   * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
   *                                     const void *src, void *dst);
   *
   * Encrypts one final short block: the input is gathered zero-padded by
   * __load_partial, the first length bytes of the ciphertext are written by
   * __store_partial, and the zero-padded plaintext is absorbed into the
   * state.
   *
   * in:  STATEP = state, LEN = length, SRC = src, DST = dst
   * clobbers: STATE0-STATE4, MSG, T0, T1, %r8-%r10, LEN (upper half), flags
   */
  ENTRY(crypto_aegis128_aesni_enc_tail)
          FRAME_BEGIN

          /*
           * length is a 32-bit argument, so only %esi is defined on entry.
           * __store_partial compares LEN as a 64-bit value, so stray upper
           * bits would make it write more bytes than requested.
           */
          mov     %esi, %esi

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /* encrypt message: T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
          call    __load_partial

          movdqa  MSG, T0
          pxor    STATE1, T0
          pxor    STATE4, T0
          movdqa  STATE2, T1
          pand    STATE3, T1
          pxor    T1, T0

          call    __store_partial

          aegis128_update
          pxor    MSG, STATE4

          /* store the state (one update: slot 0 now lives in STATE4): */
          movdqu  STATE4, 0x00(STATEP)
          movdqu  STATE0, 0x10(STATEP)
          movdqu  STATE1, 0x20(STATEP)
          movdqu  STATE2, 0x30(STATEP)
          movdqu  STATE3, 0x40(STATEP)

          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_enc_tail)
  /*
   * decrypt_block - decrypt one 16-byte block and absorb the plaintext
   *   a       a or u: selects movdqa / movdqu for the SRC load and DST store
   *   s0-s4   registers currently holding state slots 0..4 (s0 is not used)
   *   i       block index inside the unrolled loop; also the suffix of the
   *           .Ldec_out_* label taken when fewer than 16 bytes remain
   *
   * plaintext = ciphertext ^ s1 ^ s4 ^ (s2 & s3)
   *
   * changed: MSG (left holding the plaintext), T0, T1, LEN, flags, and the
   * state registers through aegis128_update
   */
  .macro decrypt_block a s0 s1 s2 s3 s4 i
          movdq\a (\i * 0x10)(SRC), MSG

          /* T1 = s2 & s3 */
          movdqa  \s2, T1
          pand    \s3, T1

          /* turn MSG into the plaintext in place */
          pxor    \s1, MSG
          pxor    \s4, MSG
          pxor    T1, MSG
          movdq\a MSG, (\i * 0x10)(DST)

          aegis128_update
          pxor    MSG, \s4

          sub     $0x10, LEN
          cmp     $0x10, LEN
          jl      .Ldec_out_\i
  .endm
  /*
   * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
   *                                const void *src, void *dst);
   *
   * Decrypts every complete 16-byte block from src to dst and updates the
   * state.  Returns without touching anything when length < 16; a trailing
   * partial block is not consumed.
   *
   * in:  STATEP = state, LEN = length, SRC = src, DST = dst
   * clobbers: STATE0-STATE4, MSG, T0, T1, %r8, LEN, SRC, DST, flags
   *
   * The .Ldec_out_N labels are the targets of the exit branch inside
   * decrypt_block; N is the index of the last block processed in the
   * current five-block group and selects the matching register order.
   */
  ENTRY(crypto_aegis128_aesni_dec)
          FRAME_BEGIN

          /*
           * length is a 32-bit argument, so only %esi is defined on entry.
           * LEN is the full %rsi; clear its upper half before any 64-bit use.
           */
          mov     %esi, %esi

          cmp     $0x10, LEN
          jb      .Ldec_out

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /* the aligned loop needs both SRC and DST 16-byte aligned */
          mov     SRC, %r8
          or      DST, %r8
          and     $0xF, %r8
          jnz     .Ldec_u_loop

  .align 8
  .Ldec_a_loop:
          decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
          decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
          decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
          decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
          decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
          add     $0x50, SRC
          add     $0x50, DST
          jmp     .Ldec_a_loop

  .align 8
  .Ldec_u_loop:
          decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
          decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
          decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
          decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
          decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
          add     $0x50, SRC
          add     $0x50, DST
          jmp     .Ldec_u_loop

          /* store the state: */
  .Ldec_out_0:
          movdqu  STATE4, 0x00(STATEP)
          movdqu  STATE0, 0x10(STATEP)
          movdqu  STATE1, 0x20(STATEP)
          movdqu  STATE2, 0x30(STATEP)
          movdqu  STATE3, 0x40(STATEP)
          FRAME_END
          ret

  .Ldec_out_1:
          movdqu  STATE3, 0x00(STATEP)
          movdqu  STATE4, 0x10(STATEP)
          movdqu  STATE0, 0x20(STATEP)
          movdqu  STATE1, 0x30(STATEP)
          movdqu  STATE2, 0x40(STATEP)
          FRAME_END
          ret

  .Ldec_out_2:
          movdqu  STATE2, 0x00(STATEP)
          movdqu  STATE3, 0x10(STATEP)
          movdqu  STATE4, 0x20(STATEP)
          movdqu  STATE0, 0x30(STATEP)
          movdqu  STATE1, 0x40(STATEP)
          FRAME_END
          ret

  .Ldec_out_3:
          movdqu  STATE1, 0x00(STATEP)
          movdqu  STATE2, 0x10(STATEP)
          movdqu  STATE3, 0x20(STATEP)
          movdqu  STATE4, 0x30(STATEP)
          movdqu  STATE0, 0x40(STATEP)
          FRAME_END
          ret

  .Ldec_out_4:
          movdqu  STATE0, 0x00(STATEP)
          movdqu  STATE1, 0x10(STATEP)
          movdqu  STATE2, 0x20(STATEP)
          movdqu  STATE3, 0x30(STATEP)
          movdqu  STATE4, 0x40(STATEP)
          FRAME_END
          ret

          /* fewer than 16 bytes on entry: nothing was loaded or changed */
  .Ldec_out:
          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_dec)
  /*
   * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
   *                                     const void *src, void *dst);
   *
   * Decrypts one final short block.  The keystream is XORed over the whole
   * zero-padded input, so the bytes past length hold keystream rather than
   * zero; they are masked off again before the plaintext is absorbed into
   * the state.
   *
   * in:  STATEP = state, LEN = length, SRC = src, DST = dst
   * clobbers: STATE0-STATE4, MSG, T0, T1, %r8-%r10, LEN (upper half), flags
   */
  ENTRY(crypto_aegis128_aesni_dec_tail)
          FRAME_BEGIN

          /*
           * length is a 32-bit argument, so only %esi is defined on entry.
           * __store_partial compares LEN as a 64-bit value, so stray upper
           * bits would make it write more bytes than requested.
           */
          mov     %esi, %esi

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /* decrypt message: MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
          call    __load_partial

          pxor    STATE1, MSG
          pxor    STATE4, MSG
          movdqa  STATE2, T1
          pand    STATE3, T1
          pxor    T1, MSG

          movdqa  MSG, T0
          call    __store_partial

          /*
           * mask with byte count: four punpcklbw steps replicate the low
           * byte of LEN into all 16 bytes of T0; the signed byte compare
           * against 0, 1, ..., 15 then leaves 0xff in every byte whose
           * index is below that count.
           */
          movq    LEN, T0
          punpcklbw T0, T0
          punpcklbw T0, T0
          punpcklbw T0, T0
          punpcklbw T0, T0
          movdqa  .Laegis128_counter(%rip), T1
          pcmpgtb T1, T0
          pand    T0, MSG

          aegis128_update
          pxor    MSG, STATE4

          /* store the state (one update: slot 0 now lives in STATE4): */
          movdqu  STATE4, 0x00(STATEP)
          movdqu  STATE0, 0x10(STATEP)
          movdqu  STATE1, 0x20(STATEP)
          movdqu  STATE2, 0x30(STATEP)
          movdqu  STATE3, 0x40(STATEP)

          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_dec_tail)
  /*
   * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
   *                                  u64 assoclen, u64 cryptlen);
   *
   * Runs the seven finalisation updates and XORs the resulting tag into the
   * 16 bytes at tag_xor.  The state buffer is only read; it is not written
   * back.
   *
   * in:  STATEP = state, %rsi = tag_xor, %rdx = assoclen, %rcx = cryptlen
   * clobbers: STATE0-STATE4, MSG, T0
   */
  ENTRY(crypto_aegis128_aesni_final)
          FRAME_BEGIN

          /* load the state: */
          movdqu  0x00(STATEP), STATE0
          movdqu  0x10(STATEP), STATE1
          movdqu  0x20(STATEP), STATE2
          movdqu  0x30(STATEP), STATE3
          movdqu  0x40(STATEP), STATE4

          /*
           * prepare length block: low qword = assoclen, high qword =
           * cryptlen, each converted from bytes to bits, then XORed with
           * STATE3.
           */
          movq    %rcx, T0
          pslldq  $8, T0
          movq    %rdx, MSG
          pxor    T0, MSG
          psllq   $3, MSG                 /* multiply by 8 (to get bit count) */

          pxor    STATE3, MSG

          /* update state: the same block is absorbed seven times */
          aegis128_update
          pxor    MSG, STATE4
          aegis128_update
          pxor    MSG, STATE3
          aegis128_update
          pxor    MSG, STATE2
          aegis128_update
          pxor    MSG, STATE1
          aegis128_update
          pxor    MSG, STATE0
          aegis128_update
          pxor    MSG, STATE4
          aegis128_update
          pxor    MSG, STATE3

          /* xor tag: *tag_xor ^= STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4 */
          movdqu  (%rsi), MSG

          pxor    STATE0, MSG
          pxor    STATE1, MSG
          pxor    STATE2, MSG
          pxor    STATE3, MSG
          pxor    STATE4, MSG

          movdqu  MSG, (%rsi)

          FRAME_END
          ret
  ENDPROC(crypto_aegis128_aesni_final)