aes-ce-ccm-core.S 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. /*
  2. * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  3. *
  4. * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <linux/linkage.h>
  11. #include <asm/assembler.h>
  12. .text
  13. .arch armv8-a+crypto
  /*
   * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
   *			     u32 *macp, u8 const rk[], u32 rounds);
   *
   * Fold abytes bytes of input into the 16-byte CBC-MAC state in mac[].
   *
   * In:	x0 = mac, x1 = in, w2 = abytes, x3 = macp, x4 = rk, w5 = rounds
   *
   * *macp is the number of bytes of the current 16-byte block that have
   * already been XORed into mac[].  The cipher is applied lazily: mac[] is
   * only encrypted when more input follows, so on return mac[] always holds
   * the most recent (possibly partial) block XORed in but not yet encrypted,
   * and *macp says how much of that block is filled (0 = block boundary).
   *
   * rounds is compared against 12 to pick the entry point into the round
   * loop; it is expected to be 10, 12 or 14 (AES-128/-192/-256).
   *
   * A zero abytes returns immediately and leaves mac[] and *macp untouched.
   *
   * Modifies x0-x2, x6-x8, v0, v1, v3-v5 and the condition flags.
   */
  ENTRY(ce_aes_ccm_auth_data)
	cbz	w2, .Lauth_ret			/* no input: nothing to do */
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, .Lauth_encrypt_mac		/* no: go straight to whole blocks */

	/*
	 * Complete the partially filled block.  w8 counts up from
	 * -(bytes still missing) to 0; the new bytes are gathered in v1.
	 */
	sub	w8, w8, #16
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0 */
  .Lauth_fill_byte:
	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1			/* does not touch the flags */
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	.Lauth_fill_short		/* out of input? */
	cbnz	w8, .Lauth_fill_byte
	eor	v0.16b, v0.16b, v1.16b		/* block complete: merge into mac */

	/*
	 * Whole-block loop: encrypt the mac, then XOR in the next block.
	 * w7 = rounds - 2 drives the round loop, which does three rounds
	 * per iteration and is entered at a point that depends on rounds.
	 */
  .Lauth_encrypt_mac:
	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16			/* x6 = address of 2nd round key */
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	.Lauth_rounds_lt12
	bne	.Lauth_rounds_gt12
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	.Lauth_rounds_eq12
  .Lauth_rounds_lt12:
	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
  .Lauth_round_loop:
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
  .Lauth_rounds_eq12:
	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
  .Lauth_rounds_gt12:
	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	.Lauth_round_loop
	aese	v0.16b, v4.16b			/* last round: no aesmc */
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	.Lauth_store_mac		/* less than a whole block left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	.Lauth_encrypt_mac		/* flags still from subs w2 */

  .Lauth_store_mac:
	st1	{v0.16b}, [x0]			/* store mac */
	beq	.Lauth_save_macp		/* ended on a block boundary */
	adds	w2, w2, #16			/* w2 = number of tail bytes */
	beq	.Lauth_save_macp
	mov	w8, w2				/* tail length becomes *macp */

	/* XOR the tail bytes into the leading bytes of the stored mac. */
  .Lauth_tail_byte:
	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	.Lauth_save_macp
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	.Lauth_tail_byte

	/*
	 * Input ran out while completing the block.  w8 is zero if the
	 * block happened to fill up exactly; otherwise it is -(bytes still
	 * missing) and v1 needs that many more rotations to put the bytes
	 * gathered so far at their offsets within the block.
	 */
  .Lauth_fill_short:
	cbz	w8, .Lauth_fill_merge
	mov	w7, w8
	add	w8, w8, #16			/* new *macp = bytes now in block */
  .Lauth_fill_rotate:
	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	bne	.Lauth_fill_rotate
  .Lauth_fill_merge:
	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
  .Lauth_save_macp:
	str	w8, [x3]
  .Lauth_ret:
	ret
  ENDPROC(ce_aes_ccm_auth_data)
  /*
   * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
   *			 u32 rounds);
   *
   * Replace the 16-byte mac[] with E(mac) ^ E(ctr), where E is AES under
   * the round keys at rk.
   *
   * In:	x0 = mac, x1 = ctr, x2 = rk, w3 = rounds
   *
   * Both blocks go through the rounds side by side.  The last round key is
   * never applied: it would be XORed into both blocks and so drops out of
   * their XOR.
   *
   * rounds is compared against 12 to pick the entry point into the round
   * loop; it is expected to be 10, 12 or 14 (AES-128/-192/-256).
   *
   * Modifies x2, w3, v0, v1, v3-v5 and the condition flags.
   */
  ENTRY(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	.Lfinal_rounds_lt12		/* flags still from cmp */
	bne	.Lfinal_rounds_gt12
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	.Lfinal_rounds_eq12
  .Lfinal_rounds_lt12:
	mov	v4.16b, v3.16b

	/* Round loop: three rounds per iteration on both blocks. */
  .Lfinal_round_loop:
	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
  .Lfinal_rounds_eq12:
	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
  .Lfinal_rounds_gt12:
	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	.Lfinal_round_loop
	aese	v0.16b, v4.16b			/* last round: no aesmc */
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
  ENDPROC(ce_aes_ccm_final)
  /*
   * aes_ccm_do_crypt enc
   *
   * Function body shared by ce_aes_ccm_encrypt (enc == 1) and
   * ce_aes_ccm_decrypt (enc == 0).
   *
   * In:	x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
   *	x5 = mac, x6 = ctr
   *
   * For every 16-byte block the mac (v0) and the counter block (v1) are run
   * through the AES rounds side by side.  The encrypted counter is XORed
   * with the input to give the output, and the plaintext (the input when
   * encrypting, the output when decrypting) is XORed into the encrypted mac.
   *
   * Only the low 64 bits of the counter are incremented; they are kept in
   * x8 as an integer and re-inserted into v1 for each block, while the
   * upper 8 bytes are reloaded unchanged from ctr[].
   *
   * When cbytes is a multiple of 16 the mac and the low counter half are
   * written back on exit.  Otherwise the trailing partial block is handled
   * a byte at a time; on that path the mac is written back but the counter
   * is not.
   *
   * A zero cbytes returns immediately and touches nothing.
   *
   * Numeric local labels are used because the macro is expanded once per
   * entry point.
   *
   * Modifies x0, x1, w2, x5-x10, v0-v5 and the condition flags.
   */
  .macro	aes_ccm_do_crypt,enc
	cbz	w2, 5f				/* no data: nothing to do */
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
  CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
  0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16			/* x10 = address of 2nd round key */
	bmi	1f				/* flags still from cmp */
	bne	4f
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	3f
  1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
  2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
  3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
  4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b			/* last round: no aesmc */
	aese	v1.16b, v4.16b
	/* v5 = last round key, not yet applied to v0 or v1 */
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags still from subs w2 */
  CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
  5:	ret

	/* Fewer than 16 bytes left: finish both blocks, then go bytewise. */
  6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
  7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac byte ^= plaintext byte */
	eor	w9, w9, w6			/* w9 = ciphertext byte */
	.else
	eor	w9, w9, w6			/* w9 = plaintext byte */
	eor	w7, w7, w9			/* mac byte ^= plaintext byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
  .endm
  /*
   * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
   *			   u8 const rk[], u32 rounds, u8 mac[],
   *			   u8 ctr[]);
   * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
   *			   u8 const rk[], u32 rounds, u8 mac[],
   *			   u8 ctr[]);
   *
   * Both entry points consist solely of an expansion of aes_ccm_do_crypt,
   * which supplies the whole body including the return.  The macro
   * argument selects the variant: 1 for encryption, 0 for decryption.
   */
  ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
  ENDPROC(ce_aes_ccm_encrypt)

  ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
  ENDPROC(ce_aes_ccm_decrypt)