aes-cipher-core.S

/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

	rk		.req	x0
	out		.req	x1
	in		.req	x2
	rounds		.req	x3
	tt		.req	x2
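
	/*
	 * The aliases above reflect the AAPCS64 argument registers for
	 * the C prototype used by the glue code (see aes-cipher-glue.c;
	 * shown here as a sketch):
	 *
	 *	asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out,
	 *					    const u8 *in, int rounds);
	 *	asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out,
	 *					    const u8 *in, int rounds);
	 *
	 * tt deliberately aliases x2 (in): once the input block has been
	 * loaded, the input pointer is dead and x2 is recycled to hold
	 * the current lookup table address.
	 */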

	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction. So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
	 * than ubfx above)
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm

	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm
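
	/*
	 * What the two __pair flavours compute, as a C sketch
	 * (illustrative only, not the kernel API):
	 *
	 *	u32 tab32_lookup(const u32 *tt, u32 in, int shift)
	 *	{
	 *		return tt[(in >> shift) & 0xff];  // ldr ..., uxtw #2
	 *	}
	 *
	 *	u8 tab8_lookup(const u8 *tt, u32 in, int shift, int ssh)
	 *	{
	 *		// final rounds only: ssh is 2 on the encrypt path
	 *		// (32-bit ft_tab entries) and 0 for the byte-sized
	 *		// inverse S-box used on decrypt
	 *		return tt[((in >> shift) & 0xff) << ssh];
	 *	}
	 *
	 * The explicit '<< ssh' is the scaling described in the comment
	 * inside __pair1: ldrb has no 'uxtw #2' addressing form, so the
	 * shift is done by hand (or folded into ubfiz when shift == 0).
	 */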

	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm
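
	/*
	 * Each __hround computes two output columns of one AES round;
	 * fround and iround invoke it twice to cover the full 128-bit
	 * state.  As a C sketch (illustrative only), one column is:
	 *
	 *	#define ROR32(x, n)  (((x) >> (n)) | ((x) << (32 - (n))))
	 *
	 *	u32 column(const u32 *tt, u32 rk, u32 a, u32 b, u32 c, u32 d)
	 *	{
	 *		return rk ^ tt[a & 0xff]
	 *			  ^ ROR32(tt[(b >>  8) & 0xff], 24)
	 *			  ^ ROR32(tt[(c >> 16) & 0xff], 16)
	 *			  ^ ROR32(tt[(d >> 24) & 0xff],  8);
	 *	}
	 *
	 * fround feeds column i the state words (i, i+1, i+2, i+3) mod 4
	 * (ShiftRows); iround feeds it (i, i-1, i-2, i-3) mod 4
	 * (InvShiftRows).  The rotations stand in for the three extra
	 * T-tables of the classic four-table implementation, trading a
	 * little ALU work for a quarter of the D-cache footprint.
	 */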

	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab
	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b

3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm
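
	/*
	 * Round-loop structure of do_crypt, as a C sketch (rounds is 10,
	 * 12 or 14; round() advances rk by 16 bytes internally):
	 *
	 *	state ^= rk[0..3]; rk += 4;	// whitening key
	 *	if (!(rounds & 2)) {		// 12 rounds only
	 *		round(); round();	// label 0
	 *	}
	 *	for (;;) {
	 *		rounds -= 4;
	 *		round();		// label 1
	 *		if (rounds <= 0)
	 *			break;		// b.ls 3f
	 *		round();		// label 2
	 *		round(); round();	// label 0
	 *	}
	 *	final_round();			// label 3, byte lookups
	 *
	 * The tbnz on bit 1 distinguishes 12 rounds (bit clear) from 10
	 * or 14 (bit set), so a single loop serves all three key sizes.
	 */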

ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)

	.align		5
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)
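
	/*
	 * The encrypt side needs no separate last-round table: each
	 * 32-bit crypto_ft_tab entry appears to hold the bytes
	 * { s*2, s, s, s*3 } (e.g. entry 0 is 0xa56363c6, with
	 * sbox[0] == 0x63), so byte 1 of every entry is the plain S-box
	 * value; since both middle bytes equal s, this holds on either
	 * endianness.  Passing crypto_ft_tab + 1 with bsz == 2 (stride
	 * 4) thus yields plain SubBytes.  Sketch:
	 *
	 *	u8 sub_byte(const u32 *ft_tab, u8 x)
	 *	{
	 *		return ((const u8 *)ft_tab)[4 * x + 1];	// sbox[x]
	 *	}
	 *
	 * crypto_it_tab has no such byte (all four are multiplied by 9,
	 * 11, 13 or 14), which is why decryption carries its own inverse
	 * S-box below and indexes it with bsz == 0 (stride 1).
	 */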

	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm64_inverse_sbox, %object
__aes_arm64_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox