aes.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /* MIT License
  2. *
  3. * Permission is hereby granted, free of charge, to any person
  4. * obtaining a copy of this software and associated documentation
  5. * files (the "Software"), to deal in the Software without
  6. * restriction, including without limitation the rights to use, copy,
  7. * modify, merge, publish, distribute, sublicense, and/or sell copies
  8. * of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be
  12. * included in all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. * SOFTWARE.
  22. *
  23. */
  24. #if !defined(SIMDE_X86_AES_H)
  25. #define SIMDE_X86_AES_H
  26. /*
  27. * Advanced Encryption Standard
  28. * @author Dani Huertas
  29. * @email huertas.dani@gmail.com
  30. *
  31. * Based on the document FIPS PUB 197
  32. */
  33. #include "sse2.h"
  34. /*
  35. * Multiplication in GF(2^8)
  36. * http://en.wikipedia.org/wiki/Finite_field_arithmetic
  37. * Irreducible polynomial m(x) = x^8 + x^4 + x^3 + x + 1
  38. *
  39. * NOTE: This function can be easily replaced with a look up table for a speed
  40. * boost, at the expense of an increase in memory size.
  41. SIMDE_FUNCTION_ATTRIBUTES
  42. uint8_t gmult(uint8_t a, uint8_t b) {
  43. uint8_t p = 0, i = 0, hbs = 0;
  44. for (i = 0; i < 8; i++) {
  45. if (b & 1) {
  46. p ^= a;
  47. }
  48. hbs = a & 0x80;
  49. a <<= 1;
  50. if (hbs) a ^= 0x1b; // 0000 0001 0001 1011
  51. b >>= 1;
  52. }
  53. return (uint8_t)p;
  54. }
  55. */
  56. #if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))
  57. #include "../simde-aes.h"
  58. /*
  59. * Transformation in the Cipher and Inverse Cipher in which a Round
  60. * Key is added to the State using an XOR operation. The length of a
  61. * Round Key equals the size of the State (i.e., for Nb = 4, the Round
  62. * Key length equals 128 bits/16 bytes).
  63. */
  64. SIMDE_FUNCTION_ATTRIBUTES
  65. void simde_x_aes_add_round_key(uint8_t *state, simde__m128i_private w, uint8_t r) {
  66. int Nb = simde_x_aes_Nb;
  67. uint8_t c;
  68. for (c = 0; c < Nb; c++) {
  69. state[Nb*0+c] = state[Nb*0+c]^w.u8[4*Nb*r+4*c+0];
  70. state[Nb*1+c] = state[Nb*1+c]^w.u8[4*Nb*r+4*c+1];
  71. state[Nb*2+c] = state[Nb*2+c]^w.u8[4*Nb*r+4*c+2];
  72. state[Nb*3+c] = state[Nb*3+c]^w.u8[4*Nb*r+4*c+3];
  73. }
  74. }
  75. /*
  76. * Transformation in the Cipher that takes all of the columns of the
  77. * State and mixes their data (independently of one another) to
  78. * produce new columns.
  79. */
  80. SIMDE_FUNCTION_ATTRIBUTES
  81. void simde_x_aes_mix_columns(uint8_t *state) {
  82. int Nb = simde_x_aes_Nb;
  83. // uint8_t k[] = {0x02, 0x01, 0x01, 0x03}; // a(x) = {02} + {01}x + {01}x2 + {03}x3
  84. uint8_t i, j, col[4], res[4];
  85. for (j = 0; j < Nb; j++) {
  86. for (i = 0; i < 4; i++) {
  87. col[i] = state[Nb*i+j];
  88. }
  89. //coef_mult(k, col, res);
  90. simde_x_aes_coef_mult_lookup(0, col, res);
  91. for (i = 0; i < 4; i++) {
  92. state[Nb*i+j] = res[i];
  93. }
  94. }
  95. }
  96. /*
  97. * Transformation in the Inverse Cipher that is the inverse of
  98. * MixColumns().
  99. */
  100. SIMDE_FUNCTION_ATTRIBUTES
  101. void simde_x_aes_inv_mix_columns(uint8_t *state) {
  102. int Nb = simde_x_aes_Nb;
  103. // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3
  104. uint8_t i, j, col[4], res[4];
  105. for (j = 0; j < Nb; j++) {
  106. for (i = 0; i < 4; i++) {
  107. col[i] = state[Nb*i+j];
  108. }
  109. //coef_mult(k, col, res);
  110. simde_x_aes_coef_mult_lookup(4, col, res);
  111. for (i = 0; i < 4; i++) {
  112. state[Nb*i+j] = res[i];
  113. }
  114. }
  115. }
  116. /*
  117. * Transformation in the Cipher that processes the State by cyclically
  118. * shifting the last three rows of the State by different offsets.
  119. */
  120. SIMDE_FUNCTION_ATTRIBUTES
  121. void simde_x_aes_shift_rows(uint8_t *state) {
  122. int Nb = simde_x_aes_Nb;
  123. uint8_t i, k, s, tmp;
  124. for (i = 1; i < 4; i++) {
  125. // shift(1,4)=1; shift(2,4)=2; shift(3,4)=3
  126. // shift(r, 4) = r;
  127. s = 0;
  128. while (s < i) {
  129. tmp = state[Nb*i+0];
  130. for (k = 1; k < Nb; k++) {
  131. state[Nb*i+k-1] = state[Nb*i+k];
  132. }
  133. state[Nb*i+Nb-1] = tmp;
  134. s++;
  135. }
  136. }
  137. }
  138. /*
  139. * Transformation in the Inverse Cipher that is the inverse of
  140. * ShiftRows().
  141. */
  142. SIMDE_FUNCTION_ATTRIBUTES
  143. void simde_x_aes_inv_shift_rows(uint8_t *state) {
  144. uint8_t Nb = simde_x_aes_Nb;
  145. uint8_t i, k, s, tmp;
  146. for (i = 1; i < 4; i++) {
  147. s = 0;
  148. while (s < i) {
  149. tmp = state[Nb*i+Nb-1];
  150. for (k = Nb-1; k > 0; k--) {
  151. state[Nb*i+k] = state[Nb*i+k-1];
  152. }
  153. state[Nb*i+0] = tmp;
  154. s++;
  155. }
  156. }
  157. }
  158. /*
  159. * Transformation in the Cipher that processes the State using a non
  160. * linear byte substitution table (S-box) that operates on each of the
  161. * State bytes independently.
  162. */
  163. SIMDE_FUNCTION_ATTRIBUTES
  164. void simde_x_aes_sub_bytes(uint8_t *state) {
  165. int Nb = simde_x_aes_Nb;
  166. uint8_t i, j;
  167. for (i = 0; i < 4; i++) {
  168. for (j = 0; j < Nb; j++) {
  169. // s_box row: yyyy ----
  170. // s_box col: ---- xxxx
  171. // s_box[16*(yyyy) + xxxx] == s_box[yyyyxxxx]
  172. state[Nb*i+j] = simde_x_aes_s_box[state[Nb*i+j]];
  173. }
  174. }
  175. }
  176. /*
  177. * Transformation in the Inverse Cipher that is the inverse of
  178. * SubBytes().
  179. */
  180. SIMDE_FUNCTION_ATTRIBUTES
  181. void simde_x_aes_inv_sub_bytes(uint8_t *state) {
  182. int Nb = simde_x_aes_Nb;
  183. uint8_t i, j;
  184. for (i = 0; i < 4; i++) {
  185. for (j = 0; j < Nb; j++) {
  186. state[Nb*i+j] = simde_x_aes_inv_s_box[state[Nb*i+j]];
  187. }
  188. }
  189. }
  190. /*
  191. * Performs the AES cipher operation
  192. */
  193. SIMDE_FUNCTION_ATTRIBUTES
  194. void simde_x_aes_enc(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) {
  195. int Nb = simde_x_aes_Nb;
  196. uint8_t state[4*simde_x_aes_Nb];
  197. uint8_t r = 0, i, j;
  198. for (i = 0; i < 4; i++) {
  199. for (j = 0; j < Nb; j++) {
  200. state[Nb*i+j] = in.u8[i+4*j];
  201. }
  202. }
  203. simde_x_aes_sub_bytes(state);
  204. simde_x_aes_shift_rows(state);
  205. if (!is_last)
  206. simde_x_aes_mix_columns(state);
  207. simde_x_aes_add_round_key(state, w, r);
  208. for (i = 0; i < 4; i++) {
  209. for (j = 0; j < Nb; j++) {
  210. out->u8[i+4*j] = state[Nb*i+j];
  211. }
  212. }
  213. }
  214. /*
  215. * Performs the AES inverse cipher operation
  216. */
  217. SIMDE_FUNCTION_ATTRIBUTES
  218. void simde_x_aes_dec(simde__m128i_private in, simde__m128i_private *out, simde__m128i_private w, int is_last) {
  219. int Nb = simde_x_aes_Nb;
  220. uint8_t state[4*simde_x_aes_Nb];
  221. uint8_t r = 0, i, j;
  222. for (i = 0; i < 4; i++) {
  223. for (j = 0; j < Nb; j++) {
  224. state[Nb*i+j] = in.u8[i+4*j];
  225. }
  226. }
  227. simde_x_aes_inv_shift_rows(state);
  228. simde_x_aes_inv_sub_bytes(state);
  229. if (!is_last)
  230. simde_x_aes_inv_mix_columns(state);
  231. simde_x_aes_add_round_key(state, w, r);
  232. for (i = 0; i < 4; i++) {
  233. for (j = 0; j < Nb; j++) {
  234. out->u8[i+4*j] = state[Nb*i+j];
  235. }
  236. }
  237. }
  238. #endif // if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))
  239. SIMDE_FUNCTION_ATTRIBUTES
  240. simde__m128i simde_mm_aesenc_si128(simde__m128i a, simde__m128i round_key) {
  241. #if defined(SIMDE_X86_AES_NATIVE)
  242. return _mm_aesenc_si128(a, round_key);
  243. #else
  244. simde__m128i_private result_;
  245. simde__m128i_private a_ = simde__m128i_to_private(a);
  246. simde__m128i_private round_key_ = simde__m128i_to_private(round_key);
  247. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)
  248. result_.neon_u8 = veorq_u8(
  249. vaesmcq_u8(vaeseq_u8(a_.neon_u8, vdupq_n_u8(0))),
  250. round_key_.neon_u8);
  251. #else
  252. simde_x_aes_enc(a_, &result_, round_key_, 0);
  253. #endif
  254. return simde__m128i_from_private(result_);
  255. #endif
  256. }
  257. #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)
  258. #define _mm_aesenc_si128(a, b) simde_mm_aesenc_si128(a, b)
  259. #endif
  260. SIMDE_FUNCTION_ATTRIBUTES
  261. simde__m128i simde_mm_aesdec_si128(simde__m128i a, simde__m128i round_key) {
  262. #if defined(SIMDE_X86_AES_NATIVE)
  263. return _mm_aesdec_si128(a, round_key);
  264. #else
  265. simde__m128i_private result_;
  266. simde__m128i_private a_ = simde__m128i_to_private(a);
  267. simde__m128i_private round_key_ = simde__m128i_to_private(round_key);
  268. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)
  269. result_.neon_u8 = veorq_u8(
  270. vaesimcq_u8(vaesdq_u8(a_.neon_u8, vdupq_n_u8(0))),
  271. round_key_.neon_u8);
  272. #else
  273. simde_x_aes_dec(a_, &result_, round_key_, 0);
  274. #endif
  275. return simde__m128i_from_private(result_);
  276. #endif
  277. }
  278. #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)
  279. #define _mm_aesdec_si128(a, b) simde_mm_aesdec_si128(a, b)
  280. #endif
  281. SIMDE_FUNCTION_ATTRIBUTES
  282. simde__m128i simde_mm_aesenclast_si128(simde__m128i a, simde__m128i round_key) {
  283. #if defined(SIMDE_X86_AES_NATIVE)
  284. return _mm_aesenclast_si128(a, round_key);
  285. #else
  286. simde__m128i_private result_;
  287. simde__m128i_private a_ = simde__m128i_to_private(a);
  288. simde__m128i_private round_key_ = simde__m128i_to_private(round_key);
  289. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)
  290. result_.neon_u8 = vaeseq_u8(a_.neon_u8, vdupq_n_u8(0));
  291. result_.neon_i32 = veorq_s32(result_.neon_i32, round_key_.neon_i32); // _mm_xor_si128
  292. #else
  293. simde_x_aes_enc(a_, &result_, round_key_, 1);
  294. #endif
  295. return simde__m128i_from_private(result_);
  296. #endif
  297. }
  298. #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)
  299. #define _mm_aesenclast_si128(a, b) simde_mm_aesenclast_si128(a, b)
  300. #endif
  301. SIMDE_FUNCTION_ATTRIBUTES
  302. simde__m128i simde_mm_aesdeclast_si128(simde__m128i a, simde__m128i round_key) {
  303. #if defined(SIMDE_X86_AES_NATIVE)
  304. return _mm_aesdeclast_si128(a, round_key);
  305. #else
  306. simde__m128i_private result_;
  307. simde__m128i_private a_ = simde__m128i_to_private(a);
  308. simde__m128i_private round_key_ = simde__m128i_to_private(round_key);
  309. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)
  310. result_.neon_u8 = veorq_u8(
  311. vaesdq_u8(a_.neon_u8, vdupq_n_u8(0)),
  312. round_key_.neon_u8);
  313. #else
  314. simde_x_aes_dec(a_, &result_, round_key_, 1);
  315. #endif
  316. return simde__m128i_from_private(result_);
  317. #endif
  318. }
  319. #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)
  320. #define _mm_aesdeclast_si128(a, b) simde_mm_aesdeclast_si128(a, b)
  321. #endif
  322. SIMDE_FUNCTION_ATTRIBUTES
  323. simde__m128i simde_mm_aesimc_si128(simde__m128i a) {
  324. #if defined(SIMDE_X86_AES_NATIVE)
  325. return _mm_aesimc_si128(a);
  326. #else
  327. simde__m128i_private result_ = simde__m128i_to_private(simde_mm_setzero_si128());
  328. simde__m128i_private a_ = simde__m128i_to_private(a);
  329. #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO)
  330. result_.neon_u8 = vaesimcq_u8(a_.neon_u8);
  331. #else
  332. int Nb = simde_x_aes_Nb;
  333. // uint8_t k[] = {0x0e, 0x09, 0x0d, 0x0b}; // a(x) = {0e} + {09}x + {0d}x2 + {0b}x3
  334. uint8_t i, j, col[4], res[4];
  335. for (j = 0; j < Nb; j++) {
  336. for (i = 0; i < 4; i++) {
  337. col[i] = a_.u8[Nb*j+i];
  338. }
  339. //coef_mult(k, col, res);
  340. simde_x_aes_coef_mult_lookup(4, col, res);
  341. for (i = 0; i < 4; i++) {
  342. result_.u8[Nb*j+i] = res[i];
  343. }
  344. }
  345. #endif
  346. return simde__m128i_from_private(result_);
  347. #endif
  348. }
  349. #if defined(SIMDE_X86_AES_ENABLE_NATIVE_ALIASES)
  350. #define _mm_aesimc_si128(a) simde_mm_aesimc_si128(a)
  351. #endif
  352. #undef simde_x_aes_Nb
  353. #endif /* !defined(SIMDE_X86_AES_H) */