rijndael-ppc-common.h

/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
 * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
 * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
 * and Cryptogams by Andy Polyakov, and if made part of a release of either
 * or both projects, is thereafter dual-licensed under the license said project
 * is released under.
 */

#ifndef G10_RIJNDAEL_PPC_COMMON_H
#define G10_RIJNDAEL_PPC_COMMON_H

#include <altivec.h>


typedef vector unsigned char block;
typedef vector unsigned int vec_u32;

typedef union
{
  u32 data32[4];
} __attribute__((packed, aligned(1), may_alias)) u128_t;


#define ALWAYS_INLINE inline __attribute__((always_inline))
#define NO_INLINE __attribute__((noinline))
#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))

#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
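
/* Vector load/store helpers.  The 'offs' argument of these macros is an
 * index in 16-byte blocks, not bytes; it is scaled by 16 before being
 * passed to the lvx/stvx wrappers below.  The VEC_*_BE variants go through
 * asm_be_swap()/asm_load_be_noswap()/asm_store_be_noswap(), which are not
 * defined in this header and must be provided by the including file. */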
#define ALIGNED_LOAD(in_ptr, offs) \
  (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))

#define ALIGNED_STORE(out_ptr, offs, vec) \
  (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))

#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))

#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
  (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
                bige_const))

#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
  (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))

#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
  (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
                        (void *)(out_ptr)))

#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
  (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
#define ROUND_KEY_VARIABLES \
  block rkey0, rkeylast

#define PRELOAD_ROUND_KEYS(nrounds) \
  do { \
    rkey0 = ALIGNED_LOAD (rk, 0); \
    rkeylast = ALIGNED_LOAD (rk, nrounds); \
  } while (0)

#define AES_ENCRYPT(blk, nrounds) \
  do { \
    blk ^= rkey0; \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
    if (nrounds >= 12) \
      { \
        blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
        blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
        if (nrounds > 12) \
          { \
            blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
            blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
          } \
      } \
    blk = asm_cipherlast_be (blk, rkeylast); \
  } while (0)

#define AES_DECRYPT(blk, nrounds) \
  do { \
    blk ^= rkey0; \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
    if (nrounds >= 12) \
      { \
        blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
        blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
        if (nrounds > 12) \
          { \
            blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
            blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
          } \
      } \
    blk = asm_ncipherlast_be (blk, rkeylast); \
  } while (0)
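
/* Illustrative usage sketch (not part of this header): a single-block
 * encrypt helper roughly as the including .c file might write it.  The
 * names 'ctx', 'in', 'out' and 'bige_const' are hypothetical here, and
 * asm_be_swap()/asm_load_be_noswap()/asm_store_be_noswap() must be
 * supplied by the includer:
 *
 *   const u128_t *rk = (const u128_t *)&ctx->keyschenc;
 *   int rounds = ctx->rounds;
 *   ROUND_KEY_VARIABLES;
 *   block b;
 *
 *   PRELOAD_ROUND_KEYS (rounds);
 *
 *   b = VEC_LOAD_BE (in, 0, bige_const);
 *   AES_ENCRYPT (b, rounds);
 *   VEC_STORE_BE (out, 0, b, bige_const);
 */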

#define ROUND_KEY_VARIABLES_ALL \
  block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
        rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast

#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
  do { \
    rkey0 = ALIGNED_LOAD (rk, 0); \
    rkey1 = ALIGNED_LOAD (rk, 1); \
    rkey2 = ALIGNED_LOAD (rk, 2); \
    rkey3 = ALIGNED_LOAD (rk, 3); \
    rkey4 = ALIGNED_LOAD (rk, 4); \
    rkey5 = ALIGNED_LOAD (rk, 5); \
    rkey6 = ALIGNED_LOAD (rk, 6); \
    rkey7 = ALIGNED_LOAD (rk, 7); \
    rkey8 = ALIGNED_LOAD (rk, 8); \
    rkey9 = ALIGNED_LOAD (rk, 9); \
    if (nrounds >= 12) \
      { \
        rkey10 = ALIGNED_LOAD (rk, 10); \
        rkey11 = ALIGNED_LOAD (rk, 11); \
        if (nrounds > 12) \
          { \
            rkey12 = ALIGNED_LOAD (rk, 12); \
            rkey13 = ALIGNED_LOAD (rk, 13); \
          } \
      } \
    rkeylast = ALIGNED_LOAD (rk, nrounds); \
  } while (0)
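
/* Thin inline-asm wrappers around individual VMX/VSX and Power ISA 2.07
 * vector-crypto instructions.  In asm_aligned_ld/asm_aligned_st the "r0"
 * clobber keeps the offset operand out of r0, because lvx/stvx treat an
 * RA of r0 as the literal value zero; the constant-zero-offset fast path
 * encodes that literal zero directly instead. */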
static ASM_FUNC_ATTR_INLINE block
asm_aligned_ld(unsigned long offset, const void *ptr)
{
  block vec;
#if __GNUC__ >= 4
  if (__builtin_constant_p (offset) && offset == 0)
    __asm__ volatile ("lvx %0,0,%1\n\t"
                      : "=v" (vec)
                      : "r" ((uintptr_t)ptr)
                      : "memory");
  else
#endif
    __asm__ volatile ("lvx %0,%1,%2\n\t"
                      : "=v" (vec)
                      : "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");
  return vec;
}

static ASM_FUNC_ATTR_INLINE void
asm_aligned_st(block vec, unsigned long offset, void *ptr)
{
#if __GNUC__ >= 4
  if (__builtin_constant_p (offset) && offset == 0)
    __asm__ volatile ("stvx %0,0,%1\n\t"
                      :
                      : "v" (vec), "r" ((uintptr_t)ptr)
                      : "memory");
  else
#endif
    __asm__ volatile ("stvx %0,%1,%2\n\t"
                      :
                      : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");
}

static ASM_FUNC_ATTR_INLINE block
asm_vperm1(block vec, block mask)
{
  block o;
  __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
                    : "=v" (o)
                    : "v" (vec), "v" (mask));
  return o;
}

static ASM_FUNC_ATTR_INLINE block
asm_add_uint128(block a, block b)
{
  block res;
  __asm__ volatile ("vadduqm %0,%1,%2\n\t"
                    : "=v" (res)
                    : "v" (a), "v" (b));
  return res;
}

static ASM_FUNC_ATTR_INLINE block
asm_add_uint64(block a, block b)
{
  block res;
  __asm__ volatile ("vaddudm %0,%1,%2\n\t"
                    : "=v" (res)
                    : "v" (a), "v" (b));
  return res;
}

static ASM_FUNC_ATTR_INLINE block
asm_sra_int64(block a, block b)
{
  block res;
  __asm__ volatile ("vsrad %0,%1,%2\n\t"
                    : "=v" (res)
                    : "v" (a), "v" (b));
  return res;
}

static ASM_FUNC_ATTR_INLINE block
asm_swap_uint64_halfs(block a)
{
  block res;
  __asm__ volatile ("xxswapd %x0, %x1"
                    : "=wa" (res)
                    : "wa" (a));
  return res;
}

static ASM_FUNC_ATTR_INLINE block
asm_xor(block a, block b)
{
  block res;
  __asm__ volatile ("vxor %0,%1,%2\n\t"
                    : "=v" (res)
                    : "v" (a), "v" (b));
  return res;
}
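
/* AES round primitives (vsbox, vcipher, vcipherlast, vncipher,
 * vncipherlast).  These operate on the block in big-endian byte order,
 * hence the _be suffix; any byte-swapping that a little-endian host
 * needs is done by the VEC_*_BE macros via the includer's
 * asm_be_swap(). */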
static ASM_FUNC_ATTR_INLINE block
asm_sbox_be(block b)
{
  block o;
  __asm__ volatile ("vsbox %0, %1\n\t"
                    : "=v" (o)
                    : "v" (b));
  return o;
}

static ASM_FUNC_ATTR_INLINE block
asm_cipher_be(block b, block rk)
{
  block o;
  __asm__ volatile ("vcipher %0, %1, %2\n\t"
                    : "=v" (o)
                    : "v" (b), "v" (rk));
  return o;
}

static ASM_FUNC_ATTR_INLINE block
asm_cipherlast_be(block b, block rk)
{
  block o;
  __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
                    : "=v" (o)
                    : "v" (b), "v" (rk));
  return o;
}

static ASM_FUNC_ATTR_INLINE block
asm_ncipher_be(block b, block rk)
{
  block o;
  __asm__ volatile ("vncipher %0, %1, %2\n\t"
                    : "=v" (o)
                    : "v" (b), "v" (rk));
  return o;
}

static ASM_FUNC_ATTR_INLINE block
asm_ncipherlast_be(block b, block rk)
{
  block o;
  __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
                    : "=v" (o)
                    : "v" (b), "v" (rk));
  return o;
}

/* Make a decryption key from an encryption key: the decryption key
 * schedule is the encryption key schedule copied in reverse order. */
static ASM_FUNC_ATTR_INLINE void
internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
{
  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
  u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
  int rounds = ctx->rounds;
  int rr;
  int r;

  for (r = 0, rr = rounds; r <= rounds; r++, rr--)
    {
      ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
    }
}

#endif /* G10_RIJNDAEL_PPC_COMMON_H */