rijndael.c

/* Rijndael (AES) for GnuPG
 * Copyright (C) 2000, 2001, 2002, 2003, 2007,
 *               2008, 2011 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *******************************************************************
 * The code here is based on the optimized implementation taken from
 * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
 * which carries this notice:
 *------------------------------------------
 * rijndael-alg-fst.c   v2.3   April '2000
 *
 * Optimised ANSI C code
 *
 * authors: v1.0: Antoon Bosselaers
 *          v2.0: Vincent Rijmen
 *          v2.3: Paulo Barreto
 *
 * This code is placed in the public domain.
 *------------------------------------------
 *
 * The SP800-38a document is available at:
 * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
 *
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* for memcmp() */

#include "types.h"  /* for byte and u32 typedefs */
#include "g10lib.h"
#include "cipher.h"

#define MAXKC      (256/32)
#define MAXROUNDS  14
#define BLOCKSIZE  (128/8)


/* Helper macro to force alignment to 16 bytes.  */
#ifdef __GNUC__
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif


/* USE_PADLOCK indicates whether to compile the padlock specific
   code.  */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
#  define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/

/* USE_AESNI indicates whether to compile with Intel AES-NI code.  We
   need the vector-size attribute which seems to be available since
   gcc 3.  However, to be on the safe side we require at least gcc 4.  */
#undef USE_AESNI
#ifdef ENABLE_AESNI_SUPPORT
# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
#  define USE_AESNI 1
# endif
#endif /* ENABLE_AESNI_SUPPORT */

#ifdef USE_AESNI
typedef int m128i_t __attribute__ ((__vector_size__ (16)));
#endif /*USE_AESNI*/

/* Define an u32 variant for the sake of gcc 4.4's strict aliasing.  */
#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
typedef u32 __attribute__ ((__may_alias__)) u32_a_t;
#else
typedef u32 u32_a_t;
#endif
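
/* Illustration (not part of the original file): a minimal sketch of
   why the may_alias typedef above matters.  Reading a byte buffer
   through a plain u32 pointer violates the C strict-aliasing rules,
   and gcc 4.4+ may reorder or drop such accesses; the attributed
   type opts out of that analysis.  The function names below are
   hypothetical and for exposition only.  */
#if 0
static u32
load_u32_wrong (const byte *p)
{
  return *(const u32*)p;      /* Undefined behavior under strict aliasing. */
}

static u32
load_u32_ok (const byte *p)
{
  return *(const u32_a_t*)p;  /* OK: u32_a_t may alias any object. */
}
#endif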
/* Our context object.  */
typedef struct
{
  /* The first fields are the keyschedule arrays.  This is so that
     they are aligned on a 16 byte boundary if using gcc.  This
     alignment is required for the AES-NI code and a good idea in any
     case.  The alignment is guaranteed due to the way cipher.c
     allocates the space for the context.  The PROPERLY_ALIGNED_TYPE
     hack is used to force a minimal alignment if not using gcc or if
     the alignment requirement is higher than 16 bytes.  */
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];
#ifdef USE_PADLOCK
    /* The key as passed to the padlock engine.  It is only used if
       the padlock engine is used (USE_PADLOCK, below).  */
    unsigned char padlock_key[16] __attribute__ ((aligned (16)));
#endif /*USE_PADLOCK*/
  } u1;
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte keyschedule[MAXROUNDS+1][4][4];
  } u2;
  int rounds;               /* Key-length-dependent number of rounds.  */
  int decryption_prepared;  /* The decryption key schedule is available.  */
#ifdef USE_PADLOCK
  int use_padlock;          /* Padlock shall be used.  */
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  int use_aesni;            /* AES-NI shall be used.  */
#endif /*USE_AESNI*/
} RIJNDAEL_context ATTR_ALIGNED_16;
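
/* Illustration (not part of the original file): because the key
   schedules are the first members, they start at the context's lowest
   address with at least the alignment of PROPERLY_ALIGNED_TYPE.  A
   caller could verify the 16-byte alignment the AES-NI path relies on
   like this; the helper name is hypothetical.  */
#if 0
static int
context_is_aligned_16 (const RIJNDAEL_context *ctx)
{
  return ((size_t)ctx->u1.keyschedule & 0x0f) == 0;
}
#endif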
/* Macros defining alias for the keyschedules.  */
#define keyschenc  u1.keyschedule
#define keyschdec  u2.keyschedule
#define padlockkey u1.padlock_key

/* Two macros to be called before and after the use of AESNI
   instructions.  There should be no external function calls between
   the use of these macros.  Their purpose is to make sure that the
   SSE registers are cleared and won't reveal any information about
   the key or the data.  */
#ifdef USE_AESNI
# define aesni_prepare() do { } while (0)
# define aesni_cleanup()                                                \
  do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                          \
                     "pxor %%xmm1, %%xmm1\n" :: );                      \
  } while (0)
# define aesni_cleanup_2_4()                                            \
  do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                          \
                     "pxor %%xmm3, %%xmm3\n"                            \
                     "pxor %%xmm4, %%xmm4\n":: );                       \
  } while (0)
#else
# define aesni_prepare() do { } while (0)
# define aesni_cleanup() do { } while (0)
#endif
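
/* Illustration (not part of the original file): the intended calling
   pattern for these macros.  Every AES-NI code path below brackets
   its work like this so no key-dependent data survives in the SSE
   registers; the function name is hypothetical.  */
#if 0
static void
example_aesni_usage (RIJNDAEL_context *ctx, byte *out, const byte *in)
{
  aesni_prepare ();
  do_aesni (ctx, 0, out, in);   /* No external function calls in between. */
  aesni_cleanup ();             /* Clears xmm0 and xmm1. */
  /* aesni_cleanup_2_4() additionally clears xmm2..xmm4 and is used
     after the 4-block CTR code, which touches those registers.  */
}
#endif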
/* All the numbers.  */
#include "rijndael-tables.h"


/* Function prototypes.  */
#ifdef USE_AESNI
/* We don't want to inline these functions to help gcc allocate enough
   registers.  */
static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr,
                          unsigned char *b, const unsigned char *a)
  __attribute__ ((__noinline__));
static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr,
                            unsigned char *b, const unsigned char *a)
  __attribute__ ((__noinline__));
#endif /*USE_AESNI*/

static const char *selftest(void);



/* Perform the key setup.  */
static gcry_err_code_t
do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
{
  static int initialized = 0;
  static const char *selftest_failed=0;
  int rounds;
  unsigned int i;
  int j, r, t, rconpointer = 0;
  int KC;
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte k[MAXKC][4];
  } k;
#define k k.k
  union
  {
    PROPERLY_ALIGNED_TYPE dummy;
    byte tk[MAXKC][4];
  } tk;
#define tk tk.tk

  /* The on-the-fly self tests are only run in non-fips mode.  In fips
     mode explicit self-tests are required.  Actually the on-the-fly
     self-tests are not fully thread-safe and it might happen that a
     failed self-test won't get noticed in another thread.

     FIXME: We might want to have a central registry of succeeded
     self-tests. */
  if (!fips_mode () && !initialized)
    {
      initialized = 1;
      selftest_failed = selftest ();
      if (selftest_failed)
        log_error ("%s\n", selftest_failed );
    }
  if (selftest_failed)
    return GPG_ERR_SELFTEST_FAILED;

  ctx->decryption_prepared = 0;
#ifdef USE_PADLOCK
  ctx->use_padlock = 0;
#endif
#ifdef USE_AESNI
  ctx->use_aesni = 0;
#endif

  if( keylen == 128/8 )
    {
      rounds = 10;
      KC = 4;

      if (0)
        {
          ;
        }
#ifdef USE_PADLOCK
      else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES))
        {
          ctx->use_padlock = 1;
          memcpy (ctx->padlockkey, key, keylen);
        }
#endif
#ifdef USE_AESNI
      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
        {
          ctx->use_aesni = 1;
        }
#endif
    }
  else if ( keylen == 192/8 )
    {
      rounds = 12;
      KC = 6;

      if (0)
        {
          ;
        }
#ifdef USE_AESNI
      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
        {
          ctx->use_aesni = 1;
        }
#endif
    }
  else if ( keylen == 256/8 )
    {
      rounds = 14;
      KC = 8;

      if (0)
        {
          ;
        }
#ifdef USE_AESNI
      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
        {
          ctx->use_aesni = 1;
        }
#endif
    }
  else
    return GPG_ERR_INV_KEYLEN;

  ctx->rounds = rounds;

  /* NB: We don't yet support Padlock hardware key generation.  */

  if (0)
    ;
#ifdef USE_AESNI_is_disabled_here
  else if (ctx->use_aesni && ctx->rounds == 10)
    {
      /* Note: This code works for AES-128 but it is not much better
         than using the standard key schedule.  We disable it for
         now and don't put any effort into implementing this for
         AES-192 and AES-256.  */
      asm volatile ("movl %[key], %%esi\n\t"
                    "movdqu (%%esi), %%xmm1\n\t"     /* xmm1 := key  */
                    "movl %[ksch], %%esi\n\t"
                    "movdqa %%xmm1, (%%esi)\n\t"     /* ksch[0] := xmm1 */
                    "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1 */
                    "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1 */
                    "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1 */
                    "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1 */
                    "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1 */
                    "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1 */
                    "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1 */
                    "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1 */
                    "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1 */
                    "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t"
                    "call .Lexpand128_%=\n\t"
                    "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1 */
                    "jmp .Lleave%=\n"

                    ".Lexpand128_%=:\n\t"
                    "pshufd $0xff, %%xmm2, %%xmm2\n\t"
                    "movdqa %%xmm1, %%xmm3\n\t"
                    "pslldq $4, %%xmm3\n\t"
                    "pxor %%xmm3, %%xmm1\n\t"
                    "pslldq $4, %%xmm3\n\t"
                    "pxor %%xmm3, %%xmm1\n\t"
                    "pslldq $4, %%xmm3\n\t"
                    "pxor %%xmm3, %%xmm2\n\t"
                    "pxor %%xmm2, %%xmm1\n\t"
                    "ret\n"

                    ".Lleave%=:\n\t"
                    "pxor %%xmm1, %%xmm1\n\t"
                    "pxor %%xmm2, %%xmm2\n\t"
                    "pxor %%xmm3, %%xmm3\n"
                    :
                    : [key] "g" (key), [ksch] "g" (ctx->keyschenc)
                    : "%esi", "cc", "memory" );
    }
#endif /*USE_AESNI*/
  else
    {
#define W (ctx->keyschenc)
      for (i = 0; i < keylen; i++)
        {
          k[i >> 2][i & 3] = key[i];
        }

      for (j = KC-1; j >= 0; j--)
        {
          *((u32_a_t*)tk[j]) = *((u32_a_t*)k[j]);
        }
      r = 0;
      t = 0;
      /* Copy values into round key array.  */
      for (j = 0; (j < KC) && (r < rounds + 1); )
        {
          for (; (j < KC) && (t < 4); j++, t++)
            {
              *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
            }
          if (t == 4)
            {
              r++;
              t = 0;
            }
        }

      while (r < rounds + 1)
        {
          /* While not enough round key material has been calculated,
             calculate new values.  */
          tk[0][0] ^= S[tk[KC-1][1]];
          tk[0][1] ^= S[tk[KC-1][2]];
          tk[0][2] ^= S[tk[KC-1][3]];
          tk[0][3] ^= S[tk[KC-1][0]];
          tk[0][0] ^= rcon[rconpointer++];

          if (KC != 8)
            {
              for (j = 1; j < KC; j++)
                {
                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
                }
            }
          else
            {
              for (j = 1; j < KC/2; j++)
                {
                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
                }
              tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
              tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
              tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
              tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
              for (j = KC/2 + 1; j < KC; j++)
                {
                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
                }
            }

          /* Copy values into round key array.  */
          for (j = 0; (j < KC) && (r < rounds + 1); )
            {
              for (; (j < KC) && (t < 4); j++, t++)
                {
                  *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
                }
              if (t == 4)
                {
                  r++;
                  t = 0;
                }
            }
        }
#undef W
    }

  return 0;
#undef tk
#undef k
}
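
/* Illustration (not part of the original file): the tk[] loop above
   is the FIPS-197 key expansion.  In word terms, with Nk = KC,

     W[i] = W[i-Nk] ^ SubWord (RotWord (W[i-1])) ^ Rcon[i/Nk]
                                            if i mod Nk == 0,
     W[i] = W[i-Nk] ^ SubWord (W[i-1])      if Nk == 8 and i mod Nk == 4,
     W[i] = W[i-Nk] ^ W[i-1]                otherwise;

   the rotated byte indices S[tk[KC-1][1..3,0]] implement RotWord
   followed by SubWord, and the KC == 8 branch adds the extra SubWord
   step that AES-256 requires.  */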
static gcry_err_code_t
rijndael_setkey (void *context, const byte *key, const unsigned keylen)
{
  RIJNDAEL_context *ctx = context;

  int rc = do_setkey (ctx, key, keylen);
  _gcry_burn_stack ( 100 + 16*sizeof(int));
  return rc;
}


/* Make a decryption key from an encryption key.  */
static void
prepare_decryption( RIJNDAEL_context *ctx )
{
  int r;

#ifdef USE_AESNI
  if (ctx->use_aesni)
    {
      /* The AES-NI decrypt instructions use the Equivalent Inverse
         Cipher, thus we can't use the standard decrypt key
         preparation.  */
      m128i_t *ekey = (m128i_t*)ctx->keyschenc;
      m128i_t *dkey = (m128i_t*)ctx->keyschdec;
      int rr;

      dkey[0] = ekey[ctx->rounds];
      for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--)
        {
          asm volatile
            ("movdqu %[ekey], %%xmm1\n\t"
             /*"aesimc %%xmm1, %%xmm1\n\t"*/
             ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t"
             "movdqu %%xmm1, %[dkey]"
             : [dkey] "=m" (dkey[r])
             : [ekey] "m" (ekey[rr]) );
        }
      dkey[r] = ekey[0];
    }
  else
#endif /*USE_AESNI*/
    {
      union
      {
        PROPERLY_ALIGNED_TYPE dummy;
        byte *w;
      } w;
#define w w.w

      for (r=0; r < MAXROUNDS+1; r++ )
        {
          *((u32_a_t*)ctx->keyschdec[r][0]) = *((u32_a_t*)ctx->keyschenc[r][0]);
          *((u32_a_t*)ctx->keyschdec[r][1]) = *((u32_a_t*)ctx->keyschenc[r][1]);
          *((u32_a_t*)ctx->keyschdec[r][2]) = *((u32_a_t*)ctx->keyschenc[r][2]);
          *((u32_a_t*)ctx->keyschdec[r][3]) = *((u32_a_t*)ctx->keyschenc[r][3]);
        }
#define W (ctx->keyschdec)
      for (r = 1; r < ctx->rounds; r++)
        {
          w = W[r][0];
          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);

          w = W[r][1];
          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);

          w = W[r][2];
          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);

          w = W[r][3];
          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
        }
#undef W
#undef w
    }
}
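
/* Illustration (not part of the original file): in the Equivalent
   Inverse Cipher the decryption schedule is the encryption schedule
   in reverse order, with InvMixColumns applied to every round key
   except the first and the last.  The AES-NI branch above computes,
   conceptually:

     dkey[0]      = ekey[rounds];
     dkey[i]      = AESIMC (ekey[rounds - i]);   for 0 < i < rounds
     dkey[rounds] = ekey[0];

   where AESIMC is the aesimc instruction encoded via the .byte
   sequence.  */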
/* Encrypt one block.  A and B need to be aligned on a 4 byte
   boundary.  A and B may be the same.  */
static void
do_encrypt_aligned (const RIJNDAEL_context *ctx,
                    unsigned char *b, const unsigned char *a)
{
#define rk (ctx->keyschenc)
  int rounds = ctx->rounds;
  int r;
  union
  {
    u32  tempu32[4];  /* Force correct alignment. */
    byte temp[4][4];
  } u;

  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[0][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]);
  *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
                            ^ *((u32_a_t*)T2[u.temp[1][1]])
                            ^ *((u32_a_t*)T3[u.temp[2][2]])
                            ^ *((u32_a_t*)T4[u.temp[3][3]]));
  *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
                            ^ *((u32_a_t*)T2[u.temp[2][1]])
                            ^ *((u32_a_t*)T3[u.temp[3][2]])
                            ^ *((u32_a_t*)T4[u.temp[0][3]]));
  *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
                            ^ *((u32_a_t*)T2[u.temp[3][1]])
                            ^ *((u32_a_t*)T3[u.temp[0][2]])
                            ^ *((u32_a_t*)T4[u.temp[1][3]]));
  *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
                            ^ *((u32_a_t*)T2[u.temp[0][1]])
                            ^ *((u32_a_t*)T3[u.temp[1][2]])
                            ^ *((u32_a_t*)T4[u.temp[2][3]]));

  for (r = 1; r < rounds-1; r++)
    {
      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);

      *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
                                ^ *((u32_a_t*)T2[u.temp[1][1]])
                                ^ *((u32_a_t*)T3[u.temp[2][2]])
                                ^ *((u32_a_t*)T4[u.temp[3][3]]));
      *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
                                ^ *((u32_a_t*)T2[u.temp[2][1]])
                                ^ *((u32_a_t*)T3[u.temp[3][2]])
                                ^ *((u32_a_t*)T4[u.temp[0][3]]));
      *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
                                ^ *((u32_a_t*)T2[u.temp[3][1]])
                                ^ *((u32_a_t*)T3[u.temp[0][2]])
                                ^ *((u32_a_t*)T4[u.temp[1][3]]));
      *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
                                ^ *((u32_a_t*)T2[u.temp[0][1]])
                                ^ *((u32_a_t*)T3[u.temp[1][2]])
                                ^ *((u32_a_t*)T4[u.temp[2][3]]));
    }

  /* Last round is special. */
  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[rounds-1][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]);
  b[ 0] = T1[u.temp[0][0]][1];
  b[ 1] = T1[u.temp[1][1]][1];
  b[ 2] = T1[u.temp[2][2]][1];
  b[ 3] = T1[u.temp[3][3]][1];
  b[ 4] = T1[u.temp[1][0]][1];
  b[ 5] = T1[u.temp[2][1]][1];
  b[ 6] = T1[u.temp[3][2]][1];
  b[ 7] = T1[u.temp[0][3]][1];
  b[ 8] = T1[u.temp[2][0]][1];
  b[ 9] = T1[u.temp[3][1]][1];
  b[10] = T1[u.temp[0][2]][1];
  b[11] = T1[u.temp[1][3]][1];
  b[12] = T1[u.temp[3][0]][1];
  b[13] = T1[u.temp[0][1]][1];
  b[14] = T1[u.temp[1][2]][1];
  b[15] = T1[u.temp[2][3]][1];
  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[rounds][0]);
  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]);
  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
#undef rk
}
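
/* Illustration (not part of the original file): each T-table lookup
   above combines SubBytes, ShiftRows and MixColumns for one state
   byte, so a full round is 16 table lookups and 16 XORs.  The rotated
   column indices (u.temp[1][1], u.temp[2][2], ...) implement
   ShiftRows.  The last round skips MixColumns, which is why it
   indexes T1[..][1]: that byte of a T1 entry holds the plain S-box
   value, as the byte-wise stores show.  */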
static void
do_encrypt (const RIJNDAEL_context *ctx,
            unsigned char *bx, const unsigned char *ax)
{
  /* BX and AX are not necessarily correctly aligned.  Thus we might
     need to copy them here.  We try to align to 16 bytes.  */
  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
    {
      union
      {
        u32  dummy[4];
        byte a[16] ATTR_ALIGNED_16;
      } a;
      union
      {
        u32  dummy[4];
        byte b[16] ATTR_ALIGNED_16;
      } b;

      memcpy (a.a, ax, 16);
      do_encrypt_aligned (ctx, b.b, a.a);
      memcpy (bx, b.b, 16);
    }
  else
    {
      do_encrypt_aligned (ctx, bx, ax);
    }
}
/* Encrypt or decrypt one block using the padlock engine.  A and B may
   be the same.  */
#ifdef USE_PADLOCK
static void
do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
            unsigned char *bx, const unsigned char *ax)
{
  /* BX and AX are not necessarily correctly aligned.  Thus we need to
     copy them here.  */
  unsigned char a[16] __attribute__ ((aligned (16)));
  unsigned char b[16] __attribute__ ((aligned (16)));
  unsigned int cword[4] __attribute__ ((aligned (16)));

  /* The control word fields are:

      127:12   11:10 9     8     7     6     5     4     3:0
      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
  cword[0] = (ctx->rounds & 15);  /* (The mask is just a safeguard.)  */
  cword[1] = 0;
  cword[2] = 0;
  cword[3] = 0;
  if (decrypt_flag)
    cword[0] |= 0x00000200;

  memcpy (a, ax, 16);

  asm volatile
    ("pushfl\n\t"          /* Force key reload.  */
     "popfl\n\t"
     "xchg %3, %%ebx\n\t"  /* Load key.  */
     "movl $1, %%ecx\n\t"  /* Init counter for just one block.  */
     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
     "xchg %3, %%ebx\n"    /* Restore GOT register.  */
     : /* No output */
     : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
     : "%ecx", "cc", "memory"
     );

  memcpy (bx, b, 16);
}
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
/* Encrypt one block using the Intel AES-NI instructions.  A and B may
   be the same; they need to be properly aligned to 16 bytes.

   Our problem here is that gcc does not allow the "x" constraint for
   SSE registers in asm unless you compile with -msse.  The common
   wisdom is to use a separate file for SSE instructions and build it
   separately.  This would require a lot of extra build system stuff,
   similar to what we do in mpi/ for the asm stuff.  What we do
   instead is to use standard registers and a bit more of plain asm
   which copies the data and key stuff to the SSE registers and later
   back.  If we decide to implement some block modes with parallelized
   AES instructions, it might indeed be better to use plain asm ala
   mpi/.  */
static void
do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
                      unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  /* Note: For now we relax the alignment requirement for A and B: It
     does not make much difference because in many cases we would need
     to memcpy them to an extra buffer; using the movdqu is much faster
     than memcpy and movdqa.  For CFB we know that the IV is properly
     aligned but that is a special case.  It would be better to
     implement CFB directly in asm.  */
  asm volatile ("movdqu %[src], %%xmm0\n\t"   /* xmm0 := *a     */
                "movl %[key], %%esi\n\t"      /* esi := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0] */
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"
                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %%xmm0, %[dst]\n"
                : [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "r" (ctx->keyschenc),
                  [rounds] "r" (ctx->rounds)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}
static void
do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
                      unsigned char *b, const unsigned char *a)
{
#define aesdec_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
#define aesdeclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
  asm volatile ("movdqu %[src], %%xmm0\n\t"   /* xmm0 := *a     */
                "movl %[key], %%esi\n\t"
                "movdqa (%%esi), %%xmm1\n\t"
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Ldeclast%=\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Ldeclast%=\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesdec_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"
                ".Ldeclast%=:\n\t"
                aesdeclast_xmm1_xmm0
                "movdqu %%xmm0, %[dst]\n"
                : [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "r" (ctx->keyschdec),
                  [rounds] "r" (ctx->rounds)
                : "%esi", "cc", "memory");
#undef aesdec_xmm1_xmm0
#undef aesdeclast_xmm1_xmm0
}
/* Perform a CFB encryption or decryption round using the
   initialization vector IV and the input block A.  Write the result
   to the output block B and update IV.  IV needs to be 16 byte
   aligned.  */
static void
do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
              unsigned char *iv, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  asm volatile ("movdqa %[iv], %%xmm0\n\t"    /* xmm0 := IV     */
                "movl %[key], %%esi\n\t"      /* esi := keyschenc */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0] */
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"
                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %[src], %%xmm1\n\t"   /* Save input.  */
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 = input ^ IV  */
                "cmp $1, %[decrypt]\n\t"
                "jz .Ldecrypt_%=\n\t"
                "movdqa %%xmm0, %[iv]\n\t"    /* [encrypt] Store IV.  */
                "jmp .Lleave_%=\n"
                ".Ldecrypt_%=:\n\t"
                "movdqa %%xmm1, %[iv]\n"      /* [decrypt] Store IV.  */
                ".Lleave_%=:\n\t"
                "movdqu %%xmm0, %[dst]\n"     /* Store output.  */
                : [iv] "+m" (*iv), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [decrypt] "m" (decrypt_flag)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}
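
/* Illustration (not part of the original file): what the asm above
   computes, expressed as plain C.  The only difference between the
   CFB encrypt and decrypt rounds is which block becomes the new IV;
   the names are hypothetical and for exposition only.  */
#if 0
static void
cfb_round_sketch (const RIJNDAEL_context *ctx, int decrypt_flag,
                  unsigned char *iv, unsigned char *b,
                  const unsigned char *a)
{
  unsigned char enciv[BLOCKSIZE];
  unsigned char tmp[BLOCKSIZE];
  int i;

  memcpy (tmp, a, BLOCKSIZE);    /* Keep the input; A and B may alias. */
  do_encrypt (ctx, enciv, iv);   /* enciv := E_k(IV) */
  for (i = 0; i < BLOCKSIZE; i++)
    b[i] = tmp[i] ^ enciv[i];    /* output := input ^ E_k(IV) */
  /* Encryption feeds the ciphertext (= the output) back; decryption
     feeds the ciphertext (= the input) back.  */
  memcpy (iv, decrypt_flag ? tmp : b, BLOCKSIZE);
}
#endif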
/* Perform a CTR encryption round using the counter CTR and the input
   block A.  Write the result to the output block B and update CTR.
   CTR needs to be a 16 byte aligned big-endian value.  */
static void
do_aesni_ctr (const RIJNDAEL_context *ctx,
              unsigned char *ctr, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };

  asm volatile ("movdqa %[ctr], %%xmm0\n\t"   /* xmm0, xmm2 := CTR  */
                "movaps %%xmm0, %%xmm2\n\t"
                "mov $1, %%esi\n\t"           /* xmm2++ (big-endian) */
                "movd %%esi, %%xmm1\n\t"
                "pshufb %[mask], %%xmm2\n\t"
                "paddq %%xmm1, %%xmm2\n\t"
                "pshufb %[mask], %%xmm2\n\t"
                "movdqa %%xmm2, %[ctr]\n"     /* Update CTR.        */

                "movl %[key], %%esi\n\t"      /* esi := keyschenc   */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0]     */
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]     */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                "movdqa 0xe0(%%esi), %%xmm1\n"
                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                "movdqu %[src], %%xmm1\n\t"   /* xmm1 := input      */
                "pxor %%xmm1, %%xmm0\n\t"     /* EncCTR ^= input    */
                "movdqu %%xmm0, %[dst]"       /* Store EncCTR.      */
                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [mask] "m" (*be_mask)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenclast_xmm1_xmm0
}
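
/* Illustration (not part of the original file): the pshufb/paddq/
   pshufb sequence above increments the big-endian counter by
   byte-swapping it into little-endian order, adding 1 with a 64-bit
   add, and swapping back.  A byte-wise C equivalent of the increment
   (ignoring the 64-bit carry limit of paddq) looks like this:  */
#if 0
static void
ctr_increment_sketch (unsigned char *ctr)  /* 16-byte big-endian value. */
{
  int i;

  for (i = BLOCKSIZE - 1; i >= 0; i--)
    if (++ctr[i])   /* Stop once a byte does not wrap around to zero. */
      break;
}
#endif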
/* Four blocks at a time variant of do_aesni_ctr.  */
static void
do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                unsigned char *ctr, unsigned char *b, const unsigned char *a)
{
#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
#define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
#define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
#define aesenc_xmm1_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
#define aesenclast_xmm1_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
#define aesenclast_xmm1_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
#define aesenclast_xmm1_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"

  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };

  /* Register usage:
      esi   keyschedule
      xmm0  CTR-0
      xmm1  temp / round key
      xmm2  CTR-1
      xmm3  CTR-2
      xmm4  CTR-3
      xmm5  temp
   */

  asm volatile ("movdqa %[ctr], %%xmm0\n\t"   /* xmm0, xmm2 := CTR  */
                "movaps %%xmm0, %%xmm2\n\t"
                "mov $1, %%esi\n\t"           /* xmm1 := 1          */
                "movd %%esi, %%xmm1\n\t"
                "pshufb %[mask], %%xmm2\n\t"  /* xmm2 := le(xmm2)   */
                "paddq %%xmm1, %%xmm2\n\t"    /* xmm2++             */
                "movaps %%xmm2, %%xmm3\n\t"   /* xmm3 := xmm2       */
                "paddq %%xmm1, %%xmm3\n\t"    /* xmm3++             */
                "movaps %%xmm3, %%xmm4\n\t"   /* xmm4 := xmm3       */
                "paddq %%xmm1, %%xmm4\n\t"    /* xmm4++             */
                "movaps %%xmm4, %%xmm5\n\t"   /* xmm5 := xmm4       */
                "paddq %%xmm1, %%xmm5\n\t"    /* xmm5++             */
                "pshufb %[mask], %%xmm2\n\t"  /* xmm2 := be(xmm2)   */
                "pshufb %[mask], %%xmm3\n\t"  /* xmm3 := be(xmm3)   */
                "pshufb %[mask], %%xmm4\n\t"  /* xmm4 := be(xmm4)   */
                "pshufb %[mask], %%xmm5\n\t"  /* xmm5 := be(xmm5)   */
                "movdqa %%xmm5, %[ctr]\n"     /* Update CTR.        */

                "movl %[key], %%esi\n\t"      /* esi := keyschenc   */
                "movdqa (%%esi), %%xmm1\n\t"  /* xmm1 := key[0]     */
                "pxor %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]     */
                "pxor %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]     */
                "pxor %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]     */
                "pxor %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]     */
                "movdqa 0x10(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x20(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x30(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x40(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x50(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x60(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x70(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x80(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0x90(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xa0(%%esi), %%xmm1\n\t"
                "cmp $10, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xb0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xc0(%%esi), %%xmm1\n\t"
                "cmp $12, %[rounds]\n\t"
                "jz .Lenclast%=\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xd0(%%esi), %%xmm1\n\t"
                aesenc_xmm1_xmm0
                aesenc_xmm1_xmm2
                aesenc_xmm1_xmm3
                aesenc_xmm1_xmm4
                "movdqa 0xe0(%%esi), %%xmm1\n"
                ".Lenclast%=:\n\t"
                aesenclast_xmm1_xmm0
                aesenclast_xmm1_xmm2
                aesenclast_xmm1_xmm3
                aesenclast_xmm1_xmm4

                "movdqu %[src], %%xmm1\n\t"     /* Get block 1.      */
                "pxor %%xmm1, %%xmm0\n\t"       /* EncCTR-1 ^= input */
                "movdqu %%xmm0, %[dst]\n\t"     /* Store block 1     */

                "movdqu (16)%[src], %%xmm1\n\t" /* Get block 2.      */
                "pxor %%xmm1, %%xmm2\n\t"       /* EncCTR-2 ^= input */
                "movdqu %%xmm2, (16)%[dst]\n\t" /* Store block 2.    */

                "movdqu (32)%[src], %%xmm1\n\t" /* Get block 3.      */
                "pxor %%xmm1, %%xmm3\n\t"       /* EncCTR-3 ^= input */
                "movdqu %%xmm3, (32)%[dst]\n\t" /* Store block 3.    */

                "movdqu (48)%[src], %%xmm1\n\t" /* Get block 4.      */
                "pxor %%xmm1, %%xmm4\n\t"       /* EncCTR-4 ^= input */
                "movdqu %%xmm4, (48)%[dst]"     /* Store block 4.    */
                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                : [src] "m" (*a),
                  [key] "g" (ctx->keyschenc),
                  [rounds] "g" (ctx->rounds),
                  [mask] "m" (*be_mask)
                : "%esi", "cc", "memory");
#undef aesenc_xmm1_xmm0
#undef aesenc_xmm1_xmm2
#undef aesenc_xmm1_xmm3
#undef aesenc_xmm1_xmm4
#undef aesenclast_xmm1_xmm0
#undef aesenclast_xmm1_xmm2
#undef aesenclast_xmm1_xmm3
#undef aesenclast_xmm1_xmm4
}
static void
do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
          unsigned char *bx, const unsigned char *ax)
{
  if (decrypt_flag)
    {
      if (!ctx->decryption_prepared )
        {
          prepare_decryption ( ctx );
          ctx->decryption_prepared = 1;
        }
      do_aesni_dec_aligned (ctx, bx, ax);
    }
  else
    do_aesni_enc_aligned (ctx, bx, ax);
}
#endif /*USE_AESNI*/


static void
rijndael_encrypt (void *context, byte *b, const byte *a)
{
  RIJNDAEL_context *ctx = context;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      do_padlock (ctx, 0, b, a);
      _gcry_burn_stack (48 + 15 /* possible padding for alignment */);
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      do_aesni (ctx, 0, b, a);
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
      do_encrypt (ctx, b, a);
      _gcry_burn_stack (56 + 2*sizeof(int));
    }
}
/* Bulk encryption of complete blocks in CFB mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c.  */
void
_gcry_aes_cfb_enc (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *ivp;
  int i;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      /* Fixme: Let Padlock do the CFBing.  */
      for ( ;nblocks; nblocks-- )
        {
          /* Encrypt the IV.  */
          do_padlock (ctx, 0, iv, iv);
          /* XOR the input with the IV and store input into IV.  */
          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
            *outbuf++ = (*ivp++ ^= *inbuf++);
        }
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      for ( ;nblocks; nblocks-- )
        {
          do_aesni_cfb (ctx, 0, iv, outbuf, inbuf);
          outbuf += BLOCKSIZE;
          inbuf  += BLOCKSIZE;
        }
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
      for ( ;nblocks; nblocks-- )
        {
          /* Encrypt the IV.  */
          do_encrypt_aligned (ctx, iv, iv);
          /* XOR the input with the IV and store input into IV.  */
          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
            *outbuf++ = (*ivp++ ^= *inbuf++);
        }
    }

  _gcry_burn_stack (48 + 2*sizeof(int));
}
/* Bulk encryption of complete blocks in CBC mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c.  */
void
_gcry_aes_cbc_enc (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks, int cbc_mac)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *ivp;
  int i;

#ifdef USE_AESNI
  if (ctx->use_aesni)
    aesni_prepare ();
#endif /*USE_AESNI*/

  for ( ;nblocks; nblocks-- )
    {
      for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
        outbuf[i] = inbuf[i] ^ *ivp++;

      if (0)
        ;
#ifdef USE_PADLOCK
      else if (ctx->use_padlock)
        do_padlock (ctx, 0, outbuf, outbuf);
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
      else if (ctx->use_aesni)
        do_aesni (ctx, 0, outbuf, outbuf);
#endif /*USE_AESNI*/
      else
        do_encrypt (ctx, outbuf, outbuf );

      memcpy (iv, outbuf, BLOCKSIZE);
      inbuf += BLOCKSIZE;
      if (!cbc_mac)
        outbuf += BLOCKSIZE;
    }

#ifdef USE_AESNI
  if (ctx->use_aesni)
    aesni_cleanup ();
#endif /*USE_AESNI*/

  _gcry_burn_stack (48 + 2*sizeof(int));
}
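
/* Illustration (not part of the original file): with CBC_MAC set the
   output pointer is not advanced, so after the loop OUTBUF holds only
   the last ciphertext block, which is exactly the CBC-MAC of the
   input.  A hypothetical caller computing a MAC would do:  */
#if 0
  unsigned char mac[BLOCKSIZE];
  unsigned char iv[BLOCKSIZE] = { 0 };  /* CBC-MAC uses a zero IV. */

  _gcry_aes_cbc_enc (ctx, iv, mac, data, nblocks, 1 /*cbc_mac*/);
#endif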
/* Bulk encryption of complete blocks in CTR mode.  Caller needs to
   make sure that CTR is aligned on a 16 byte boundary if AES-NI is
   used; otherwise the minimum alignment is that of an u32.  This
   function is only intended for the bulk encryption feature of
   cipher.c.  CTR is expected to be of size BLOCKSIZE.  */
void
_gcry_aes_ctr_enc (void *context, unsigned char *ctr,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *p;
  int i;

  if (0)
    ;
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      for ( ;nblocks > 3 ; nblocks -= 4 )
        {
          do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
          outbuf += 4*BLOCKSIZE;
          inbuf  += 4*BLOCKSIZE;
        }
      for ( ;nblocks; nblocks-- )
        {
          do_aesni_ctr (ctx, ctr, outbuf, inbuf);
          outbuf += BLOCKSIZE;
          inbuf  += BLOCKSIZE;
        }
      aesni_cleanup ();
      aesni_cleanup_2_4 ();
    }
#endif /*USE_AESNI*/
  else
    {
      union { unsigned char x1[16]; u32 x32[4]; } tmp;

      for ( ;nblocks; nblocks-- )
        {
          /* Encrypt the counter.  */
          do_encrypt_aligned (ctx, tmp.x1, ctr);
          /* XOR the input with the encrypted counter and store in output.  */
          for (p=tmp.x1, i=0; i < BLOCKSIZE; i++)
            *outbuf++ = (*p++ ^= *inbuf++);
          /* Increment the counter.  */
          for (i = BLOCKSIZE; i > 0; i--)
            {
              ctr[i-1]++;
              if (ctr[i-1])
                break;
            }
        }
    }

  _gcry_burn_stack (48 + 2*sizeof(int));
}
/* Decrypt one block.  A and B need to be aligned on a 4 byte boundary
   and the decryption must have been prepared.  A and B may be the
   same.  */
static void
do_decrypt_aligned (RIJNDAEL_context *ctx,
                    unsigned char *b, const unsigned char *a)
{
#define rk (ctx->keyschdec)
  int rounds = ctx->rounds;
  int r;
  union
  {
    u32  tempu32[4];  /* Force correct alignment. */
    byte temp[4][4];
  } u;

  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[rounds][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]);

  *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
                            ^ *((u32_a_t*)T6[u.temp[3][1]])
                            ^ *((u32_a_t*)T7[u.temp[2][2]])
                            ^ *((u32_a_t*)T8[u.temp[1][3]]));
  *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
                            ^ *((u32_a_t*)T6[u.temp[0][1]])
                            ^ *((u32_a_t*)T7[u.temp[3][2]])
                            ^ *((u32_a_t*)T8[u.temp[2][3]]));
  *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
                            ^ *((u32_a_t*)T6[u.temp[1][1]])
                            ^ *((u32_a_t*)T7[u.temp[0][2]])
                            ^ *((u32_a_t*)T8[u.temp[3][3]]));
  *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
                            ^ *((u32_a_t*)T6[u.temp[2][1]])
                            ^ *((u32_a_t*)T7[u.temp[1][2]])
                            ^ *((u32_a_t*)T8[u.temp[0][3]]));

  for (r = rounds-1; r > 1; r--)
    {
      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);

      *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
                                ^ *((u32_a_t*)T6[u.temp[3][1]])
                                ^ *((u32_a_t*)T7[u.temp[2][2]])
                                ^ *((u32_a_t*)T8[u.temp[1][3]]));
      *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
                                ^ *((u32_a_t*)T6[u.temp[0][1]])
                                ^ *((u32_a_t*)T7[u.temp[3][2]])
                                ^ *((u32_a_t*)T8[u.temp[2][3]]));
      *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
                                ^ *((u32_a_t*)T6[u.temp[1][1]])
                                ^ *((u32_a_t*)T7[u.temp[0][2]])
                                ^ *((u32_a_t*)T8[u.temp[3][3]]));
      *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
                                ^ *((u32_a_t*)T6[u.temp[2][1]])
                                ^ *((u32_a_t*)T7[u.temp[1][2]])
                                ^ *((u32_a_t*)T8[u.temp[0][3]]));
    }

  /* Last round is special. */
  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[1][0]);
  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]);
  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]);
  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]);
  b[ 0] = S5[u.temp[0][0]];
  b[ 1] = S5[u.temp[3][1]];
  b[ 2] = S5[u.temp[2][2]];
  b[ 3] = S5[u.temp[1][3]];
  b[ 4] = S5[u.temp[1][0]];
  b[ 5] = S5[u.temp[0][1]];
  b[ 6] = S5[u.temp[3][2]];
  b[ 7] = S5[u.temp[2][3]];
  b[ 8] = S5[u.temp[2][0]];
  b[ 9] = S5[u.temp[1][1]];
  b[10] = S5[u.temp[0][2]];
  b[11] = S5[u.temp[3][3]];
  b[12] = S5[u.temp[3][0]];
  b[13] = S5[u.temp[2][1]];
  b[14] = S5[u.temp[1][2]];
  b[15] = S5[u.temp[0][3]];
  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[0][0]);
  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]);
  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
#undef rk
}
/* Decrypt one block.  AX and BX may be the same.  */
static void
do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
{
  if ( !ctx->decryption_prepared )
    {
      prepare_decryption ( ctx );
      _gcry_burn_stack (64);
      ctx->decryption_prepared = 1;
    }

  /* BX and AX are not necessarily correctly aligned.  Thus we might
     need to copy them here.  We try to align to 16 bytes.  */
  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
    {
      union
      {
        u32  dummy[4];
        byte a[16] ATTR_ALIGNED_16;
      } a;
      union
      {
        u32  dummy[4];
        byte b[16] ATTR_ALIGNED_16;
      } b;

      memcpy (a.a, ax, 16);
      do_decrypt_aligned (ctx, b.b, a.a);
      memcpy (bx, b.b, 16);
    }
  else
    {
      do_decrypt_aligned (ctx, bx, ax);
    }
}
static void
rijndael_decrypt (void *context, byte *b, const byte *a)
{
  RIJNDAEL_context *ctx = context;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      do_padlock (ctx, 1, b, a);
      _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */);
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      do_aesni (ctx, 1, b, a);
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
      do_decrypt (ctx, b, a);
      _gcry_burn_stack (56+2*sizeof(int));
    }
}
  1365. /* Bulk decryption of complete blocks in CFB mode. Caller needs to
  1366. make sure that IV is aligned on an unisgned lonhg boundary. This
  1367. function is only intended for the bulk encryption feature of
  1368. cipher.c. */
void
_gcry_aes_cfb_dec (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *ivp;
  unsigned char temp;
  int i;

  if (0)
    ;
#ifdef USE_PADLOCK
  else if (ctx->use_padlock)
    {
      /* Fixme: Let Padlock do the CFBing. */
      for ( ;nblocks; nblocks-- )
        {
          do_padlock (ctx, 0, iv, iv);
          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
            {
              temp = *inbuf++;
              *outbuf++ = *ivp ^ temp;
              *ivp++ = temp;
            }
        }
    }
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
  else if (ctx->use_aesni)
    {
      aesni_prepare ();
      for ( ;nblocks; nblocks-- )
        {
          do_aesni_cfb (ctx, 1, iv, outbuf, inbuf);
          outbuf += BLOCKSIZE;
          inbuf  += BLOCKSIZE;
        }
      aesni_cleanup ();
    }
#endif /*USE_AESNI*/
  else
    {
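      /* Plain C fallback: encrypt the IV, XOR it with the ciphertext
         to recover the plaintext, and copy the just-read ciphertext
         byte into the IV.  Going through TEMP keeps this correct
         even when INBUF and OUTBUF overlap.  */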
      for ( ;nblocks; nblocks-- )
        {
          do_encrypt_aligned (ctx, iv, iv);
          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
            {
              temp = *inbuf++;
              *outbuf++ = *ivp ^ temp;
              *ivp++ = temp;
            }
        }
    }

  _gcry_burn_stack (48 + 2*sizeof(int));
}
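
/* For reference, the CFB-128 decryption recurrence implemented above
   is the one from NIST SP 800-38A, section 6.3:

     I_1 = IV;   I_j = C_(j-1)  for j > 1
     O_j = CIPH_K(I_j)
     P_j = C_j XOR O_j

   Only the forward (encryption) direction of the block cipher is
   ever used, which is why do_encrypt_aligned is called here.  */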


/* Bulk decryption of complete blocks in CBC mode.  Caller needs to
   make sure that IV is aligned on an unsigned long boundary.  This
   function is only intended for the bulk encryption feature of
   cipher.c. */
void
_gcry_aes_cbc_dec (void *context, unsigned char *iv,
                   void *outbuf_arg, const void *inbuf_arg,
                   unsigned int nblocks)
{
  RIJNDAEL_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *ivp;
  int i;
  unsigned char savebuf[BLOCKSIZE];

#ifdef USE_AESNI
  if (ctx->use_aesni)
    aesni_prepare ();
#endif /*USE_AESNI*/

  for ( ;nblocks; nblocks-- )
    {
      /* We need to save INBUF away because it may be identical to
         OUTBUF.  */
      memcpy (savebuf, inbuf, BLOCKSIZE);
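      /* Decrypt the block, XOR in the previous ciphertext block
         (still held in IV), then promote the saved ciphertext to be
         the next IV:  P_i = D_K(C_i) XOR C_(i-1).  */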
      if (0)
        ;
#ifdef USE_PADLOCK
      else if (ctx->use_padlock)
        do_padlock (ctx, 1, outbuf, inbuf);
#endif /*USE_PADLOCK*/
#ifdef USE_AESNI
      else if (ctx->use_aesni)
        do_aesni (ctx, 1, outbuf, inbuf);
#endif /*USE_AESNI*/
      else
        do_decrypt (ctx, outbuf, inbuf);

      for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
        outbuf[i] ^= *ivp++;
      memcpy (iv, savebuf, BLOCKSIZE);

      inbuf  += BLOCKSIZE;
      outbuf += BLOCKSIZE;
    }

#ifdef USE_AESNI
  if (ctx->use_aesni)
    aesni_cleanup ();
#endif /*USE_AESNI*/

  _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*));
}


/* Run the self-tests for AES 128.  Returns NULL on success. */
static const char*
selftest_basic_128 (void)
{
  RIJNDAEL_context ctx;
  unsigned char scratch[16];

  /* The test vectors are from the AES supplied ones; more or less
     randomly taken from ecb_tbl.txt (I=42,81,14) */
#if 1
  static const unsigned char plaintext_128[16] =
    {
      0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33,
      0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A
    };
  static const unsigned char key_128[16] =
    {
      0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0,
      0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA
    };
  static const unsigned char ciphertext_128[16] =
    {
      0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2,
      0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD
    };
#else
  /* Test vectors from fips-197, appendix C. */
# warning debug test vectors in use
  static const unsigned char plaintext_128[16] =
    {
      0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
      0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
    };
  static const unsigned char key_128[16] =
    {
      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
      0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
      /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */
      /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */
    };
  static const unsigned char ciphertext_128[16] =
    {
      0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
      0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
    };
#endif

  rijndael_setkey (&ctx, key_128, sizeof (key_128));
  rijndael_encrypt (&ctx, scratch, plaintext_128);
  if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
    return "AES-128 test encryption failed.";
  rijndael_decrypt (&ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
    return "AES-128 test decryption failed.";

  return NULL;
}
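
/* For reference only: the basic self-tests above call this module's
   own entry points directly.  The same known-answer check run through
   the public libgcrypt API would look roughly like the (untested)
   sketch below:

     gcry_cipher_hd_t hd;
     unsigned char out[16];

     gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_ECB, 0);
     gcry_cipher_setkey (hd, key_128, 16);
     gcry_cipher_encrypt (hd, out, 16, plaintext_128, 16);
     gcry_cipher_close (hd);
*/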


/* Run the self-tests for AES 192.  Returns NULL on success. */
static const char*
selftest_basic_192 (void)
{
  RIJNDAEL_context ctx;
  unsigned char scratch[16];
  static unsigned char plaintext_192[16] =
    {
      0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4,
      0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72
    };
  static unsigned char key_192[24] =
    {
      0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C,
      0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16,
      0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20
    };
  static const unsigned char ciphertext_192[16] =
    {
      0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC,
      0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
    };

  rijndael_setkey (&ctx, key_192, sizeof(key_192));
  rijndael_encrypt (&ctx, scratch, plaintext_192);
  if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
    return "AES-192 test encryption failed.";
  rijndael_decrypt (&ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)))
    return "AES-192 test decryption failed.";

  return NULL;
}


/* Run the self-tests for AES 256.  Returns NULL on success. */
static const char*
selftest_basic_256 (void)
{
  RIJNDAEL_context ctx;
  unsigned char scratch[16];
  static unsigned char plaintext_256[16] =
    {
      0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
    };
  static unsigned char key_256[32] =
    {
      0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10,
      0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A,
      0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24,
      0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E
    };
  static const unsigned char ciphertext_256[16] =
    {
      0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71,
      0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
    };

  rijndael_setkey (&ctx, key_256, sizeof(key_256));
  rijndael_encrypt (&ctx, scratch, plaintext_256);
  if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
    return "AES-256 test encryption failed.";
  rijndael_decrypt (&ctx, scratch, scratch);
  if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
    return "AES-256 test decryption failed.";

  return NULL;
}


/* Run all the self-tests and return NULL on success.  This function
   is used for the on-the-fly self-tests. */
static const char *
selftest (void)
{
  const char *r;

  if ( (r = selftest_basic_128 ())
       || (r = selftest_basic_192 ())
       || (r = selftest_basic_256 ()) )
    return r;

  return NULL;
}


/* Run the self-tests using the test vectors from NIST SP800-38a.pdf
   for AES-128 in CFB and OFB mode.  Returns NULL on success. */
static const char *
selftest_fips_128_38a (int requested_mode)
{
  struct tv
  {
    int mode;
    const unsigned char key[16];
    const unsigned char iv[16];
    struct
    {
      const unsigned char input[16];
      const unsigned char output[16];
    } data[4];
  } tv[2] =
    {
      {
        GCRY_CIPHER_MODE_CFB,  /* F.3.13, CFB128-AES128 */
        { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
          0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
        {
          { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
              0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
            { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
              0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },

          { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
              0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
            { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f,
              0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } },

          { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
              0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
            { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40,
              0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } },

          { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
              0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
            { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e,
              0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } }
        }
      },
      {
        GCRY_CIPHER_MODE_OFB,
        { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
          0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
        { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
        {
          { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
              0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
            { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
              0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },

          { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
              0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
            { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03,
              0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } },

          { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
              0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
            { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6,
              0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } },

          { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
              0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
            { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78,
              0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } },
        }
      }
    };
  unsigned char scratch[16];
  gpg_error_t err;
  int tvi, idx;
  gcry_cipher_hd_t hdenc = NULL;
  gcry_cipher_hd_t hddec = NULL;

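  /* Convenience macro: release both cipher handles and bail out of
     the test with the given error description.  */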
#define Fail(a) do {             \
    _gcry_cipher_close (hdenc);  \
    _gcry_cipher_close (hddec);  \
    return a;                    \
  } while (0)

  gcry_assert (sizeof tv[0].data[0].input == sizeof scratch);
  gcry_assert (sizeof tv[0].data[0].output == sizeof scratch);

  for (tvi=0; tvi < DIM (tv); tvi++)
    if (tv[tvi].mode == requested_mode)
      break;
  if (tvi == DIM (tv))
    Fail ("no test data for this mode");

  err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0);
  if (err)
    Fail ("open");
  err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0);
  if (err)
    Fail ("open");
  err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key);
  if (!err)
    err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key);
  if (err)
    Fail ("set key");
  err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv);
  if (!err)
    err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv);
  if (err)
    Fail ("set IV");
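
  /* The four data blocks of each vector are chained: the open cipher
     handles carry the evolving CFB/OFB state from one 16 byte block
     to the next, matching the SP 800-38A examples.  */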
  for (idx=0; idx < DIM (tv[tvi].data); idx++)
    {
      err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch,
                                  tv[tvi].data[idx].input,
                                  sizeof tv[tvi].data[idx].input);
      if (err)
        Fail ("encrypt command");
      if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch))
        Fail ("encrypt mismatch");
      err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch,
                                  tv[tvi].data[idx].output,
                                  sizeof tv[tvi].data[idx].output);
      if (err)
        Fail ("decrypt command");
      if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch))
        Fail ("decrypt mismatch");
    }

#undef Fail
  _gcry_cipher_close (hdenc);
  _gcry_cipher_close (hddec);
  return NULL;
}


/* Complete selftest for AES-128 with all modes and driver code.  */
static gpg_err_code_t
selftest_fips_128 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  what = "low-level";
  errtxt = selftest_basic_128 ();
  if (errtxt)
    goto failed;

  if (extended)
    {
      what = "cfb";
      errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB);
      if (errtxt)
        goto failed;

      what = "ofb";
      errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB);
      if (errtxt)
        goto failed;
    }

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES128, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


/* Complete selftest for AES-192.  */
static gpg_err_code_t
selftest_fips_192 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  (void)extended; /* No extended tests available.  */

  what = "low-level";
  errtxt = selftest_basic_192 ();
  if (errtxt)
    goto failed;

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES192, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


/* Complete selftest for AES-256.  */
static gpg_err_code_t
selftest_fips_256 (int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  (void)extended; /* No extended tests available.  */

  what = "low-level";
  errtxt = selftest_basic_256 ();
  if (errtxt)
    goto failed;

  return 0; /* Succeeded. */

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_AES256, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


/* Run a full self-test for ALGO and return 0 on success.  */
static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
  gpg_err_code_t ec;

  switch (algo)
    {
    case GCRY_CIPHER_AES128:
      ec = selftest_fips_128 (extended, report);
      break;
    case GCRY_CIPHER_AES192:
      ec = selftest_fips_192 (extended, report);
      break;
    case GCRY_CIPHER_AES256:
      ec = selftest_fips_256 (extended, report);
      break;
    default:
      ec = GPG_ERR_CIPHER_ALGO;
      break;
    }

  return ec;
}


static const char *rijndael_names[] =
  {
    "RIJNDAEL",
    "AES128",
    "AES-128",
    NULL
  };
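
/* The OIDs below are the NIST CSOR identifiers for AES-128 in the
   respective cipher modes (arc 2.16.840.1.101.3.4.1).  */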
static gcry_cipher_oid_spec_t rijndael_oids[] =
  {
    { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB },
    { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC },
    { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB },
    { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
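
/* In the specs below, 16 is the block size in bytes and 128/192/256
   the key length in bits.  */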
gcry_cipher_spec_t _gcry_cipher_spec_aes =
  {
    "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context),
    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
  };

cipher_extra_spec_t _gcry_cipher_extraspec_aes =
  {
    run_selftests
  };

static const char *rijndael192_names[] =
  {
    "RIJNDAEL192",
    "AES-192",
    NULL
  };

static gcry_cipher_oid_spec_t rijndael192_oids[] =
  {
    { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB },
    { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC },
    { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB },
    { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

gcry_cipher_spec_t _gcry_cipher_spec_aes192 =
  {
    "AES192", rijndael192_names, rijndael192_oids, 16, 192,
    sizeof (RIJNDAEL_context),
    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
  };

cipher_extra_spec_t _gcry_cipher_extraspec_aes192 =
  {
    run_selftests
  };

static const char *rijndael256_names[] =
  {
    "RIJNDAEL256",
    "AES-256",
    NULL
  };

static gcry_cipher_oid_spec_t rijndael256_oids[] =
  {
    { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB },
    { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC },
    { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB },
    { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

gcry_cipher_spec_t _gcry_cipher_spec_aes256 =
  {
    "AES256", rijndael256_names, rijndael256_oids, 16, 256,
    sizeof (RIJNDAEL_context),
    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
  };

cipher_extra_spec_t _gcry_cipher_extraspec_aes256 =
  {
    run_selftests
  };