- /* Rijndael (AES) for GnuPG
- * Copyright (C) 2000, 2001, 2002, 2003, 2007,
- * 2008, 2011 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses/>.
- *******************************************************************
- * The code here is based on the optimized implementation taken from
- * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
- * which carries this notice:
- *------------------------------------------
- * rijndael-alg-fst.c v2.3 April '2000
- *
- * Optimised ANSI C code
- *
- * authors: v1.0: Antoon Bosselaers
- * v2.0: Vincent Rijmen
- * v2.3: Paulo Barreto
- *
- * This code is placed in the public domain.
- *------------------------------------------
- *
- * The SP800-38a document is available at:
- * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
- *
- */
- #include <config.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h> /* for memcmp() */
- #include "types.h" /* for byte and u32 typedefs */
- #include "g10lib.h"
- #include "cipher.h"
- #define MAXKC (256/32)
- #define MAXROUNDS 14
- #define BLOCKSIZE (128/8)
- /* Helper macro to force alignment to 16 bytes. */
- #ifdef __GNUC__
- # define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
- #else
- # define ATTR_ALIGNED_16
- #endif
- /* USE_PADLOCK indicates whether to compile the padlock specific
- code. */
- #undef USE_PADLOCK
- #ifdef ENABLE_PADLOCK_SUPPORT
- # if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
- # define USE_PADLOCK 1
- # endif
- #endif /*ENABLE_PADLOCK_SUPPORT*/
- /* USE_AESNI indicates whether to compile with Intel AES-NI code. We
- need the vector-size attribute which seems to be available since
- gcc 3. However, to be on the safe side we require at least gcc 4. */
- #undef USE_AESNI
- #ifdef ENABLE_AESNI_SUPPORT
- # if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
- # define USE_AESNI 1
- # endif
- #endif /* ENABLE_AESNI_SUPPORT */
- #ifdef USE_AESNI
- typedef int m128i_t __attribute__ ((__vector_size__ (16)));
- #endif /*USE_AESNI*/
- /* Define an u32 variant for the sake of gcc 4.4's strict aliasing. */
- #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
- typedef u32 __attribute__ ((__may_alias__)) u32_a_t;
- #else
- typedef u32 u32_a_t;
- #endif
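- /* As an illustrative sketch (this hypothetical helper is not used
- below): reading a byte buffer through u32_a_t avoids the undefined
- behaviour that a plain u32 cast would have under gcc's strict
- aliasing rules. */
- #if 0
- static u32
- load_u32_aliased (const byte *p)
- {
- return *((const u32_a_t*)p);
- }
- #endif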
- /* Our context object. */
- typedef struct
- {
- /* The first fields are the keyschedule arrays. This is so that
- they are aligned on a 16 byte boundary if using gcc. This
- alignment is required for the AES-NI code and a good idea in any
- case. The alignment is guaranteed due to the way cipher.c
- allocates the space for the context. The PROPERLY_ALIGNED_TYPE
- hack is used to force a minimal alignment if not using gcc or if
- the alignment requirement is higher than 16 bytes. */
- union
- {
- PROPERLY_ALIGNED_TYPE dummy;
- byte keyschedule[MAXROUNDS+1][4][4];
- #ifdef USE_PADLOCK
- /* The key as passed to the padlock engine. It is only used if
- the padlock engine is used (USE_PADLOCK, below). */
- unsigned char padlock_key[16] __attribute__ ((aligned (16)));
- #endif /*USE_PADLOCK*/
- } u1;
- union
- {
- PROPERLY_ALIGNED_TYPE dummy;
- byte keyschedule[MAXROUNDS+1][4][4];
- } u2;
- int rounds; /* Key-length-dependent number of rounds. */
- int decryption_prepared; /* The decryption key schedule is available. */
- #ifdef USE_PADLOCK
- int use_padlock; /* Padlock shall be used. */
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- int use_aesni; /* AES-NI shall be used. */
- #endif /*USE_AESNI*/
- } RIJNDAEL_context ATTR_ALIGNED_16;
- /* Macros defining alias for the keyschedules. */
- #define keyschenc u1.keyschedule
- #define keyschdec u2.keyschedule
- #define padlockkey u1.padlock_key
- /* Two macros to be called before and after the use of AESNI
- instructions. There should be no external function calls between
- the use of these macros. Their purpose is to make sure that the
- SSE registers are cleared and won't reveal any information about
- the key or the data. */
- #ifdef USE_AESNI
- # define aesni_prepare() do { } while (0)
- # define aesni_cleanup() \
- do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
- "pxor %%xmm1, %%xmm1\n" :: ); \
- } while (0)
- # define aesni_cleanup_2_4() \
- do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \
- "pxor %%xmm3, %%xmm3\n" \
- "pxor %%xmm4, %%xmm4\n":: ); \
- } while (0)
- #else
- # define aesni_prepare() do { } while (0)
- # define aesni_cleanup() do { } while (0)
- #endif
- /* All the numbers. */
- #include "rijndael-tables.h"
- /* Function prototypes. */
- #ifdef USE_AESNI
- /* We don't want to inline these functions to help gcc allocate enough
- registers. */
- static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr,
- unsigned char *b, const unsigned char *a)
- __attribute__ ((__noinline__));
- static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr,
- unsigned char *b, const unsigned char *a)
- __attribute__ ((__noinline__));
- #endif /*USE_AESNI*/
- static const char *selftest(void);
- /* Perform the key setup. */
- static gcry_err_code_t
- do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
- {
- static int initialized = 0;
- static const char *selftest_failed=0;
- int rounds;
- unsigned int i;
- int j, r, t, rconpointer = 0;
- int KC;
- union
- {
- PROPERLY_ALIGNED_TYPE dummy;
- byte k[MAXKC][4];
- } k;
- #define k k.k
- union
- {
- PROPERLY_ALIGNED_TYPE dummy;
- byte tk[MAXKC][4];
- } tk;
- #define tk tk.tk
- /* The on-the-fly self tests are only run in non-fips mode. In fips
- mode explicit self-tests are required. Actually the on-the-fly
- self-tests are not fully thread-safe and it might happen that a
- failed self-test won't get noticed in another thread.
- FIXME: We might want to have a central registry of succeeded
- self-tests. */
- if (!fips_mode () && !initialized)
- {
- initialized = 1;
- selftest_failed = selftest ();
- if (selftest_failed)
- log_error ("%s\n", selftest_failed );
- }
- if (selftest_failed)
- return GPG_ERR_SELFTEST_FAILED;
- ctx->decryption_prepared = 0;
- #ifdef USE_PADLOCK
- ctx->use_padlock = 0;
- #endif
- #ifdef USE_AESNI
- ctx->use_aesni = 0;
- #endif
- if( keylen == 128/8 )
- {
- rounds = 10;
- KC = 4;
- if (0)
- {
- ;
- }
- #ifdef USE_PADLOCK
- else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES))
- {
- ctx->use_padlock = 1;
- memcpy (ctx->padlockkey, key, keylen);
- }
- #endif
- #ifdef USE_AESNI
- else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
- {
- ctx->use_aesni = 1;
- }
- #endif
- }
- else if ( keylen == 192/8 )
- {
- rounds = 12;
- KC = 6;
- if (0)
- {
- ;
- }
- #ifdef USE_AESNI
- else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
- {
- ctx->use_aesni = 1;
- }
- #endif
- }
- else if ( keylen == 256/8 )
- {
- rounds = 14;
- KC = 8;
- if (0)
- {
- ;
- }
- #ifdef USE_AESNI
- else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
- {
- ctx->use_aesni = 1;
- }
- #endif
- }
- else
- return GPG_ERR_INV_KEYLEN;
- ctx->rounds = rounds;
- /* NB: We don't yet support Padlock hardware key generation. */
- if (0)
- ;
- #ifdef USE_AESNI_is_disabled_here
- else if (ctx->use_aesni && ctx->rounds == 10)
- {
- /* Note: This code works for AES-128 but it is not much better
- than using the standard key schedule. We disable it for
- now and don't put any effort into implementing this for
- AES-192 and AES-256. */
- asm volatile ("movl %[key], %%esi\n\t"
- "movdqu (%%esi), %%xmm1\n\t" /* xmm1 := key */
- "movl %[ksch], %%esi\n\t"
- "movdqa %%xmm1, (%%esi)\n\t" /* ksch[0] := xmm1 */
- "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1 */
- "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1 */
- "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1 */
- "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1 */
- "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1 */
- "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1 */
- "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1 */
- "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1 */
- "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1 */
- "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t"
- "call .Lexpand128_%=\n\t"
- "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1 */
- "jmp .Lleave%=\n"
- ".Lexpand128_%=:\n\t"
- "pshufd $0xff, %%xmm2, %%xmm2\n\t"
- "movdqa %%xmm1, %%xmm3\n\t"
- "pslldq $4, %%xmm3\n\t"
- "pxor %%xmm3, %%xmm1\n\t"
- "pslldq $4, %%xmm3\n\t"
- "pxor %%xmm3, %%xmm1\n\t"
- "pslldq $4, %%xmm3\n\t"
- "pxor %%xmm3, %%xmm2\n\t"
- "pxor %%xmm2, %%xmm1\n\t"
- "ret\n"
- ".Lleave%=:\n\t"
- "pxor %%xmm1, %%xmm1\n\t"
- "pxor %%xmm2, %%xmm2\n\t"
- "pxor %%xmm3, %%xmm3\n"
- :
- : [key] "g" (key), [ksch] "g" (ctx->keyschenc)
- : "%esi", "cc", "memory" );
- }
- #endif /*USE_AESNI*/
- else
- {
- #define W (ctx->keyschenc)
- for (i = 0; i < keylen; i++)
- {
- k[i >> 2][i & 3] = key[i];
- }
- for (j = KC-1; j >= 0; j--)
- {
- *((u32_a_t*)tk[j]) = *((u32_a_t*)k[j]);
- }
- r = 0;
- t = 0;
- /* Copy values into round key array. */
- for (j = 0; (j < KC) && (r < rounds + 1); )
- {
- for (; (j < KC) && (t < 4); j++, t++)
- {
- *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
- }
- if (t == 4)
- {
- r++;
- t = 0;
- }
- }
- while (r < rounds + 1)
- {
- /* While not enough round key material has been calculated,
- calculate new values. */
- tk[0][0] ^= S[tk[KC-1][1]];
- tk[0][1] ^= S[tk[KC-1][2]];
- tk[0][2] ^= S[tk[KC-1][3]];
- tk[0][3] ^= S[tk[KC-1][0]];
- tk[0][0] ^= rcon[rconpointer++];
- if (KC != 8)
- {
- for (j = 1; j < KC; j++)
- {
- *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
- }
- }
- else
- {
- for (j = 1; j < KC/2; j++)
- {
- *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
- }
- tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
- tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
- tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
- tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
- for (j = KC/2 + 1; j < KC; j++)
- {
- *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
- }
- }
- /* Copy values into round key array. */
- for (j = 0; (j < KC) && (r < rounds + 1); )
- {
- for (; (j < KC) && (t < 4); j++, t++)
- {
- *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
- }
- if (t == 4)
- {
- r++;
- t = 0;
- }
- }
- }
- #undef W
- }
- return 0;
- #undef tk
- #undef k
- }
- static gcry_err_code_t
- rijndael_setkey (void *context, const byte *key, const unsigned keylen)
- {
- RIJNDAEL_context *ctx = context;
- int rc = do_setkey (ctx, key, keylen);
- _gcry_burn_stack ( 100 + 16*sizeof(int));
- return rc;
- }
- /* Make a decryption key from an encryption key. */
- static void
- prepare_decryption( RIJNDAEL_context *ctx )
- {
- int r;
- #ifdef USE_AESNI
- if (ctx->use_aesni)
- {
- /* The AES-NI decrypt instructions use the Equivalent Inverse
- Cipher, thus we can't use the standard decrypt key
- preparation. */
- m128i_t *ekey = (m128i_t*)ctx->keyschenc;
- m128i_t *dkey = (m128i_t*)ctx->keyschdec;
- int rr;
- dkey[0] = ekey[ctx->rounds];
- for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--)
- {
- asm volatile
- ("movdqu %[ekey], %%xmm1\n\t"
- /*"aesimc %%xmm1, %%xmm1\n\t"*/
- ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t"
- "movdqu %%xmm1, %[dkey]"
- : [dkey] "=m" (dkey[r])
- : [ekey] "m" (ekey[rr]) );
- }
- dkey[r] = ekey[0];
- }
- else
- #endif /*USE_AESNI*/
- {
- union
- {
- PROPERLY_ALIGNED_TYPE dummy;
- byte *w;
- } w;
- #define w w.w
- for (r=0; r < MAXROUNDS+1; r++ )
- {
- *((u32_a_t*)ctx->keyschdec[r][0]) = *((u32_a_t*)ctx->keyschenc[r][0]);
- *((u32_a_t*)ctx->keyschdec[r][1]) = *((u32_a_t*)ctx->keyschenc[r][1]);
- *((u32_a_t*)ctx->keyschdec[r][2]) = *((u32_a_t*)ctx->keyschenc[r][2]);
- *((u32_a_t*)ctx->keyschdec[r][3]) = *((u32_a_t*)ctx->keyschenc[r][3]);
- }
- #define W (ctx->keyschdec)
- for (r = 1; r < ctx->rounds; r++)
- {
- w = W[r][0];
- *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
- ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
- w = W[r][1];
- *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
- ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
- w = W[r][2];
- *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
- ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
- w = W[r][3];
- *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
- ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
- }
- #undef W
- #undef w
- }
- }
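- /* For illustration, the AES-NI branch above could be written with
- compiler intrinsics as in the following sketch (assumes
- <wmmintrin.h> and building with -maes; this file deliberately
- avoids intrinsics, see the constraint discussion further below).
- Not used by the code. */
- #if 0
- #include <wmmintrin.h>
- static void
- prepare_decryption_intrinsics (RIJNDAEL_context *ctx)
- {
- __m128i *ekey = (__m128i*)ctx->keyschenc;
- __m128i *dkey = (__m128i*)ctx->keyschdec;
- int r;
- dkey[0] = ekey[ctx->rounds];
- for (r = 1; r < ctx->rounds; r++)
- dkey[r] = _mm_aesimc_si128 (ekey[ctx->rounds - r]); /* InvMixColumns */
- dkey[ctx->rounds] = ekey[0];
- }
- #endif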
- /* Encrypt one block. A and B need to be aligned on a 4 byte
- boundary. A and B may be the same. */
- static void
- do_encrypt_aligned (const RIJNDAEL_context *ctx,
- unsigned char *b, const unsigned char *a)
- {
- #define rk (ctx->keyschenc)
- int rounds = ctx->rounds;
- int r;
- union
- {
- u32 tempu32[4]; /* Force correct alignment. */
- byte temp[4][4];
- } u;
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[0][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]);
- *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]])
- ^ *((u32_a_t*)T2[u.temp[1][1]])
- ^ *((u32_a_t*)T3[u.temp[2][2]])
- ^ *((u32_a_t*)T4[u.temp[3][3]]));
- *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]])
- ^ *((u32_a_t*)T2[u.temp[2][1]])
- ^ *((u32_a_t*)T3[u.temp[3][2]])
- ^ *((u32_a_t*)T4[u.temp[0][3]]));
- *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]])
- ^ *((u32_a_t*)T2[u.temp[3][1]])
- ^ *((u32_a_t*)T3[u.temp[0][2]])
- ^ *((u32_a_t*)T4[u.temp[1][3]]));
- *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]])
- ^ *((u32_a_t*)T2[u.temp[0][1]])
- ^ *((u32_a_t*)T3[u.temp[1][2]])
- ^ *((u32_a_t*)T4[u.temp[2][3]]));
- for (r = 1; r < rounds-1; r++)
- {
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
- *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]])
- ^ *((u32_a_t*)T2[u.temp[1][1]])
- ^ *((u32_a_t*)T3[u.temp[2][2]])
- ^ *((u32_a_t*)T4[u.temp[3][3]]));
- *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]])
- ^ *((u32_a_t*)T2[u.temp[2][1]])
- ^ *((u32_a_t*)T3[u.temp[3][2]])
- ^ *((u32_a_t*)T4[u.temp[0][3]]));
- *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]])
- ^ *((u32_a_t*)T2[u.temp[3][1]])
- ^ *((u32_a_t*)T3[u.temp[0][2]])
- ^ *((u32_a_t*)T4[u.temp[1][3]]));
- *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]])
- ^ *((u32_a_t*)T2[u.temp[0][1]])
- ^ *((u32_a_t*)T3[u.temp[1][2]])
- ^ *((u32_a_t*)T4[u.temp[2][3]]));
- }
- /* Last round is special. */
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[rounds-1][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]);
- b[ 0] = T1[u.temp[0][0]][1];
- b[ 1] = T1[u.temp[1][1]][1];
- b[ 2] = T1[u.temp[2][2]][1];
- b[ 3] = T1[u.temp[3][3]][1];
- b[ 4] = T1[u.temp[1][0]][1];
- b[ 5] = T1[u.temp[2][1]][1];
- b[ 6] = T1[u.temp[3][2]][1];
- b[ 7] = T1[u.temp[0][3]][1];
- b[ 8] = T1[u.temp[2][0]][1];
- b[ 9] = T1[u.temp[3][1]][1];
- b[10] = T1[u.temp[0][2]][1];
- b[11] = T1[u.temp[1][3]][1];
- b[12] = T1[u.temp[3][0]][1];
- b[13] = T1[u.temp[0][1]][1];
- b[14] = T1[u.temp[1][2]][1];
- b[15] = T1[u.temp[2][3]][1];
- *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[rounds][0]);
- *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]);
- *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
- *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
- #undef rk
- }
- static void
- do_encrypt (const RIJNDAEL_context *ctx,
- unsigned char *bx, const unsigned char *ax)
- {
- /* BX and AX are not necessarily correctly aligned. Thus we might
- need to copy them here. We try to align to 16 bytes. */
- if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
- {
- union
- {
- u32 dummy[4];
- byte a[16] ATTR_ALIGNED_16;
- } a;
- union
- {
- u32 dummy[4];
- byte b[16] ATTR_ALIGNED_16;
- } b;
- memcpy (a.a, ax, 16);
- do_encrypt_aligned (ctx, b.b, a.a);
- memcpy (bx, b.b, 16);
- }
- else
- {
- do_encrypt_aligned (ctx, bx, ax);
- }
- }
- /* Encrypt or decrypt one block using the padlock engine. A and B may
- be the same. */
- #ifdef USE_PADLOCK
- static void
- do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
- unsigned char *bx, const unsigned char *ax)
- {
- /* BX and AX are not necessarily correctly aligned. Thus we need to
- copy them here. */
- unsigned char a[16] __attribute__ ((aligned (16)));
- unsigned char b[16] __attribute__ ((aligned (16)));
- unsigned int cword[4] __attribute__ ((aligned (16)));
- /* The control word fields are:
- 127:12 11:10 9 8 7 6 5 4 3:0
- RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND */
- cword[0] = (ctx->rounds & 15); /* (The mask is just a safeguard.) */
- cword[1] = 0;
- cword[2] = 0;
- cword[3] = 0;
- if (decrypt_flag)
- cword[0] |= 0x00000200;
- memcpy (a, ax, 16);
- asm volatile
- ("pushfl\n\t" /* Force key reload. */
- "popfl\n\t"
- "xchg %3, %%ebx\n\t" /* Load key. */
- "movl $1, %%ecx\n\t" /* Init counter for just one block. */
- ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
- "xchg %3, %%ebx\n" /* Restore GOT register. */
- : /* No output */
- : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
- : "%ecx", "cc", "memory"
- );
- memcpy (bx, b, 16);
- }
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- /* Encrypt one block using the Intel AES-NI instructions. A and B may
- be the same; they need to be properly aligned to 16 bytes.
- Our problem here is that gcc does not allow the "x" constraint for
- SSE registers in asm unless you compile with -msse. The common
- wisdom is to use a separate file for SSE instructions and build it
- separately. This would require a lot of extra build system stuff,
- similar to what we do in mpi/ for the asm stuff. What we do
- instead is to use standard registers and a bit more of plain asm
- which copies the data and key stuff to the SSE registers and later
- back. If we decide to implement some block modes with parallelized
- AES instructions, it might indeed be better to use plain asm in
- the style of mpi/. */
- static void
- do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
- unsigned char *b, const unsigned char *a)
- {
- #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
- #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
- /* Note: For now we relax the alignment requirement for A and B: it
- does not make much difference because in many cases we would need
- to memcpy them to an extra buffer anyway; using movdqu is much
- faster than memcpy followed by movdqa. For CFB we know that the IV
- is properly aligned, but that is a special case. We had better
- implement CFB directly in asm. */
- asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */
- "movl %[key], %%esi\n\t" /* esi := keyschenc */
- "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
- "movdqa 0x10(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x20(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x30(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x40(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x50(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x60(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x70(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x80(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x90(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xa0(%%esi), %%xmm1\n\t"
- "cmp $10, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xb0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xc0(%%esi), %%xmm1\n\t"
- "cmp $12, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xd0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xe0(%%esi), %%xmm1\n"
- ".Lenclast%=:\n\t"
- aesenclast_xmm1_xmm0
- "movdqu %%xmm0, %[dst]\n"
- : [dst] "=m" (*b)
- : [src] "m" (*a),
- [key] "r" (ctx->keyschenc),
- [rounds] "r" (ctx->rounds)
- : "%esi", "cc", "memory");
- #undef aesenc_xmm1_xmm0
- #undef aesenclast_xmm1_xmm0
- }
- static void
- do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
- unsigned char *b, const unsigned char *a)
- {
- #define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
- #define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
- asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */
- "movl %[key], %%esi\n\t"
- "movdqa (%%esi), %%xmm1\n\t"
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
- "movdqa 0x10(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x20(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x30(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x40(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x50(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x60(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x70(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x80(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0x90(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0xa0(%%esi), %%xmm1\n\t"
- "cmp $10, %[rounds]\n\t"
- "jz .Ldeclast%=\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0xb0(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0xc0(%%esi), %%xmm1\n\t"
- "cmp $12, %[rounds]\n\t"
- "jz .Ldeclast%=\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0xd0(%%esi), %%xmm1\n\t"
- aesdec_xmm1_xmm0
- "movdqa 0xe0(%%esi), %%xmm1\n"
- ".Ldeclast%=:\n\t"
- aesdeclast_xmm1_xmm0
- "movdqu %%xmm0, %[dst]\n"
- : [dst] "=m" (*b)
- : [src] "m" (*a),
- [key] "r" (ctx->keyschdec),
- [rounds] "r" (ctx->rounds)
- : "%esi", "cc", "memory");
- #undef aesdec_xmm1_xmm0
- #undef aesdeclast_xmm1_xmm0
- }
- /* Perform a CFB encryption or decryption round using the
- initialization vector IV and the input block A. Write the result
- to the output block B and update IV. IV needs to be 16 byte
- aligned. */
- static void
- do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
- unsigned char *iv, unsigned char *b, const unsigned char *a)
- {
- #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
- #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
- asm volatile ("movdqa %[iv], %%xmm0\n\t" /* xmm0 := IV */
- "movl %[key], %%esi\n\t" /* esi := keyschenc */
- "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
- "movdqa 0x10(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x20(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x30(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x40(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x50(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x60(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x70(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x80(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x90(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xa0(%%esi), %%xmm1\n\t"
- "cmp $10, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xb0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xc0(%%esi), %%xmm1\n\t"
- "cmp $12, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xd0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xe0(%%esi), %%xmm1\n"
- ".Lenclast%=:\n\t"
- aesenclast_xmm1_xmm0
- "movdqu %[src], %%xmm1\n\t" /* Save input. */
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 = input ^ IV */
- "cmp $1, %[decrypt]\n\t"
- "jz .Ldecrypt_%=\n\t"
- "movdqa %%xmm0, %[iv]\n\t" /* [encrypt] Store IV. */
- "jmp .Lleave_%=\n"
- ".Ldecrypt_%=:\n\t"
- "movdqa %%xmm1, %[iv]\n" /* [decrypt] Store IV. */
- ".Lleave_%=:\n\t"
- "movdqu %%xmm0, %[dst]\n" /* Store output. */
- : [iv] "+m" (*iv), [dst] "=m" (*b)
- : [src] "m" (*a),
- [key] "g" (ctx->keyschenc),
- [rounds] "g" (ctx->rounds),
- [decrypt] "m" (decrypt_flag)
- : "%esi", "cc", "memory");
- #undef aesenc_xmm1_xmm0
- #undef aesenclast_xmm1_xmm0
- }
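- /* What the asm above computes, as a plain C sketch (illustrative
- only; do_encrypt_aligned stands in for the AES-NI rounds): */
- #if 0
- static void
- cfb_round_sketch (const RIJNDAEL_context *ctx, int decrypt_flag,
- unsigned char *iv, unsigned char *b, const unsigned char *a)
- {
- unsigned char eiv[BLOCKSIZE];
- unsigned char in[BLOCKSIZE];
- int i;
- memcpy (in, a, BLOCKSIZE); /* Save input; A and B may be the same. */
- do_encrypt_aligned (ctx, eiv, iv); /* E(IV) */
- for (i = 0; i < BLOCKSIZE; i++)
- b[i] = in[i] ^ eiv[i]; /* output = input ^ E(IV) */
- /* The ciphertext becomes the next IV: the output when encrypting,
- the input when decrypting. */
- memcpy (iv, decrypt_flag? in : b, BLOCKSIZE);
- }
- #endif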
- /* Perform a CTR encryption round using the counter CTR and the input
- block A. Write the result to the output block B and update CTR.
- CTR needs to be a 16 byte aligned big-endian value. */
- static void
- do_aesni_ctr (const RIJNDAEL_context *ctx,
- unsigned char *ctr, unsigned char *b, const unsigned char *a)
- {
- #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
- #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
- static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
- { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
- asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */
- "movaps %%xmm0, %%xmm2\n\t"
- "mov $1, %%esi\n\t" /* xmm2++ (big-endian) */
- "movd %%esi, %%xmm1\n\t"
- "pshufb %[mask], %%xmm2\n\t"
- "paddq %%xmm1, %%xmm2\n\t"
- "pshufb %[mask], %%xmm2\n\t"
- "movdqa %%xmm2, %[ctr]\n" /* Update CTR. */
- "movl %[key], %%esi\n\t" /* esi := keyschenc */
- "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
- "movdqa 0x10(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x20(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x30(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x40(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x50(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x60(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x70(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x80(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0x90(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xa0(%%esi), %%xmm1\n\t"
- "cmp $10, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xb0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xc0(%%esi), %%xmm1\n\t"
- "cmp $12, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xd0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- "movdqa 0xe0(%%esi), %%xmm1\n"
- ".Lenclast%=:\n\t"
- aesenclast_xmm1_xmm0
- "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */
- "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */
- "movdqu %%xmm0, %[dst]" /* Store EncCTR. */
- : [ctr] "+m" (*ctr), [dst] "=m" (*b)
- : [src] "m" (*a),
- [key] "g" (ctx->keyschenc),
- [rounds] "g" (ctx->rounds),
- [mask] "m" (*be_mask)
- : "%esi", "cc", "memory");
- #undef aesenc_xmm1_xmm0
- #undef aesenclast_xmm1_xmm0
- }
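- /* Plain C sketch of one CTR round as above: encrypt the counter,
- XOR with the input, increment the counter. This mirrors the
- portable fallback in _gcry_aes_ctr_enc below; note that the paddq
- in the asm only carries within the low 64 bits of the counter.
- Illustrative only. */
- #if 0
- static void
- ctr_round_sketch (const RIJNDAEL_context *ctx,
- unsigned char *ctr, unsigned char *b, const unsigned char *a)
- {
- unsigned char ectr[BLOCKSIZE];
- int i;
- do_encrypt_aligned (ctx, ectr, ctr); /* E(CTR) */
- for (i = 0; i < BLOCKSIZE; i++)
- b[i] = a[i] ^ ectr[i];
- for (i = BLOCKSIZE; i > 0; i--) /* CTR++ (big-endian) */
- {
- ctr[i-1]++;
- if (ctr[i-1])
- break;
- }
- }
- #endif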
- /* Four blocks at a time variant of do_aesni_ctr. */
- static void
- do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
- unsigned char *ctr, unsigned char *b, const unsigned char *a)
- {
- #define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
- #define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
- #define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
- #define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
- #define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
- #define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
- #define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
- #define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"
- static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
- { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
- /* Register usage:
- esi keyschedule
- xmm0 CTR-0
- xmm1 temp / round key
- xmm2 CTR-1
- xmm3 CTR-2
- xmm4 CTR-3
- xmm5 temp
- */
- asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */
- "movaps %%xmm0, %%xmm2\n\t"
- "mov $1, %%esi\n\t" /* xmm1 := 1 */
- "movd %%esi, %%xmm1\n\t"
- "pshufb %[mask], %%xmm2\n\t" /* xmm2 := le(xmm2) */
- "paddq %%xmm1, %%xmm2\n\t" /* xmm2++ */
- "movaps %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */
- "paddq %%xmm1, %%xmm3\n\t" /* xmm3++ */
- "movaps %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */
- "paddq %%xmm1, %%xmm4\n\t" /* xmm4++ */
- "movaps %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */
- "paddq %%xmm1, %%xmm5\n\t" /* xmm5++ */
- "pshufb %[mask], %%xmm2\n\t" /* xmm2 := be(xmm2) */
- "pshufb %[mask], %%xmm3\n\t" /* xmm3 := be(xmm3) */
- "pshufb %[mask], %%xmm4\n\t" /* xmm4 := be(xmm4) */
- "pshufb %[mask], %%xmm5\n\t" /* xmm5 := be(xmm5) */
- "movdqa %%xmm5, %[ctr]\n" /* Update CTR. */
- "movl %[key], %%esi\n\t" /* esi := keyschenc */
- "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */
- "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
- "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */
- "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */
- "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */
- "movdqa 0x10(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x20(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x30(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x40(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x50(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x60(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x70(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x80(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0x90(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0xa0(%%esi), %%xmm1\n\t"
- "cmp $10, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0xb0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0xc0(%%esi), %%xmm1\n\t"
- "cmp $12, %[rounds]\n\t"
- "jz .Lenclast%=\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0xd0(%%esi), %%xmm1\n\t"
- aesenc_xmm1_xmm0
- aesenc_xmm1_xmm2
- aesenc_xmm1_xmm3
- aesenc_xmm1_xmm4
- "movdqa 0xe0(%%esi), %%xmm1\n"
- ".Lenclast%=:\n\t"
- aesenclast_xmm1_xmm0
- aesenclast_xmm1_xmm2
- aesenclast_xmm1_xmm3
- aesenclast_xmm1_xmm4
- "movdqu %[src], %%xmm1\n\t" /* Get block 1. */
- "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */
- "movdqu %%xmm0, %[dst]\n\t" /* Store block 1 */
- "movdqu (16)%[src], %%xmm1\n\t" /* Get block 2. */
- "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */
- "movdqu %%xmm2, (16)%[dst]\n\t" /* Store block 2. */
- "movdqu (32)%[src], %%xmm1\n\t" /* Get block 3. */
- "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */
- "movdqu %%xmm3, (32)%[dst]\n\t" /* Store block 3. */
- "movdqu (48)%[src], %%xmm1\n\t" /* Get block 4. */
- "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */
- "movdqu %%xmm4, (48)%[dst]" /* Store block 4. */
- : [ctr] "+m" (*ctr), [dst] "=m" (*b)
- : [src] "m" (*a),
- [key] "g" (ctx->keyschenc),
- [rounds] "g" (ctx->rounds),
- [mask] "m" (*be_mask)
- : "%esi", "cc", "memory");
- #undef aesenc_xmm1_xmm0
- #undef aesenc_xmm1_xmm2
- #undef aesenc_xmm1_xmm3
- #undef aesenc_xmm1_xmm4
- #undef aesenclast_xmm1_xmm0
- #undef aesenclast_xmm1_xmm2
- #undef aesenclast_xmm1_xmm3
- #undef aesenclast_xmm1_xmm4
- }
- static void
- do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
- unsigned char *bx, const unsigned char *ax)
- {
- if (decrypt_flag)
- {
- if (!ctx->decryption_prepared )
- {
- prepare_decryption ( ctx );
- ctx->decryption_prepared = 1;
- }
- do_aesni_dec_aligned (ctx, bx, ax);
- }
- else
- do_aesni_enc_aligned (ctx, bx, ax);
- }
- #endif /*USE_AESNI*/
- static void
- rijndael_encrypt (void *context, byte *b, const byte *a)
- {
- RIJNDAEL_context *ctx = context;
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- {
- do_padlock (ctx, 0, b, a);
- _gcry_burn_stack (48 + 15 /* possible padding for alignment */);
- }
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- {
- aesni_prepare ();
- do_aesni (ctx, 0, b, a);
- aesni_cleanup ();
- }
- #endif /*USE_AESNI*/
- else
- {
- do_encrypt (ctx, b, a);
- _gcry_burn_stack (56 + 2*sizeof(int));
- }
- }
- /* Bulk encryption of complete blocks in CFB mode. Caller needs to
- make sure that IV is aligned on an unsigned long boundary. This
- function is only intended for the bulk encryption feature of
- cipher.c. */
- void
- _gcry_aes_cfb_enc (void *context, unsigned char *iv,
- void *outbuf_arg, const void *inbuf_arg,
- unsigned int nblocks)
- {
- RIJNDAEL_context *ctx = context;
- unsigned char *outbuf = outbuf_arg;
- const unsigned char *inbuf = inbuf_arg;
- unsigned char *ivp;
- int i;
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- {
- /* Fixme: Let Padlock do the CFBing. */
- for ( ;nblocks; nblocks-- )
- {
- /* Encrypt the IV. */
- do_padlock (ctx, 0, iv, iv);
- /* XOR the input with the IV and store input into IV. */
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- *outbuf++ = (*ivp++ ^= *inbuf++);
- }
- }
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- {
- aesni_prepare ();
- for ( ;nblocks; nblocks-- )
- {
- do_aesni_cfb (ctx, 0, iv, outbuf, inbuf);
- outbuf += BLOCKSIZE;
- inbuf += BLOCKSIZE;
- }
- aesni_cleanup ();
- }
- #endif /*USE_AESNI*/
- else
- {
- for ( ;nblocks; nblocks-- )
- {
- /* Encrypt the IV. */
- do_encrypt_aligned (ctx, iv, iv);
- /* XOR the input with the IV and store input into IV. */
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- *outbuf++ = (*ivp++ ^= *inbuf++);
- }
- }
- _gcry_burn_stack (48 + 2*sizeof(int));
- }
- /* Bulk encryption of complete blocks in CBC mode. Caller needs to
- make sure that IV is aligned on an unsigned long boundary. This
- function is only intended for the bulk encryption feature of
- cipher.c. */
- void
- _gcry_aes_cbc_enc (void *context, unsigned char *iv,
- void *outbuf_arg, const void *inbuf_arg,
- unsigned int nblocks, int cbc_mac)
- {
- RIJNDAEL_context *ctx = context;
- unsigned char *outbuf = outbuf_arg;
- const unsigned char *inbuf = inbuf_arg;
- unsigned char *ivp;
- int i;
- #ifdef USE_AESNI
- if (ctx->use_aesni)
- aesni_prepare ();
- #endif /*USE_AESNI*/
- for ( ;nblocks; nblocks-- )
- {
- for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
- outbuf[i] = inbuf[i] ^ *ivp++;
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- do_padlock (ctx, 0, outbuf, outbuf);
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- do_aesni (ctx, 0, outbuf, outbuf);
- #endif /*USE_AESNI*/
- else
- do_encrypt (ctx, outbuf, outbuf );
- memcpy (iv, outbuf, BLOCKSIZE);
- inbuf += BLOCKSIZE;
- if (!cbc_mac)
- outbuf += BLOCKSIZE;
- }
- #ifdef USE_AESNI
- if (ctx->use_aesni)
- aesni_cleanup ();
- #endif /*USE_AESNI*/
- _gcry_burn_stack (48 + 2*sizeof(int));
- }
- /* Bulk encryption of complete blocks in CTR mode. Caller needs to
- make sure that CTR is aligned on a 16 byte boundary if AESNI is
- used; otherwise the minimum alignment is that of an u32. This function is only intended
- for the bulk encryption feature of cipher.c. CTR is expected to be
- of size BLOCKSIZE. */
- void
- _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
- void *outbuf_arg, const void *inbuf_arg,
- unsigned int nblocks)
- {
- RIJNDAEL_context *ctx = context;
- unsigned char *outbuf = outbuf_arg;
- const unsigned char *inbuf = inbuf_arg;
- unsigned char *p;
- int i;
- if (0)
- ;
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- {
- aesni_prepare ();
- for ( ;nblocks > 3 ; nblocks -= 4 )
- {
- do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
- outbuf += 4*BLOCKSIZE;
- inbuf += 4*BLOCKSIZE;
- }
- for ( ;nblocks; nblocks-- )
- {
- do_aesni_ctr (ctx, ctr, outbuf, inbuf);
- outbuf += BLOCKSIZE;
- inbuf += BLOCKSIZE;
- }
- aesni_cleanup ();
- aesni_cleanup_2_4 ();
- }
- #endif /*USE_AESNI*/
- else
- {
- union { unsigned char x1[16]; u32 x32[4]; } tmp;
- for ( ;nblocks; nblocks-- )
- {
- /* Encrypt the counter. */
- do_encrypt_aligned (ctx, tmp.x1, ctr);
- /* XOR the input with the encrypted counter and store in output. */
- for (p=tmp.x1, i=0; i < BLOCKSIZE; i++)
- *outbuf++ = (*p++ ^= *inbuf++);
- /* Increment the counter. */
- for (i = BLOCKSIZE; i > 0; i--)
- {
- ctr[i-1]++;
- if (ctr[i-1])
- break;
- }
- }
- }
- _gcry_burn_stack (48 + 2*sizeof(int));
- }
- /* Decrypt one block. A and B need to be aligned on a 4 byte boundary
- and the decryption must have been prepared. A and B may be the
- same. */
- static void
- do_decrypt_aligned (RIJNDAEL_context *ctx,
- unsigned char *b, const unsigned char *a)
- {
- #define rk (ctx->keyschdec)
- int rounds = ctx->rounds;
- int r;
- union
- {
- u32 tempu32[4]; /* Force correct alignment. */
- byte temp[4][4];
- } u;
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[rounds][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]);
- *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]])
- ^ *((u32_a_t*)T6[u.temp[3][1]])
- ^ *((u32_a_t*)T7[u.temp[2][2]])
- ^ *((u32_a_t*)T8[u.temp[1][3]]));
- *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]])
- ^ *((u32_a_t*)T6[u.temp[0][1]])
- ^ *((u32_a_t*)T7[u.temp[3][2]])
- ^ *((u32_a_t*)T8[u.temp[2][3]]));
- *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]])
- ^ *((u32_a_t*)T6[u.temp[1][1]])
- ^ *((u32_a_t*)T7[u.temp[0][2]])
- ^ *((u32_a_t*)T8[u.temp[3][3]]));
- *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]])
- ^ *((u32_a_t*)T6[u.temp[2][1]])
- ^ *((u32_a_t*)T7[u.temp[1][2]])
- ^ *((u32_a_t*)T8[u.temp[0][3]]));
- for (r = rounds-1; r > 1; r--)
- {
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
- *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]])
- ^ *((u32_a_t*)T6[u.temp[3][1]])
- ^ *((u32_a_t*)T7[u.temp[2][2]])
- ^ *((u32_a_t*)T8[u.temp[1][3]]));
- *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]])
- ^ *((u32_a_t*)T6[u.temp[0][1]])
- ^ *((u32_a_t*)T7[u.temp[3][2]])
- ^ *((u32_a_t*)T8[u.temp[2][3]]));
- *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]])
- ^ *((u32_a_t*)T6[u.temp[1][1]])
- ^ *((u32_a_t*)T7[u.temp[0][2]])
- ^ *((u32_a_t*)T8[u.temp[3][3]]));
- *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]])
- ^ *((u32_a_t*)T6[u.temp[2][1]])
- ^ *((u32_a_t*)T7[u.temp[1][2]])
- ^ *((u32_a_t*)T8[u.temp[0][3]]));
- }
- /* Last round is special. */
- *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[1][0]);
- *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]);
- *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]);
- *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]);
- b[ 0] = S5[u.temp[0][0]];
- b[ 1] = S5[u.temp[3][1]];
- b[ 2] = S5[u.temp[2][2]];
- b[ 3] = S5[u.temp[1][3]];
- b[ 4] = S5[u.temp[1][0]];
- b[ 5] = S5[u.temp[0][1]];
- b[ 6] = S5[u.temp[3][2]];
- b[ 7] = S5[u.temp[2][3]];
- b[ 8] = S5[u.temp[2][0]];
- b[ 9] = S5[u.temp[1][1]];
- b[10] = S5[u.temp[0][2]];
- b[11] = S5[u.temp[3][3]];
- b[12] = S5[u.temp[3][0]];
- b[13] = S5[u.temp[2][1]];
- b[14] = S5[u.temp[1][2]];
- b[15] = S5[u.temp[0][3]];
- *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[0][0]);
- *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]);
- *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
- *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
- #undef rk
- }
- /* Decrypt one block. AX and BX may be the same. */
- static void
- do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
- {
- if ( !ctx->decryption_prepared )
- {
- prepare_decryption ( ctx );
- _gcry_burn_stack (64);
- ctx->decryption_prepared = 1;
- }
- /* BX and AX are not necessarily correctly aligned. Thus we might
- need to copy them here. We try to align to 16 bytes. */
- if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
- {
- union
- {
- u32 dummy[4];
- byte a[16] ATTR_ALIGNED_16;
- } a;
- union
- {
- u32 dummy[4];
- byte b[16] ATTR_ALIGNED_16;
- } b;
- memcpy (a.a, ax, 16);
- do_decrypt_aligned (ctx, b.b, a.a);
- memcpy (bx, b.b, 16);
- }
- else
- {
- do_decrypt_aligned (ctx, bx, ax);
- }
- }
- static void
- rijndael_decrypt (void *context, byte *b, const byte *a)
- {
- RIJNDAEL_context *ctx = context;
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- {
- do_padlock (ctx, 1, b, a);
- _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */);
- }
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- {
- aesni_prepare ();
- do_aesni (ctx, 1, b, a);
- aesni_cleanup ();
- }
- #endif /*USE_AESNI*/
- else
- {
- do_decrypt (ctx, b, a);
- _gcry_burn_stack (56+2*sizeof(int));
- }
- }
- /* Bulk decryption of complete blocks in CFB mode. Caller needs to
- make sure that IV is aligned on an unsigned long boundary. This
- function is only intended for the bulk decryption feature of
- cipher.c. */
- void
- _gcry_aes_cfb_dec (void *context, unsigned char *iv,
- void *outbuf_arg, const void *inbuf_arg,
- unsigned int nblocks)
- {
- RIJNDAEL_context *ctx = context;
- unsigned char *outbuf = outbuf_arg;
- const unsigned char *inbuf = inbuf_arg;
- unsigned char *ivp;
- unsigned char temp;
- int i;
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- {
- /* Fixme: Let Padlock do the CFBing. */
- for ( ;nblocks; nblocks-- )
- {
- do_padlock (ctx, 0, iv, iv);
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
- }
- }
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- {
- aesni_prepare ();
- for ( ;nblocks; nblocks-- )
- {
- do_aesni_cfb (ctx, 1, iv, outbuf, inbuf);
- outbuf += BLOCKSIZE;
- inbuf += BLOCKSIZE;
- }
- aesni_cleanup ();
- }
- #endif /*USE_AESNI*/
- else
- {
- for ( ;nblocks; nblocks-- )
- {
- do_encrypt_aligned (ctx, iv, iv);
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
- }
- }
- _gcry_burn_stack (48 + 2*sizeof(int));
- }
- /* Bulk decryption of complete blocks in CBC mode. Caller needs to
- make sure that IV is aligned on an unsigned long boundary. This
- function is only intended for the bulk decryption feature of
- cipher.c. */
- void
- _gcry_aes_cbc_dec (void *context, unsigned char *iv,
- void *outbuf_arg, const void *inbuf_arg,
- unsigned int nblocks)
- {
- RIJNDAEL_context *ctx = context;
- unsigned char *outbuf = outbuf_arg;
- const unsigned char *inbuf = inbuf_arg;
- unsigned char *ivp;
- int i;
- unsigned char savebuf[BLOCKSIZE];
- #ifdef USE_AESNI
- if (ctx->use_aesni)
- aesni_prepare ();
- #endif /*USE_AESNI*/
- for ( ;nblocks; nblocks-- )
- {
- /* We need to save INBUF away because it may be identical to
- OUTBUF. */
- memcpy (savebuf, inbuf, BLOCKSIZE);
- if (0)
- ;
- #ifdef USE_PADLOCK
- else if (ctx->use_padlock)
- do_padlock (ctx, 1, outbuf, inbuf);
- #endif /*USE_PADLOCK*/
- #ifdef USE_AESNI
- else if (ctx->use_aesni)
- do_aesni (ctx, 1, outbuf, inbuf);
- #endif /*USE_AESNI*/
- else
- do_decrypt (ctx, outbuf, inbuf);
- for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
- outbuf[i] ^= *ivp++;
- memcpy (iv, savebuf, BLOCKSIZE);
- inbuf += BLOCKSIZE;
- outbuf += BLOCKSIZE;
- }
- #ifdef USE_AESNI
- if (ctx->use_aesni)
- aesni_cleanup ();
- #endif /*USE_AESNI*/
- _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*));
- }
- /* Run the self-tests for AES 128. Returns NULL on success. */
- static const char*
- selftest_basic_128 (void)
- {
- RIJNDAEL_context ctx;
- unsigned char scratch[16];
- /* The test vectors are from those supplied with AES; more or less
- randomly taken from ecb_tbl.txt (I=42,81,14). */
- #if 1
- static const unsigned char plaintext_128[16] =
- {
- 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33,
- 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A
- };
- static const unsigned char key_128[16] =
- {
- 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0,
- 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA
- };
- static const unsigned char ciphertext_128[16] =
- {
- 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2,
- 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD
- };
- #else
- /* Test vectors from fips-197, appendix C. */
- # warning debug test vectors in use
- static const unsigned char plaintext_128[16] =
- {
- 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
- 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
- };
- static const unsigned char key_128[16] =
- {
- 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
- 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
- /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */
- /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */
- };
- static const unsigned char ciphertext_128[16] =
- {
- 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
- 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
- };
- #endif
- rijndael_setkey (&ctx, key_128, sizeof (key_128));
- rijndael_encrypt (&ctx, scratch, plaintext_128);
- if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
- return "AES-128 test encryption failed.";
- rijndael_decrypt (&ctx, scratch, scratch);
- if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
- return "AES-128 test decryption failed.";
- return NULL;
- }
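- /* For illustration: the FIPS-197 appendix C vector quoted above,
- checked through the public API (a sketch only; error handling
- omitted, not used by the library): */
- #if 0
- #include <gcrypt.h>
- static int
- check_fips197_vector (void)
- {
- static const unsigned char key[16] =
- { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
- 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f };
- static const unsigned char pt[16] =
- { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
- 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff };
- static const unsigned char ct[16] =
- { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
- 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a };
- unsigned char out[16];
- gcry_cipher_hd_t hd;
- gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_ECB, 0);
- gcry_cipher_setkey (hd, key, sizeof key);
- gcry_cipher_encrypt (hd, out, sizeof out, pt, sizeof pt);
- gcry_cipher_close (hd);
- return !memcmp (out, ct, sizeof ct);
- }
- #endif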
- /* Run the self-tests for AES 192. Returns NULL on success. */
- static const char*
- selftest_basic_192 (void)
- {
- RIJNDAEL_context ctx;
- unsigned char scratch[16];
- static unsigned char plaintext_192[16] =
- {
- 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4,
- 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72
- };
- static unsigned char key_192[24] =
- {
- 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C,
- 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16,
- 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20
- };
- static const unsigned char ciphertext_192[16] =
- {
- 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC,
- 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
- };
- rijndael_setkey (&ctx, key_192, sizeof(key_192));
- rijndael_encrypt (&ctx, scratch, plaintext_192);
- if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
- return "AES-192 test encryption failed.";
- rijndael_decrypt (&ctx, scratch, scratch);
- if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)))
- return "AES-192 test decryption failed.";
- return NULL;
- }
- /* Run the self-tests for AES 256. Returns NULL on success. */
- static const char*
- selftest_basic_256 (void)
- {
- RIJNDAEL_context ctx;
- unsigned char scratch[16];
- static unsigned char plaintext_256[16] =
- {
- 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
- 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
- };
- static unsigned char key_256[32] =
- {
- 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10,
- 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A,
- 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24,
- 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E
- };
- static const unsigned char ciphertext_256[16] =
- {
- 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71,
- 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
- };
- rijndael_setkey (&ctx, key_256, sizeof(key_256));
- rijndael_encrypt (&ctx, scratch, plaintext_256);
- if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
- return "AES-256 test encryption failed.";
- rijndael_decrypt (&ctx, scratch, scratch);
- if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
- return "AES-256 test decryption failed.";
- return NULL;
- }
- /* Run all the self-tests and return NULL on success. This function
- is used for the on-the-fly self-tests. */
- static const char *
- selftest (void)
- {
- const char *r;
- if ( (r = selftest_basic_128 ())
- || (r = selftest_basic_192 ())
- || (r = selftest_basic_256 ()) )
- return r;
- return NULL;
- }
- /* SP800-38a.pdf for AES-128. */
- static const char *
- selftest_fips_128_38a (int requested_mode)
- {
- struct tv
- {
- int mode;
- const unsigned char key[16];
- const unsigned char iv[16];
- struct
- {
- const unsigned char input[16];
- const unsigned char output[16];
- } data[4];
- } tv[2] =
- {
- {
- GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */
- { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
- 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
- {
- { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
- 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
- { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
- 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },
- { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
- 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
- { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f,
- 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } },
- { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
- 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
- { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40,
- 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } },
- { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
- 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
- { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e,
- 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } }
- }
- },
- {
- GCRY_CIPHER_MODE_OFB,
- { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
- 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c },
- { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
- {
- { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
- 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a },
- { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20,
- 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } },
- { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c,
- 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 },
- { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03,
- 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } },
- { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11,
- 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef },
- { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6,
- 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } },
- { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17,
- 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 },
- { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78,
- 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } },
- }
- }
- };
- unsigned char scratch[16];
- gpg_error_t err;
- int tvi, idx;
- gcry_cipher_hd_t hdenc = NULL;
- gcry_cipher_hd_t hddec = NULL;
- #define Fail(a) do { \
- _gcry_cipher_close (hdenc); \
- _gcry_cipher_close (hddec); \
- return a; \
- } while (0)
- gcry_assert (sizeof tv[0].data[0].input == sizeof scratch);
- gcry_assert (sizeof tv[0].data[0].output == sizeof scratch);
- for (tvi=0; tvi < DIM (tv); tvi++)
- if (tv[tvi].mode == requested_mode)
- break;
- if (tvi == DIM (tv))
- Fail ("no test data for this mode");
- err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0);
- if (err)
- Fail ("open");
- err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0);
- if (err)
- Fail ("open");
- err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key);
- if (!err)
- err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key);
- if (err)
- Fail ("set key");
- err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv);
- if (!err)
- err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv);
- if (err)
- Fail ("set IV");
- for (idx=0; idx < DIM (tv[tvi].data); idx++)
- {
- err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch,
- tv[tvi].data[idx].input,
- sizeof tv[tvi].data[idx].input);
- if (err)
- Fail ("encrypt command");
- if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch))
- Fail ("encrypt mismatch");
- err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch,
- tv[tvi].data[idx].output,
- sizeof tv[tvi].data[idx].output);
- if (err)
- Fail ("decrypt command");
- if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch))
- Fail ("decrypt mismatch");
- }
- #undef Fail
- _gcry_cipher_close (hdenc);
- _gcry_cipher_close (hddec);
- return NULL;
- }
- /* Complete selftest for AES-128 with all modes and driver code. */
- static gpg_err_code_t
- selftest_fips_128 (int extended, selftest_report_func_t report)
- {
- const char *what;
- const char *errtxt;
- what = "low-level";
- errtxt = selftest_basic_128 ();
- if (errtxt)
- goto failed;
- if (extended)
- {
- what = "cfb";
- errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB);
- if (errtxt)
- goto failed;
- what = "ofb";
- errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB);
- if (errtxt)
- goto failed;
- }
- return 0; /* Succeeded. */
- failed:
- if (report)
- report ("cipher", GCRY_CIPHER_AES128, what, errtxt);
- return GPG_ERR_SELFTEST_FAILED;
- }
- /* Complete selftest for AES-192. */
- static gpg_err_code_t
- selftest_fips_192 (int extended, selftest_report_func_t report)
- {
- const char *what;
- const char *errtxt;
- (void)extended; /* No extended tests available. */
- what = "low-level";
- errtxt = selftest_basic_192 ();
- if (errtxt)
- goto failed;
- return 0; /* Succeeded. */
- failed:
- if (report)
- report ("cipher", GCRY_CIPHER_AES192, what, errtxt);
- return GPG_ERR_SELFTEST_FAILED;
- }
- /* Complete selftest for AES-256. */
- static gpg_err_code_t
- selftest_fips_256 (int extended, selftest_report_func_t report)
- {
- const char *what;
- const char *errtxt;
- (void)extended; /* No extended tests available. */
- what = "low-level";
- errtxt = selftest_basic_256 ();
- if (errtxt)
- goto failed;
- return 0; /* Succeeded. */
- failed:
- if (report)
- report ("cipher", GCRY_CIPHER_AES256, what, errtxt);
- return GPG_ERR_SELFTEST_FAILED;
- }
- /* Run a full self-test for ALGO and return 0 on success. */
- static gpg_err_code_t
- run_selftests (int algo, int extended, selftest_report_func_t report)
- {
- gpg_err_code_t ec;
- switch (algo)
- {
- case GCRY_CIPHER_AES128:
- ec = selftest_fips_128 (extended, report);
- break;
- case GCRY_CIPHER_AES192:
- ec = selftest_fips_192 (extended, report);
- break;
- case GCRY_CIPHER_AES256:
- ec = selftest_fips_256 (extended, report);
- break;
- default:
- ec = GPG_ERR_CIPHER_ALGO;
- break;
- }
- return ec;
- }
- static const char *rijndael_names[] =
- {
- "RIJNDAEL",
- "AES128",
- "AES-128",
- NULL
- };
- static gcry_cipher_oid_spec_t rijndael_oids[] =
- {
- { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB },
- { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC },
- { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB },
- { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB },
- { NULL }
- };
- gcry_cipher_spec_t _gcry_cipher_spec_aes =
- {
- "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context),
- rijndael_setkey, rijndael_encrypt, rijndael_decrypt
- };
- cipher_extra_spec_t _gcry_cipher_extraspec_aes =
- {
- run_selftests
- };
- static const char *rijndael192_names[] =
- {
- "RIJNDAEL192",
- "AES-192",
- NULL
- };
- static gcry_cipher_oid_spec_t rijndael192_oids[] =
- {
- { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB },
- { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC },
- { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB },
- { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB },
- { NULL }
- };
- gcry_cipher_spec_t _gcry_cipher_spec_aes192 =
- {
- "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context),
- rijndael_setkey, rijndael_encrypt, rijndael_decrypt
- };
- cipher_extra_spec_t _gcry_cipher_extraspec_aes192 =
- {
- run_selftests
- };
- static const char *rijndael256_names[] =
- {
- "RIJNDAEL256",
- "AES-256",
- NULL
- };
- static gcry_cipher_oid_spec_t rijndael256_oids[] =
- {
- { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB },
- { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC },
- { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB },
- { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB },
- { NULL }
- };
- gcry_cipher_spec_t _gcry_cipher_spec_aes256 =
- {
- "AES256", rijndael256_names, rijndael256_oids, 16, 256,
- sizeof (RIJNDAEL_context),
- rijndael_setkey, rijndael_encrypt, rijndael_decrypt
- };
- cipher_extra_spec_t _gcry_cipher_extraspec_aes256 =
- {
- run_selftests
- };
|