#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/
#
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#	* Redistributions of source code must retain copyright notices,
#	  this list of conditions and the following disclaimer.
#
#	* Redistributions in binary form must reproduce the above
#	  copyright notice, this list of conditions and the following
#	  disclaimer in the documentation and/or other materials
#	  provided with the distribution.
#
#	* Neither the name of the CRYPTOGAMS nor the names of its
#	  copyright holder and contributors may be used to endorse or
#	  promote products derived from this software without specific
#	  prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for the AES instructions introduced in
# PowerISA specification version 2.07, first implemented by the POWER8
# processor. The module is endian-agnostic in the sense that it supports
# both big- and little-endian builds. Data alignment in parallelizable
# modes is handled with VSX loads and stores, which implies that the
# MSR.VSX flag must be set. Note also that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in the pure AltiVec/VMX way [data is
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It is reckoned that the occasional
# misalignment penalty at a page boundary is on average lower than the
# extra overhead of the pure AltiVec approach.
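#
# For illustration, the two alignment strategies look roughly like this
# (a sketch, not code emitted verbatim by this file; lvx_u is the perlasm
# macro that ppc-xlate.pl is assumed to expand to an unaligned VSX load
# such as lxvd2x):
#
#	# pure AltiVec/VMX: two aligned loads plus a permute to realign
#	lvx	v0,0,$inp
#	lvx	v1,$idx,$inp		# $idx = 15, second quadword
#	lvsl	v2,0,$inp
#	vperm	v0,v0,v1,v2
#
#	# VSX: a single unaligned load, as used in the bulk 8x paths
#	lvx_u	v0,0,$inp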
#
# May 2016
#
# Added the XTS subroutine; a 9x improvement was measured on little-endian
# systems and a 12x improvement on big-endian systems.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#			CBC en-/decrypt		CTR	XTS
# POWER8[le]		3.96/0.72		0.74	1.1
# POWER8[be]		3.75/0.65		0.66	1.0
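#
# The script is invoked as "perl aesp8-ppc.pl <flavour> <output.S>": the
# first command-line argument selects the target flavour and the second is
# handed to ppc-xlate.pl as the output file (see the open STDOUT pipe
# below). A flavour matching /64/ picks the 64-bit ABI mnemonics, /32/ the
# 32-bit ones, and a trailing "le" marks a little-endian build; the exact
# flavour string (e.g. linux-ppc64le) is supplied by the build system.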
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";
#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
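# Register use above follows the ELF ABI argument registers: r3..r5 carry
# the (input key, bit length, output schedule) arguments of
# ${prefix}_set_encrypt_key, which the C caller is assumed to declare
# roughly as
#
#	int aes_p8_set_encrypt_key(const u8 *inp, const int bits,
#				   struct aes_key *out);
#
# (a sketch of the expected prototype, not something defined in this file).
# The routine returns 0 on success, -1 for a NULL pointer and -2 for an
# unsupported key length, as set up via $ptr below.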
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis

Lconsts:
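	# Informal note on the position-independent address computation
	# below: "bcl 20,31,\$+4" is a branch-always that records the
	# address of the following instruction in LR; at that point LR is
	# 0x48 bytes past rcon (4 x 16-byte constant rows plus the mflr/bcl
	# pair), so subtracting 0x48 recovers the address of rcon itself.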
	mflr r0
	bcl 20,31,\$+4
	mflr $ptr	# distance between . and rcon
	addi $ptr,$ptr,-0x48
	mtlr r0
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr r11
	$PUSH r11,$LRSAVE($sp)

	li $ptr,-1
	${UCMP}i $inp,0
	beq- Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i $out,0
	beq- Lenc_key_abort		# if ($out==0) return -1;
	li $ptr,-2
	cmpwi $bits,128
	blt- Lenc_key_abort
	cmpwi $bits,256
	bgt- Lenc_key_abort
	andi. r0,$bits,0x3f
	bne- Lenc_key_abort

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	bl Lconsts
	mtlr r11

	neg r9,$inp
	lvx $in0,0,$inp
	addi $inp,$inp,15		# 15 is not typo
	lvsr $key,0,r9			# borrow $key
	li r8,0x20
	cmpwi $bits,192
	lvx $in1,0,$inp
	le?vspltisb $mask,0x0f		# borrow $mask
	lvx $rcon,0,$ptr
	le?vxor $key,$key,$mask		# adjust for byte swap
	lvx $mask,r8,$ptr
	addi $ptr,$ptr,0x10
	vperm $in0,$in0,$in1,$key	# align [and byte swap in LE]
	li $cnt,8
	vxor $zero,$zero,$zero
	mtctr $cnt

	?lvsr $outperm,0,$out
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$zero,$outmask,$outperm

	blt Loop128
	addi $inp,$inp,8
	beq L192
	addi $inp,$inp,8
	b L256
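	# Informal note on the expansion step below: the $mask permutation
	# splats RotWord() of the last key word into every word of $key, so
	# the ShiftRows step inside vcipherlast becomes a no-op and what
	# remains is SubBytes plus the XOR with the round constant in $rcon.
	# The vsldoi/vxor chain then folds the previous key words in as in
	# the standard AES key schedule, and vadduwm doubles the round
	# constant for the next iteration.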
.align	4
Loop128:
	vperm $key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	bdnz Loop128

	lvx $rcon,0,$ptr		# last two round keys

	vperm $key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key

	vperm $key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out

	addi $inp,$out,15		# 15 is not typo
	addi $out,$out,0x50

	li $rounds,10
	b Ldone

.align	4
L192:
	lvx $tmp,0,$inp
	li $cnt,4
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb $key,8			# borrow $key
	mtctr $cnt
	vsububm $mask,$mask,$key	# adjust the mask

Loop192:
	vperm $key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vcipherlast $key,$key,$rcon

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp

	vsldoi $stage,$zero,$in1,8
	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12	# >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vsldoi $stage,$stage,$in0,8

	vperm $key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vperm $outtail,$stage,$stage,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vsldoi $stage,$in0,$in1,8
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vperm $outtail,$stage,$stage,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	stvx $stage,0,$out
	addi $out,$out,16

	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12	# >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15		# 15 is not typo
	addi $out,$out,16
	bdnz Loop192

	li $rounds,12
	addi $out,$out,0x20
	b Ldone

.align	4
L256:
	lvx $tmp,0,$inp
	li $cnt,7
	li $rounds,14
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr $cnt

Loop256:
	vperm $key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi $tmp,$zero,$in0,12	# >>32
	vperm $outtail,$in1,$in1,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm	# rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15		# 15 is not typo
	addi $out,$out,16
	bdz Ldone

	vspltw $key,$in0,3		# just splat
	vsldoi $tmp,$zero,$in1,12	# >>32
	vsbox $key,$key

	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12	# >>32
	vxor $in1,$in1,$tmp

	vxor $in1,$in1,$key
	b Loop256

.align	4
Ldone:
	lvx $in1,0,$inp			# redundant in aligned case
	vsel $in1,$outhead,$in1,$outmask
	stvx $in1,0,$inp
	li $ptr,0
	mtspr 256,$vrsave
	stw $rounds,0($out)

Lenc_key_abort:
	mr r3,$ptr
	blr
	.long 0
	.byte 0,12,0x14,1,0,0,3,0
	.long 0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU $sp,-$FRAME($sp)
	mflr r10
	$PUSH r10,$FRAME+$LRSAVE($sp)
	bl Lset_encrypt_key
	mtlr r10

	cmpwi r3,0
	bne- Ldec_key_abort

	slwi $cnt,$rounds,4
	subi $inp,$out,240		# first round key
	srwi $rounds,$rounds,1
	add $out,$inp,$cnt		# last round key
	mtctr $rounds

Ldeckey:
	lwz r0, 0($inp)
	lwz r6, 4($inp)
	lwz r7, 8($inp)
	lwz r8, 12($inp)
	addi $inp,$inp,16
	lwz r9, 0($out)
	lwz r10,4($out)
	lwz r11,8($out)
	lwz r12,12($out)
	stw r0, 0($out)
	stw r6, 4($out)
	stw r7, 8($out)
	stw r8, 12($out)
	subi $out,$out,16
	stw r9, -16($inp)
	stw r10,-12($inp)
	stw r11,-8($inp)
	stw r12,-4($inp)
	bdnz Ldeckey

	xor r3,r3,r3			# return value

Ldec_key_abort:
	addi $sp,$sp,$FRAME
	blr
	.long 0
	.byte 0,12,4,1,0x80,0,3,0
	.long 0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
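# The generated routines encrypt or decrypt exactly one 16-byte block per
# call, with (in, out, key) passed in r3..r5; the C-side declaration is
# assumed to look roughly like
#	void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
# (a sketch, not something this file defines). $n merely switches between
# the vcipher and vncipher instruction families for the two directions.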
$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz $rounds,240($key)
	lis r0,0xfc00
	mfspr $vrsave,256
	li $idx,15			# 15 is not typo
	mtspr 256,r0

	lvx v0,0,$inp
	neg r11,$out
	lvx v1,$idx,$inp
	lvsl v2,0,$inp			# inpperm
	le?vspltisb v4,0x0f
	?lvsl v3,0,r11			# outperm
	le?vxor v2,v2,v4
	li $idx,16
	vperm v0,v0,v1,v2		# align [and byte swap in LE]
	lvx v1,0,$key
	?lvsl v5,0,$key			# keyperm
	srwi $rounds,$rounds,1
	lvx v2,$idx,$key
	addi $idx,$idx,16
	subi $rounds,$rounds,1
	?vperm v1,v1,v2,v5		# align round key

	vxor v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	mtctr $rounds

Loop_${dir}c:
	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	addi $idx,$idx,16
	?vperm v1,v1,v2,v5
	v${n}cipher v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_${dir}c

	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	?vperm v1,v1,v2,v5
	v${n}cipherlast v0,v0,v1

	vspltisb v2,-1
	vxor v1,v1,v1
	li $idx,15			# 15 is not typo
	?vperm v2,v1,v2,v3		# outmask
	le?vxor v3,v3,v4
	lvx v1,0,$out			# outhead
	vperm v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel v1,v1,v0,v2
	lvx v4,$idx,$out
	stvx v1,0,$out
	vsel v0,v0,v4,v2
	stvx v0,$idx,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,3,0
	.long 0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
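# Argument registers r3..r8 carry (in, out, length, key schedule, iv
# pointer, enc flag); $enc selects the direction at run time (non-zero =
# encrypt, zero = decrypt; see the "test direction" compare below). A rough
# sketch of the assumed C prototype (not defined here):
#	void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#				const struct aes_key *key, u8 *iv, int enc);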
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i $len,16
	bltlr-

	cmpwi $enc,0			# test direction
	lis r0,0xffe0
	mfspr $vrsave,256
	mtspr 256,r0

	li $idx,15
	vxor $rndkey0,$rndkey0,$rndkey0
	le?vspltisb $tmp,0x0f

	lvx $ivec,0,$ivp		# load [unaligned] iv
	lvsl $inpperm,0,$ivp
	lvx $inptail,$idx,$ivp
	le?vxor $inpperm,$inpperm,$tmp
	vperm $ivec,$ivec,$inptail,$inpperm

	neg r11,$inp
	?lvsl $keyperm,0,$key		# prepare for unaligned key
	lwz $rounds,240($key)

	lvsr $inpperm,0,r11		# prepare for unaligned load
	lvx $inptail,0,$inp
	addi $inp,$inp,15		# 15 is not typo
	le?vxor $inpperm,$inpperm,$tmp

	?lvsr $outperm,0,$out		# prepare for unaligned store
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp

	srwi $rounds,$rounds,1
	li $idx,16
	subi $rounds,$rounds,1
	beq Lcbc_dec

Lcbc_enc:
	vmr $inout,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	mtctr $rounds
	subi $len,$len,16		# len-=16

	lvx $rndkey0,0,$key
	vperm $inout,$inout,$inptail,$inpperm
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	vxor $inout,$inout,$ivec

Loop_cbc_enc:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_cbc_enc

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	li $idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast $ivec,$inout,$rndkey0
	${UCMP}i $len,16

	vperm $tmp,$ivec,$ivec,$outperm
	vsel $inout,$outhead,$tmp,$outmask
	vmr $outhead,$tmp
	stvx $inout,0,$out
	addi $out,$out,16
	bge Lcbc_enc

	b Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i $len,128
	bge _aesp8_cbc_decrypt8x
	vmr $tmp,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	mtctr $rounds
	subi $len,$len,16		# len-=16

	lvx $rndkey0,0,$key
	vperm $tmp,$tmp,$inptail,$inpperm
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$tmp,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16

Loop_cbc_dec:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_cbc_dec

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	li $idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast $inout,$inout,$rndkey0
	${UCMP}i $len,16

	vxor $inout,$inout,$ivec
	vmr $ivec,$tmp
	vperm $tmp,$inout,$inout,$outperm
	vsel $inout,$outhead,$tmp,$outmask
	vmr $outhead,$tmp
	stvx $inout,0,$out
	addi $out,$out,16
	bge Lcbc_dec

Lcbc_done:
	addi $out,$out,-1
	lvx $inout,0,$out		# redundant in aligned case
	vsel $inout,$outhead,$inout,$outmask
	stvx $inout,0,$out

	neg $enc,$ivp			# write [unaligned] iv
	li $idx,15			# 15 is not typo
	vxor $rndkey0,$rndkey0,$rndkey0
	vspltisb $outmask,-1
	le?vspltisb $tmp,0x0f
	?lvsl $outperm,0,$enc
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp
	lvx $outhead,0,$ivp
	vperm $ivec,$ivec,$ivec,$outperm
	vsel $inout,$outhead,$ivec,$outmask
	lvx $inptail,$idx,$ivp
	stvx $inout,0,$ivp
	vsel $inout,$ivec,$inptail,$outmask
	stvx $inout,$idx,$ivp

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,6,0
	.long 0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
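# How the 8x path below is organized (informal summary): the key schedule is
# pre-permuted once and the early round keys are spilled to the stack frame
# at $sp+$FRAME+15 ("off-load round[n]"), so the inner loop can stream them
# back with plain lvx through the v24/v25 rotating buffer while the last six
# round keys stay resident in v26-v31. Eight blocks are whitened with
# $rndkey0 up front and carried through the rounds together.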
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li r10,`$FRAME+8*16+15`
	li r11,`$FRAME+8*16+31`
	stvx v20,r10,$sp		# ABI says so
	addi r10,r10,32
	stvx v21,r11,$sp
	addi r11,r11,32
	stvx v22,r10,$sp
	addi r10,r10,32
	stvx v23,r11,$sp
	addi r11,r11,32
	stvx v24,r10,$sp
	addi r10,r10,32
	stvx v25,r11,$sp
	addi r11,r11,32
	stvx v26,r10,$sp
	addi r10,r10,32
	stvx v27,r11,$sp
	addi r11,r11,32
	stvx v28,r10,$sp
	addi r10,r10,32
	stvx v29,r11,$sp
	addi r11,r11,32
	stvx v30,r10,$sp
	stvx v31,r11,$sp
	li r0,-1
	stw $vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li $x10,0x10
	$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li $x20,0x20
	$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li $x30,0x30
	$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li $x40,0x40
	$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li $x50,0x50
	$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li $x60,0x60
	$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li $x70,0x70
	mtspr 256,r0

	subi $rounds,$rounds,3		# -4 in total
	subi $len,$len,128		# bias

	lvx $rndkey0,$x00,$key		# load key schedule
	lvx v30,$x10,$key
	addi $key,$key,0x20
	lvx v31,$x00,$key
	?vperm $rndkey0,$rndkey0,v30,$keyperm
	addi $key_,$sp,$FRAME+15
	mtctr $rounds

Load_cbc_dec_key:
	?vperm v24,v30,v31,$keyperm
	lvx v30,$x10,$key
	addi $key,$key,0x20
	stvx v24,$x00,$key_		# off-load round[1]
	?vperm v25,v31,v30,$keyperm
	lvx v31,$x00,$key
	stvx v25,$x10,$key_		# off-load round[2]
	addi $key_,$key_,0x20
	bdnz Load_cbc_dec_key

	lvx v26,$x10,$key
	?vperm v24,v30,v31,$keyperm
	lvx v27,$x20,$key
	stvx v24,$x00,$key_		# off-load round[3]
	?vperm v25,v31,v26,$keyperm
	lvx v28,$x30,$key
	stvx v25,$x10,$key_		# off-load round[4]
	addi $key_,$sp,$FRAME+15	# rewind $key_
	?vperm v26,v26,v27,$keyperm
	lvx v29,$x40,$key
	?vperm v27,v27,v28,$keyperm
	lvx v30,$x50,$key
	?vperm v28,v28,v29,$keyperm
	lvx v31,$x60,$key
	?vperm v29,v29,v30,$keyperm
	lvx $out0,$x70,$key		# borrow $out0
	?vperm v30,v30,v31,$keyperm
	lvx v24,$x00,$key_		# pre-load round[1]
	?vperm v31,v31,$out0,$keyperm
	lvx v25,$x10,$key_		# pre-load round[2]

	#lvx $inptail,0,$inp		# "caller" already did this
	#addi $inp,$inp,15		# 15 is not typo
	subi $inp,$inp,15		# undo "caller"

	le?li $idx,8
	lvx_u $in0,$x00,$inp		# load first 8 "words"
	le?lvsl $inpperm,0,$idx
	le?vspltisb $tmp,0x0f
	lvx_u $in1,$x10,$inp
	le?vxor $inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u $in2,$x20,$inp
	le?vperm $in0,$in0,$in0,$inpperm
	lvx_u $in3,$x30,$inp
	le?vperm $in1,$in1,$in1,$inpperm
	lvx_u $in4,$x40,$inp
	le?vperm $in2,$in2,$in2,$inpperm
	vxor $out0,$in0,$rndkey0
	lvx_u $in5,$x50,$inp
	le?vperm $in3,$in3,$in3,$inpperm
	vxor $out1,$in1,$rndkey0
	lvx_u $in6,$x60,$inp
	le?vperm $in4,$in4,$in4,$inpperm
	vxor $out2,$in2,$rndkey0
	lvx_u $in7,$x70,$inp
	addi $inp,$inp,0x80
	le?vperm $in5,$in5,$in5,$inpperm
	vxor $out3,$in3,$rndkey0
	le?vperm $in6,$in6,$in6,$inpperm
	vxor $out4,$in4,$rndkey0
	le?vperm $in7,$in7,$in7,$inpperm
	vxor $out5,$in5,$rndkey0
	vxor $out6,$in6,$rndkey0
	vxor $out7,$in7,$rndkey0

	mtctr $rounds
	b Loop_cbc_dec8x

.align	5
Loop_cbc_dec8x:
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24
	lvx v24,$x20,$key_		# round[3]
	addi $key_,$key_,0x20

	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25
	lvx v25,$x10,$key_		# round[4]
	bdnz Loop_cbc_dec8x

	subic $len,$len,128		# $len-=128
	vncipher $out0,$out0,v24
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24

	subfe. r0,r0,r0		# borrow?-1:0
	vncipher $out0,$out0,v25
	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25

	and r0,r0,$len
	vncipher $out0,$out0,v26
	vncipher $out1,$out1,v26
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	vncipher $out4,$out4,v26
	vncipher $out5,$out5,v26
	vncipher $out6,$out6,v26
	vncipher $out7,$out7,v26

	add $inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher $out0,$out0,v27
	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vncipher $out4,$out4,v27
	vncipher $out5,$out5,v27
	vncipher $out6,$out6,v27
	vncipher $out7,$out7,v27

	addi $key_,$sp,$FRAME+15	# rewind $key_
	vncipher $out0,$out0,v28
	vncipher $out1,$out1,v28
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vncipher $out4,$out4,v28
	vncipher $out5,$out5,v28
	vncipher $out6,$out6,v28
	vncipher $out7,$out7,v28
	lvx v24,$x00,$key_		# re-pre-load round[1]

	vncipher $out0,$out0,v29
	vncipher $out1,$out1,v29
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vncipher $out4,$out4,v29
	vncipher $out5,$out5,v29
	vncipher $out6,$out6,v29
	vncipher $out7,$out7,v29
	lvx v25,$x10,$key_		# re-pre-load round[2]

	vncipher $out0,$out0,v30
	vxor $ivec,$ivec,v31		# xor with last round key
	vncipher $out1,$out1,v30
	vxor $in0,$in0,v31
	vncipher $out2,$out2,v30
	vxor $in1,$in1,v31
	vncipher $out3,$out3,v30
	vxor $in2,$in2,v31
	vncipher $out4,$out4,v30
	vxor $in3,$in3,v31
	vncipher $out5,$out5,v30
	vxor $in4,$in4,v31
	vncipher $out6,$out6,v30
	vxor $in5,$in5,v31
	vncipher $out7,$out7,v30
	vxor $in6,$in6,v31

	vncipherlast $out0,$out0,$ivec
	vncipherlast $out1,$out1,$in0
	lvx_u $in0,$x00,$inp		# load next input block
	vncipherlast $out2,$out2,$in1
	lvx_u $in1,$x10,$inp
	vncipherlast $out3,$out3,$in2
	le?vperm $in0,$in0,$in0,$inpperm
	lvx_u $in2,$x20,$inp
	vncipherlast $out4,$out4,$in3
	le?vperm $in1,$in1,$in1,$inpperm
	lvx_u $in3,$x30,$inp
	vncipherlast $out5,$out5,$in4
	le?vperm $in2,$in2,$in2,$inpperm
	lvx_u $in4,$x40,$inp
	vncipherlast $out6,$out6,$in5
	le?vperm $in3,$in3,$in3,$inpperm
	lvx_u $in5,$x50,$inp
	vncipherlast $out7,$out7,$in6
	le?vperm $in4,$in4,$in4,$inpperm
	lvx_u $in6,$x60,$inp
	vmr $ivec,$in7
	le?vperm $in5,$in5,$in5,$inpperm
	lvx_u $in7,$x70,$inp
	addi $inp,$inp,0x80

	le?vperm $out0,$out0,$out0,$inpperm
	le?vperm $out1,$out1,$out1,$inpperm
	stvx_u $out0,$x00,$out
	le?vperm $in6,$in6,$in6,$inpperm
	vxor $out0,$in0,$rndkey0
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x10,$out
	le?vperm $in7,$in7,$in7,$inpperm
	vxor $out1,$in1,$rndkey0
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x20,$out
	vxor $out2,$in2,$rndkey0
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x30,$out
	vxor $out3,$in3,$rndkey0
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x40,$out
	vxor $out4,$in4,$rndkey0
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x50,$out
	vxor $out5,$in5,$rndkey0
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x60,$out
	vxor $out6,$in6,$rndkey0
	stvx_u $out7,$x70,$out
	addi $out,$out,0x80
	vxor $out7,$in7,$rndkey0

	mtctr $rounds
	beq Loop_cbc_dec8x		# did $len-=128 borrow?

	addic. $len,$len,128
	beq Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24
	lvx v24,$x20,$key_		# round[3]
	addi $key_,$key_,0x20

	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25
	lvx v25,$x10,$key_		# round[4]
	bdnz Loop_cbc_dec8x_tail

	vncipher $out1,$out1,v24
	vncipher $out2,$out2,v24
	vncipher $out3,$out3,v24
	vncipher $out4,$out4,v24
	vncipher $out5,$out5,v24
	vncipher $out6,$out6,v24
	vncipher $out7,$out7,v24

	vncipher $out1,$out1,v25
	vncipher $out2,$out2,v25
	vncipher $out3,$out3,v25
	vncipher $out4,$out4,v25
	vncipher $out5,$out5,v25
	vncipher $out6,$out6,v25
	vncipher $out7,$out7,v25

	vncipher $out1,$out1,v26
	vncipher $out2,$out2,v26
	vncipher $out3,$out3,v26
	vncipher $out4,$out4,v26
	vncipher $out5,$out5,v26
	vncipher $out6,$out6,v26
	vncipher $out7,$out7,v26

	vncipher $out1,$out1,v27
	vncipher $out2,$out2,v27
	vncipher $out3,$out3,v27
	vncipher $out4,$out4,v27
	vncipher $out5,$out5,v27
	vncipher $out6,$out6,v27
	vncipher $out7,$out7,v27

	vncipher $out1,$out1,v28
	vncipher $out2,$out2,v28
	vncipher $out3,$out3,v28
	vncipher $out4,$out4,v28
	vncipher $out5,$out5,v28
	vncipher $out6,$out6,v28
	vncipher $out7,$out7,v28

	vncipher $out1,$out1,v29
	vncipher $out2,$out2,v29
	vncipher $out3,$out3,v29
	vncipher $out4,$out4,v29
	vncipher $out5,$out5,v29
	vncipher $out6,$out6,v29
	vncipher $out7,$out7,v29

	vncipher $out1,$out1,v30
	vxor $ivec,$ivec,v31		# last round key
	vncipher $out2,$out2,v30
	vxor $in1,$in1,v31
	vncipher $out3,$out3,v30
	vxor $in2,$in2,v31
	vncipher $out4,$out4,v30
	vxor $in3,$in3,v31
	vncipher $out5,$out5,v30
	vxor $in4,$in4,v31
	vncipher $out6,$out6,v30
	vxor $in5,$in5,v31
	vncipher $out7,$out7,v30
	vxor $in6,$in6,v31

	cmplwi $len,32			# switch($len)
	blt Lcbc_dec8x_one
	nop
	beq Lcbc_dec8x_two
	cmplwi $len,64
	blt Lcbc_dec8x_three
	nop
	beq Lcbc_dec8x_four
	cmplwi $len,96
	blt Lcbc_dec8x_five
	nop
	beq Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast $out1,$out1,$ivec
	vncipherlast $out2,$out2,$in1
	vncipherlast $out3,$out3,$in2
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out1,$out1,$out1,$inpperm
	le?vperm $out2,$out2,$out2,$inpperm
	stvx_u $out1,$x00,$out
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x10,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x20,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x30,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x40,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x50,$out
	stvx_u $out7,$x60,$out
	addi $out,$out,0x70
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast $out2,$out2,$ivec
	vncipherlast $out3,$out3,$in2
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out2,$out2,$out2,$inpperm
	le?vperm $out3,$out3,$out3,$inpperm
	stvx_u $out2,$x00,$out
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x10,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x20,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x30,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x40,$out
	stvx_u $out7,$x50,$out
	addi $out,$out,0x60
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast $out3,$out3,$ivec
	vncipherlast $out4,$out4,$in3
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out3,$out3,$out3,$inpperm
	le?vperm $out4,$out4,$out4,$inpperm
	stvx_u $out3,$x00,$out
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x10,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x20,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x30,$out
	stvx_u $out7,$x40,$out
	addi $out,$out,0x50
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast $out4,$out4,$ivec
	vncipherlast $out5,$out5,$in4
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out4,$out4,$out4,$inpperm
	le?vperm $out5,$out5,$out5,$inpperm
	stvx_u $out4,$x00,$out
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x10,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x20,$out
	stvx_u $out7,$x30,$out
	addi $out,$out,0x40
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast $out5,$out5,$ivec
	vncipherlast $out6,$out6,$in5
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out5,$out5,$out5,$inpperm
	le?vperm $out6,$out6,$out6,$inpperm
	stvx_u $out5,$x00,$out
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x10,$out
	stvx_u $out7,$x20,$out
	addi $out,$out,0x30
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast $out6,$out6,$ivec
	vncipherlast $out7,$out7,$in6
	vmr $ivec,$in7

	le?vperm $out6,$out6,$out6,$inpperm
	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out6,$x00,$out
	stvx_u $out7,$x10,$out
	addi $out,$out,0x20
	b Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast $out7,$out7,$ivec
	vmr $ivec,$in7

	le?vperm $out7,$out7,$out7,$inpperm
	stvx_u $out7,0,$out
	addi $out,$out,0x10

Lcbc_dec8x_done:
	le?vperm $ivec,$ivec,$ivec,$inpperm
	stvx_u $ivec,0,$ivp		# write [unaligned] iv

	li r10,`$FRAME+15`
	li r11,`$FRAME+31`
	stvx $inpperm,r10,$sp		# wipe copies of round keys
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32
	stvx $inpperm,r10,$sp
	addi r10,r10,32
	stvx $inpperm,r11,$sp
	addi r11,r11,32

	mtspr 256,$vrsave
	lvx v20,r10,$sp			# ABI says so
	addi r10,r10,32
	lvx v21,r11,$sp
	addi r11,r11,32
	lvx v22,r10,$sp
	addi r10,r10,32
	lvx v23,r11,$sp
	addi r11,r11,32
	lvx v24,r10,$sp
	addi r10,r10,32
	lvx v25,r11,$sp
	addi r11,r11,32
	lvx v26,r10,$sp
	addi r10,r10,32
	lvx v27,r11,$sp
	addi r11,r11,32
	lvx v28,r10,$sp
	addi r10,r10,32
	lvx v29,r11,$sp
	addi r11,r11,32
	lvx v30,r10,$sp
	lvx v31,r11,$sp
	$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long 0
	.byte 0,12,0x14,0,0x80,6,6,0
	.long 0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
#########################################################################
{{{	# CTR procedure[s]						#
####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
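# As a concrete (informal) illustration of the difference, with the counter's
# low word at 0xffffffff:
#
#	vadduwm $ivec,$ivec,$one	# 32-bit add: low word wraps to 0,
#					# upper words unchanged (upstream ctr32)
#	vadduqm $ivec,$ivec,$one	# 128-bit add: the carry propagates
#					# across the whole quadword (this file)
#
# Only the vadduqm form matches the kernel's 128-bit CTR convention.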
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i $len,1
	bltlr-

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	li $idx,15
	vxor $rndkey0,$rndkey0,$rndkey0
	le?vspltisb $tmp,0x0f

	lvx $ivec,0,$ivp		# load [unaligned] iv
	lvsl $inpperm,0,$ivp
	lvx $inptail,$idx,$ivp
	vspltisb $one,1
	le?vxor $inpperm,$inpperm,$tmp
	vperm $ivec,$ivec,$inptail,$inpperm
	vsldoi $one,$rndkey0,$one,1

	neg r11,$inp
	?lvsl $keyperm,0,$key		# prepare for unaligned key
	lwz $rounds,240($key)

	lvsr $inpperm,0,r11		# prepare for unaligned load
	lvx $inptail,0,$inp
	addi $inp,$inp,15		# 15 is not typo
	le?vxor $inpperm,$inpperm,$tmp

	srwi $rounds,$rounds,1
	li $idx,16
	subi $rounds,$rounds,1

	${UCMP}i $len,8
	bge _aesp8_ctr32_encrypt8x

	?lvsr $outperm,0,$out		# prepare for unaligned store
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$rndkey0,$outmask,$outperm
	le?vxor $outperm,$outperm,$tmp

	lvx $rndkey0,0,$key
	mtctr $rounds
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor $inout,$ivec,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	b Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher $inout,$inout,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_ctr32_enc

	vadduqm $ivec,$ivec,$one	# Kernel change for 128-bit
	vmr $dat,$inptail
	lvx $inptail,0,$inp
	addi $inp,$inp,16
	subic. $len,$len,1		# blocks--

	?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher $inout,$inout,$rndkey1
	lvx $rndkey1,$idx,$key
	vperm $dat,$dat,$inptail,$inpperm
	li $idx,16
	?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx $rndkey0,0,$key
	vxor $dat,$dat,$rndkey1		# last round key
	vcipherlast $inout,$inout,$dat

	lvx $rndkey1,$idx,$key
	addi $idx,$idx,16
	vperm $inout,$inout,$inout,$outperm
	vsel $dat,$outhead,$inout,$outmask
	mtctr $rounds
	?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr $outhead,$inout
	vxor $inout,$ivec,$rndkey0
	lvx $rndkey0,$idx,$key
	addi $idx,$idx,16
	stvx $dat,0,$out
	addi $out,$out,16
	bne Loop_ctr32_enc

	addi $out,$out,-1
	lvx $inout,0,$out		# redundant in aligned case
	vsel $inout,$outhead,$inout,$outmask
	stvx $inout,0,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,6,0
	.long 0
___
  1275. #########################################################################
  1276. {{ # Optimized CTR procedure #
  1277. my $key_="r11";
  1278. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  1279. my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
  1280. my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
  1281. my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
  1282. # v26-v31 last 6 round keys
  1283. my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  1284. my ($two,$three,$four)=($outhead,$outperm,$outmask);
  1285. $code.=<<___;
  1286. .align 5
  1287. _aesp8_ctr32_encrypt8x:
  1288. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  1289. li r10,`$FRAME+8*16+15`
  1290. li r11,`$FRAME+8*16+31`
  1291. stvx v20,r10,$sp # ABI says so
  1292. addi r10,r10,32
  1293. stvx v21,r11,$sp
  1294. addi r11,r11,32
  1295. stvx v22,r10,$sp
  1296. addi r10,r10,32
  1297. stvx v23,r11,$sp
  1298. addi r11,r11,32
  1299. stvx v24,r10,$sp
  1300. addi r10,r10,32
  1301. stvx v25,r11,$sp
  1302. addi r11,r11,32
  1303. stvx v26,r10,$sp
  1304. addi r10,r10,32
  1305. stvx v27,r11,$sp
  1306. addi r11,r11,32
  1307. stvx v28,r10,$sp
  1308. addi r10,r10,32
  1309. stvx v29,r11,$sp
  1310. addi r11,r11,32
  1311. stvx v30,r10,$sp
  1312. stvx v31,r11,$sp
  1313. li r0,-1
  1314. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  1315. li $x10,0x10
  1316. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1317. li $x20,0x20
  1318. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1319. li $x30,0x30
  1320. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1321. li $x40,0x40
  1322. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1323. li $x50,0x50
  1324. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1325. li $x60,0x60
  1326. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1327. li $x70,0x70
  1328. mtspr 256,r0
  1329. subi $rounds,$rounds,3 # -4 in total
  1330. lvx $rndkey0,$x00,$key # load key schedule
  1331. lvx v30,$x10,$key
  1332. addi $key,$key,0x20
  1333. lvx v31,$x00,$key
  1334. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  1335. addi $key_,$sp,$FRAME+15
  1336. mtctr $rounds
  1337. Load_ctr32_enc_key:
  1338. ?vperm v24,v30,v31,$keyperm
  1339. lvx v30,$x10,$key
  1340. addi $key,$key,0x20
  1341. stvx v24,$x00,$key_ # off-load round[1]
  1342. ?vperm v25,v31,v30,$keyperm
  1343. lvx v31,$x00,$key
  1344. stvx v25,$x10,$key_ # off-load round[2]
  1345. addi $key_,$key_,0x20
  1346. bdnz Load_ctr32_enc_key
  1347. lvx v26,$x10,$key
  1348. ?vperm v24,v30,v31,$keyperm
  1349. lvx v27,$x20,$key
  1350. stvx v24,$x00,$key_ # off-load round[3]
  1351. ?vperm v25,v31,v26,$keyperm
  1352. lvx v28,$x30,$key
  1353. stvx v25,$x10,$key_ # off-load round[4]
  1354. addi $key_,$sp,$FRAME+15 # rewind $key_
  1355. ?vperm v26,v26,v27,$keyperm
  1356. lvx v29,$x40,$key
  1357. ?vperm v27,v27,v28,$keyperm
  1358. lvx v30,$x50,$key
  1359. ?vperm v28,v28,v29,$keyperm
  1360. lvx v31,$x60,$key
  1361. ?vperm v29,v29,v30,$keyperm
  1362. lvx $out0,$x70,$key # borrow $out0
  1363. ?vperm v30,v30,v31,$keyperm
  1364. lvx v24,$x00,$key_ # pre-load round[1]
  1365. ?vperm v31,v31,$out0,$keyperm
  1366. lvx v25,$x10,$key_ # pre-load round[2]
  1367. vadduqm $two,$one,$one
  1368. subi $inp,$inp,15 # undo "caller"
  1369. $SHL $len,$len,4
  1370. vadduqm $out1,$ivec,$one # counter values ...
  1371. vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
  1372. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1373. le?li $idx,8
  1374. vadduqm $out3,$out1,$two
  1375. vxor $out1,$out1,$rndkey0
  1376. le?lvsl $inpperm,0,$idx
  1377. vadduqm $out4,$out2,$two
  1378. vxor $out2,$out2,$rndkey0
  1379. le?vspltisb $tmp,0x0f
  1380. vadduqm $out5,$out3,$two
  1381. vxor $out3,$out3,$rndkey0
  1382. le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
  1383. vadduqm $out6,$out4,$two
  1384. vxor $out4,$out4,$rndkey0
  1385. vadduqm $out7,$out5,$two
  1386. vxor $out5,$out5,$rndkey0
  1387. vadduqm $ivec,$out6,$two # next counter value
  1388. vxor $out6,$out6,$rndkey0
  1389. vxor $out7,$out7,$rndkey0
  1390. mtctr $rounds
  1391. b Loop_ctr32_enc8x
  1392. .align 5
  1393. Loop_ctr32_enc8x:
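# (Added note.) Each pass through this loop applies two AES rounds to all
# eight blocks, streaming the next two round keys from the aligned copy of
# the key schedule that was parked on the stack above.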
  1394. vcipher $out0,$out0,v24
  1395. vcipher $out1,$out1,v24
  1396. vcipher $out2,$out2,v24
  1397. vcipher $out3,$out3,v24
  1398. vcipher $out4,$out4,v24
  1399. vcipher $out5,$out5,v24
  1400. vcipher $out6,$out6,v24
  1401. vcipher $out7,$out7,v24
  1402. Loop_ctr32_enc8x_middle:
  1403. lvx v24,$x20,$key_ # round[3]
  1404. addi $key_,$key_,0x20
  1405. vcipher $out0,$out0,v25
  1406. vcipher $out1,$out1,v25
  1407. vcipher $out2,$out2,v25
  1408. vcipher $out3,$out3,v25
  1409. vcipher $out4,$out4,v25
  1410. vcipher $out5,$out5,v25
  1411. vcipher $out6,$out6,v25
  1412. vcipher $out7,$out7,v25
  1413. lvx v25,$x10,$key_ # round[4]
  1414. bdnz Loop_ctr32_enc8x
  1415. subic r11,$len,256 # $len-256, borrow $key_
  1416. vcipher $out0,$out0,v24
  1417. vcipher $out1,$out1,v24
  1418. vcipher $out2,$out2,v24
  1419. vcipher $out3,$out3,v24
  1420. vcipher $out4,$out4,v24
  1421. vcipher $out5,$out5,v24
  1422. vcipher $out6,$out6,v24
  1423. vcipher $out7,$out7,v24
  1424. subfe r0,r0,r0 # borrow?-1:0
  1425. vcipher $out0,$out0,v25
  1426. vcipher $out1,$out1,v25
  1427. vcipher $out2,$out2,v25
  1428. vcipher $out3,$out3,v25
  1429. vcipher $out4,$out4,v25
  1430. vcipher $out5,$out5,v25
  1431. vcipher $out6,$out6,v25
  1432. vcipher $out7,$out7,v25
  1433. and r0,r0,r11
  1434. addi $key_,$sp,$FRAME+15 # rewind $key_
  1435. vcipher $out0,$out0,v26
  1436. vcipher $out1,$out1,v26
  1437. vcipher $out2,$out2,v26
  1438. vcipher $out3,$out3,v26
  1439. vcipher $out4,$out4,v26
  1440. vcipher $out5,$out5,v26
  1441. vcipher $out6,$out6,v26
  1442. vcipher $out7,$out7,v26
  1443. lvx v24,$x00,$key_ # re-pre-load round[1]
  1444. subic $len,$len,129 # $len-=129
  1445. vcipher $out0,$out0,v27
  1446. addi $len,$len,1 # $len-=128 really
  1447. vcipher $out1,$out1,v27
  1448. vcipher $out2,$out2,v27
  1449. vcipher $out3,$out3,v27
  1450. vcipher $out4,$out4,v27
  1451. vcipher $out5,$out5,v27
  1452. vcipher $out6,$out6,v27
  1453. vcipher $out7,$out7,v27
  1454. lvx v25,$x10,$key_ # re-pre-load round[2]
  1455. vcipher $out0,$out0,v28
  1456. lvx_u $in0,$x00,$inp # load input
  1457. vcipher $out1,$out1,v28
  1458. lvx_u $in1,$x10,$inp
  1459. vcipher $out2,$out2,v28
  1460. lvx_u $in2,$x20,$inp
  1461. vcipher $out3,$out3,v28
  1462. lvx_u $in3,$x30,$inp
  1463. vcipher $out4,$out4,v28
  1464. lvx_u $in4,$x40,$inp
  1465. vcipher $out5,$out5,v28
  1466. lvx_u $in5,$x50,$inp
  1467. vcipher $out6,$out6,v28
  1468. lvx_u $in6,$x60,$inp
  1469. vcipher $out7,$out7,v28
  1470. lvx_u $in7,$x70,$inp
  1471. addi $inp,$inp,0x80
  1472. vcipher $out0,$out0,v29
  1473. le?vperm $in0,$in0,$in0,$inpperm
  1474. vcipher $out1,$out1,v29
  1475. le?vperm $in1,$in1,$in1,$inpperm
  1476. vcipher $out2,$out2,v29
  1477. le?vperm $in2,$in2,$in2,$inpperm
  1478. vcipher $out3,$out3,v29
  1479. le?vperm $in3,$in3,$in3,$inpperm
  1480. vcipher $out4,$out4,v29
  1481. le?vperm $in4,$in4,$in4,$inpperm
  1482. vcipher $out5,$out5,v29
  1483. le?vperm $in5,$in5,$in5,$inpperm
  1484. vcipher $out6,$out6,v29
  1485. le?vperm $in6,$in6,$in6,$inpperm
  1486. vcipher $out7,$out7,v29
  1487. le?vperm $in7,$in7,$in7,$inpperm
  1488. add $inp,$inp,r0 # $inp is adjusted in such a
  1489. # way that at exit from the
  1490. # loop inX-in7 are loaded
  1491. # with last "words"
  1492. subfe. r0,r0,r0 # borrow?-1:0
  1493. vcipher $out0,$out0,v30
  1494. vxor $in0,$in0,v31 # xor with last round key
  1495. vcipher $out1,$out1,v30
  1496. vxor $in1,$in1,v31
  1497. vcipher $out2,$out2,v30
  1498. vxor $in2,$in2,v31
  1499. vcipher $out3,$out3,v30
  1500. vxor $in3,$in3,v31
  1501. vcipher $out4,$out4,v30
  1502. vxor $in4,$in4,v31
  1503. vcipher $out5,$out5,v30
  1504. vxor $in5,$in5,v31
  1505. vcipher $out6,$out6,v30
  1506. vxor $in6,$in6,v31
  1507. vcipher $out7,$out7,v30
  1508. vxor $in7,$in7,v31
  1509. bne Lctr32_enc8x_break # did $len-129 borrow?
  1510. vcipherlast $in0,$out0,$in0
  1511. vcipherlast $in1,$out1,$in1
  1512. vadduqm $out1,$ivec,$one # counter values ...
  1513. vcipherlast $in2,$out2,$in2
  1514. vadduqm $out2,$ivec,$two
  1515. vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
  1516. vcipherlast $in3,$out3,$in3
  1517. vadduqm $out3,$out1,$two
  1518. vxor $out1,$out1,$rndkey0
  1519. vcipherlast $in4,$out4,$in4
  1520. vadduqm $out4,$out2,$two
  1521. vxor $out2,$out2,$rndkey0
  1522. vcipherlast $in5,$out5,$in5
  1523. vadduqm $out5,$out3,$two
  1524. vxor $out3,$out3,$rndkey0
  1525. vcipherlast $in6,$out6,$in6
  1526. vadduqm $out6,$out4,$two
  1527. vxor $out4,$out4,$rndkey0
  1528. vcipherlast $in7,$out7,$in7
  1529. vadduqm $out7,$out5,$two
  1530. vxor $out5,$out5,$rndkey0
  1531. le?vperm $in0,$in0,$in0,$inpperm
  1532. vadduqm $ivec,$out6,$two # next counter value
  1533. vxor $out6,$out6,$rndkey0
  1534. le?vperm $in1,$in1,$in1,$inpperm
  1535. vxor $out7,$out7,$rndkey0
  1536. mtctr $rounds
  1537. vcipher $out0,$out0,v24
  1538. stvx_u $in0,$x00,$out
  1539. le?vperm $in2,$in2,$in2,$inpperm
  1540. vcipher $out1,$out1,v24
  1541. stvx_u $in1,$x10,$out
  1542. le?vperm $in3,$in3,$in3,$inpperm
  1543. vcipher $out2,$out2,v24
  1544. stvx_u $in2,$x20,$out
  1545. le?vperm $in4,$in4,$in4,$inpperm
  1546. vcipher $out3,$out3,v24
  1547. stvx_u $in3,$x30,$out
  1548. le?vperm $in5,$in5,$in5,$inpperm
  1549. vcipher $out4,$out4,v24
  1550. stvx_u $in4,$x40,$out
  1551. le?vperm $in6,$in6,$in6,$inpperm
  1552. vcipher $out5,$out5,v24
  1553. stvx_u $in5,$x50,$out
  1554. le?vperm $in7,$in7,$in7,$inpperm
  1555. vcipher $out6,$out6,v24
  1556. stvx_u $in6,$x60,$out
  1557. vcipher $out7,$out7,v24
  1558. stvx_u $in7,$x70,$out
  1559. addi $out,$out,0x80
  1560. b Loop_ctr32_enc8x_middle
  1561. .align 5
  1562. Lctr32_enc8x_break:
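# (Added note.) On entry here $len <= 0 and encodes the tail size:
# $len = 16*(blocks_remaining - 8), so the comparisons against
# -0x60/-0x40/-0x20/0 below dispatch to the 1..8 remaining-block cases.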
  1563. cmpwi $len,-0x60
  1564. blt Lctr32_enc8x_one
  1565. nop
  1566. beq Lctr32_enc8x_two
  1567. cmpwi $len,-0x40
  1568. blt Lctr32_enc8x_three
  1569. nop
  1570. beq Lctr32_enc8x_four
  1571. cmpwi $len,-0x20
  1572. blt Lctr32_enc8x_five
  1573. nop
  1574. beq Lctr32_enc8x_six
  1575. cmpwi $len,0x00
  1576. blt Lctr32_enc8x_seven
  1577. Lctr32_enc8x_eight:
  1578. vcipherlast $out0,$out0,$in0
  1579. vcipherlast $out1,$out1,$in1
  1580. vcipherlast $out2,$out2,$in2
  1581. vcipherlast $out3,$out3,$in3
  1582. vcipherlast $out4,$out4,$in4
  1583. vcipherlast $out5,$out5,$in5
  1584. vcipherlast $out6,$out6,$in6
  1585. vcipherlast $out7,$out7,$in7
  1586. le?vperm $out0,$out0,$out0,$inpperm
  1587. le?vperm $out1,$out1,$out1,$inpperm
  1588. stvx_u $out0,$x00,$out
  1589. le?vperm $out2,$out2,$out2,$inpperm
  1590. stvx_u $out1,$x10,$out
  1591. le?vperm $out3,$out3,$out3,$inpperm
  1592. stvx_u $out2,$x20,$out
  1593. le?vperm $out4,$out4,$out4,$inpperm
  1594. stvx_u $out3,$x30,$out
  1595. le?vperm $out5,$out5,$out5,$inpperm
  1596. stvx_u $out4,$x40,$out
  1597. le?vperm $out6,$out6,$out6,$inpperm
  1598. stvx_u $out5,$x50,$out
  1599. le?vperm $out7,$out7,$out7,$inpperm
  1600. stvx_u $out6,$x60,$out
  1601. stvx_u $out7,$x70,$out
  1602. addi $out,$out,0x80
  1603. b Lctr32_enc8x_done
  1604. .align 5
  1605. Lctr32_enc8x_seven:
  1606. vcipherlast $out0,$out0,$in1
  1607. vcipherlast $out1,$out1,$in2
  1608. vcipherlast $out2,$out2,$in3
  1609. vcipherlast $out3,$out3,$in4
  1610. vcipherlast $out4,$out4,$in5
  1611. vcipherlast $out5,$out5,$in6
  1612. vcipherlast $out6,$out6,$in7
  1613. le?vperm $out0,$out0,$out0,$inpperm
  1614. le?vperm $out1,$out1,$out1,$inpperm
  1615. stvx_u $out0,$x00,$out
  1616. le?vperm $out2,$out2,$out2,$inpperm
  1617. stvx_u $out1,$x10,$out
  1618. le?vperm $out3,$out3,$out3,$inpperm
  1619. stvx_u $out2,$x20,$out
  1620. le?vperm $out4,$out4,$out4,$inpperm
  1621. stvx_u $out3,$x30,$out
  1622. le?vperm $out5,$out5,$out5,$inpperm
  1623. stvx_u $out4,$x40,$out
  1624. le?vperm $out6,$out6,$out6,$inpperm
  1625. stvx_u $out5,$x50,$out
  1626. stvx_u $out6,$x60,$out
  1627. addi $out,$out,0x70
  1628. b Lctr32_enc8x_done
  1629. .align 5
  1630. Lctr32_enc8x_six:
  1631. vcipherlast $out0,$out0,$in2
  1632. vcipherlast $out1,$out1,$in3
  1633. vcipherlast $out2,$out2,$in4
  1634. vcipherlast $out3,$out3,$in5
  1635. vcipherlast $out4,$out4,$in6
  1636. vcipherlast $out5,$out5,$in7
  1637. le?vperm $out0,$out0,$out0,$inpperm
  1638. le?vperm $out1,$out1,$out1,$inpperm
  1639. stvx_u $out0,$x00,$out
  1640. le?vperm $out2,$out2,$out2,$inpperm
  1641. stvx_u $out1,$x10,$out
  1642. le?vperm $out3,$out3,$out3,$inpperm
  1643. stvx_u $out2,$x20,$out
  1644. le?vperm $out4,$out4,$out4,$inpperm
  1645. stvx_u $out3,$x30,$out
  1646. le?vperm $out5,$out5,$out5,$inpperm
  1647. stvx_u $out4,$x40,$out
  1648. stvx_u $out5,$x50,$out
  1649. addi $out,$out,0x60
  1650. b Lctr32_enc8x_done
  1651. .align 5
  1652. Lctr32_enc8x_five:
  1653. vcipherlast $out0,$out0,$in3
  1654. vcipherlast $out1,$out1,$in4
  1655. vcipherlast $out2,$out2,$in5
  1656. vcipherlast $out3,$out3,$in6
  1657. vcipherlast $out4,$out4,$in7
  1658. le?vperm $out0,$out0,$out0,$inpperm
  1659. le?vperm $out1,$out1,$out1,$inpperm
  1660. stvx_u $out0,$x00,$out
  1661. le?vperm $out2,$out2,$out2,$inpperm
  1662. stvx_u $out1,$x10,$out
  1663. le?vperm $out3,$out3,$out3,$inpperm
  1664. stvx_u $out2,$x20,$out
  1665. le?vperm $out4,$out4,$out4,$inpperm
  1666. stvx_u $out3,$x30,$out
  1667. stvx_u $out4,$x40,$out
  1668. addi $out,$out,0x50
  1669. b Lctr32_enc8x_done
  1670. .align 5
  1671. Lctr32_enc8x_four:
  1672. vcipherlast $out0,$out0,$in4
  1673. vcipherlast $out1,$out1,$in5
  1674. vcipherlast $out2,$out2,$in6
  1675. vcipherlast $out3,$out3,$in7
  1676. le?vperm $out0,$out0,$out0,$inpperm
  1677. le?vperm $out1,$out1,$out1,$inpperm
  1678. stvx_u $out0,$x00,$out
  1679. le?vperm $out2,$out2,$out2,$inpperm
  1680. stvx_u $out1,$x10,$out
  1681. le?vperm $out3,$out3,$out3,$inpperm
  1682. stvx_u $out2,$x20,$out
  1683. stvx_u $out3,$x30,$out
  1684. addi $out,$out,0x40
  1685. b Lctr32_enc8x_done
  1686. .align 5
  1687. Lctr32_enc8x_three:
  1688. vcipherlast $out0,$out0,$in5
  1689. vcipherlast $out1,$out1,$in6
  1690. vcipherlast $out2,$out2,$in7
  1691. le?vperm $out0,$out0,$out0,$inpperm
  1692. le?vperm $out1,$out1,$out1,$inpperm
  1693. stvx_u $out0,$x00,$out
  1694. le?vperm $out2,$out2,$out2,$inpperm
  1695. stvx_u $out1,$x10,$out
  1696. stvx_u $out2,$x20,$out
  1697. addi $out,$out,0x30
  1698. b Lctr32_enc8x_done
  1699. .align 5
  1700. Lctr32_enc8x_two:
  1701. vcipherlast $out0,$out0,$in6
  1702. vcipherlast $out1,$out1,$in7
  1703. le?vperm $out0,$out0,$out0,$inpperm
  1704. le?vperm $out1,$out1,$out1,$inpperm
  1705. stvx_u $out0,$x00,$out
  1706. stvx_u $out1,$x10,$out
  1707. addi $out,$out,0x20
  1708. b Lctr32_enc8x_done
  1709. .align 5
  1710. Lctr32_enc8x_one:
  1711. vcipherlast $out0,$out0,$in7
  1712. le?vperm $out0,$out0,$out0,$inpperm
  1713. stvx_u $out0,0,$out
  1714. addi $out,$out,0x10
  1715. Lctr32_enc8x_done:
  1716. li r10,`$FRAME+15`
  1717. li r11,`$FRAME+31`
  1718. stvx $inpperm,r10,$sp # wipe copies of round keys
  1719. addi r10,r10,32
  1720. stvx $inpperm,r11,$sp
  1721. addi r11,r11,32
  1722. stvx $inpperm,r10,$sp
  1723. addi r10,r10,32
  1724. stvx $inpperm,r11,$sp
  1725. addi r11,r11,32
  1726. stvx $inpperm,r10,$sp
  1727. addi r10,r10,32
  1728. stvx $inpperm,r11,$sp
  1729. addi r11,r11,32
  1730. stvx $inpperm,r10,$sp
  1731. addi r10,r10,32
  1732. stvx $inpperm,r11,$sp
  1733. addi r11,r11,32
  1734. mtspr 256,$vrsave
  1735. lvx v20,r10,$sp # ABI says so
  1736. addi r10,r10,32
  1737. lvx v21,r11,$sp
  1738. addi r11,r11,32
  1739. lvx v22,r10,$sp
  1740. addi r10,r10,32
  1741. lvx v23,r11,$sp
  1742. addi r11,r11,32
  1743. lvx v24,r10,$sp
  1744. addi r10,r10,32
  1745. lvx v25,r11,$sp
  1746. addi r11,r11,32
  1747. lvx v26,r10,$sp
  1748. addi r10,r10,32
  1749. lvx v27,r11,$sp
  1750. addi r11,r11,32
  1751. lvx v28,r10,$sp
  1752. addi r10,r10,32
  1753. lvx v29,r11,$sp
  1754. addi r11,r11,32
  1755. lvx v30,r10,$sp
  1756. lvx v31,r11,$sp
  1757. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  1758. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  1759. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  1760. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  1761. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  1762. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  1763. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  1764. blr
  1765. .long 0
  1766. .byte 0,12,0x14,0,0x80,6,6,0
  1767. .long 0
  1768. .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
  1769. ___
  1770. }} }}}
  1771. #########################################################################
  1772. {{{ # XTS procedures #
  1773. # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
  1774. # const AES_KEY *key1, const AES_KEY *key2, #
  1775. # [const] unsigned char iv[16]); #
  1776. # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
  1777. # the input tweak value is assumed to be encrypted already, and the #
  1778. # last tweak value, suitable for a consecutive call on the same chunk #
  1779. # of data, is written back to the original buffer. In addition, in #
  1780. # "tweak chaining" mode only complete input blocks are processed. #
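# (Added, illustrative only; not from the original.) A typical non-chained
# call, using the prototype above, would be:
#
#     unsigned char iv[16];                      /* per-sector tweak */
#     aes_p8_xts_encrypt(in, out, len, key1, key2, iv);
#
# With a non-NULL key2 the tweak is encrypted internally and is not written
# back; key2 == NULL is only meaningful when iv already holds an encrypted
# tweak, e.g. one left over from a previous call on the same data stream.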
  1781. my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
  1782. my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
  1783. my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
  1784. my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
  1785. my $taillen = $key2;
  1786. ($inp,$idx) = ($idx,$inp); # reassign
  1787. $code.=<<___;
  1788. .globl .${prefix}_xts_encrypt
  1789. mr $inp,r3 # reassign
  1790. li r3,-1
  1791. ${UCMP}i $len,16
  1792. bltlr-
  1793. lis r0,0xfff0
  1794. mfspr r12,256 # save vrsave
  1795. li r11,0
  1796. mtspr 256,r0
  1797. vspltisb $seven,0x07 # 0x070707..07
  1798. le?lvsl $leperm,r11,r11
  1799. le?vspltisb $tmp,0x0f
  1800. le?vxor $leperm,$leperm,$seven
  1801. li $idx,15
  1802. lvx $tweak,0,$ivp # load [unaligned] iv
  1803. lvsl $inpperm,0,$ivp
  1804. lvx $inptail,$idx,$ivp
  1805. le?vxor $inpperm,$inpperm,$tmp
  1806. vperm $tweak,$tweak,$inptail,$inpperm
  1807. neg r11,$inp
  1808. lvsr $inpperm,0,r11 # prepare for unaligned load
  1809. lvx $inout,0,$inp
  1810. addi $inp,$inp,15 # 15 is not a typo
  1811. le?vxor $inpperm,$inpperm,$tmp
  1812. ${UCMP}i $key2,0 # key2==NULL?
  1813. beq Lxts_enc_no_key2
  1814. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1815. lwz $rounds,240($key2)
  1816. srwi $rounds,$rounds,1
  1817. subi $rounds,$rounds,1
  1818. li $idx,16
  1819. lvx $rndkey0,0,$key2
  1820. lvx $rndkey1,$idx,$key2
  1821. addi $idx,$idx,16
  1822. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1823. vxor $tweak,$tweak,$rndkey0
  1824. lvx $rndkey0,$idx,$key2
  1825. addi $idx,$idx,16
  1826. mtctr $rounds
  1827. Ltweak_xts_enc:
  1828. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1829. vcipher $tweak,$tweak,$rndkey1
  1830. lvx $rndkey1,$idx,$key2
  1831. addi $idx,$idx,16
  1832. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1833. vcipher $tweak,$tweak,$rndkey0
  1834. lvx $rndkey0,$idx,$key2
  1835. addi $idx,$idx,16
  1836. bdnz Ltweak_xts_enc
  1837. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1838. vcipher $tweak,$tweak,$rndkey1
  1839. lvx $rndkey1,$idx,$key2
  1840. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1841. vcipherlast $tweak,$tweak,$rndkey0
  1842. li $ivp,0 # don't chain the tweak
  1843. b Lxts_enc
  1844. Lxts_enc_no_key2:
  1845. li $idx,-16
  1846. and $len,$len,$idx # in "tweak chaining"
  1847. # mode only complete
  1848. # blocks are processed
  1849. Lxts_enc:
  1850. lvx $inptail,0,$inp
  1851. addi $inp,$inp,16
  1852. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  1853. lwz $rounds,240($key1)
  1854. srwi $rounds,$rounds,1
  1855. subi $rounds,$rounds,1
  1856. li $idx,16
  1857. vslb $eighty7,$seven,$seven # 0x808080..80
  1858. vor $eighty7,$eighty7,$seven # 0x878787..87
  1859. vspltisb $tmp,1 # 0x010101..01
  1860. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
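# (Added note.) $eighty7 holds one byte of 0x87 and fifteen bytes of 0x01.
# Together with the vsrab/vaddubm/vsldoi/vand/vxor pattern used below it
# computes the XTS tweak update T <- T*alpha over GF(2^128): vaddubm doubles
# every byte, the vsrab/vsldoi mask re-injects each byte's carry into its
# neighbour, and a carry out of the tweak's top bit is reduced by XORing
# 0x87 (the polynomial x^128 + x^7 + x^2 + x + 1) into the low byte.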
  1861. ${UCMP}i $len,96
  1862. bge _aesp8_xts_encrypt6x
  1863. andi. $taillen,$len,15
  1864. subic r0,$len,32
  1865. subi $taillen,$taillen,16
  1866. subfe r0,r0,r0
  1867. and r0,r0,$taillen
  1868. add $inp,$inp,r0
  1869. lvx $rndkey0,0,$key1
  1870. lvx $rndkey1,$idx,$key1
  1871. addi $idx,$idx,16
  1872. vperm $inout,$inout,$inptail,$inpperm
  1873. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1874. vxor $inout,$inout,$tweak
  1875. vxor $inout,$inout,$rndkey0
  1876. lvx $rndkey0,$idx,$key1
  1877. addi $idx,$idx,16
  1878. mtctr $rounds
  1879. b Loop_xts_enc
  1880. .align 5
  1881. Loop_xts_enc:
  1882. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1883. vcipher $inout,$inout,$rndkey1
  1884. lvx $rndkey1,$idx,$key1
  1885. addi $idx,$idx,16
  1886. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1887. vcipher $inout,$inout,$rndkey0
  1888. lvx $rndkey0,$idx,$key1
  1889. addi $idx,$idx,16
  1890. bdnz Loop_xts_enc
  1891. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  1892. vcipher $inout,$inout,$rndkey1
  1893. lvx $rndkey1,$idx,$key1
  1894. li $idx,16
  1895. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1896. vxor $rndkey0,$rndkey0,$tweak
  1897. vcipherlast $output,$inout,$rndkey0
  1898. le?vperm $tmp,$output,$output,$leperm
  1899. be?nop
  1900. le?stvx_u $tmp,0,$out
  1901. be?stvx_u $output,0,$out
  1902. addi $out,$out,16
  1903. subic. $len,$len,16
  1904. beq Lxts_enc_done
  1905. vmr $inout,$inptail
  1906. lvx $inptail,0,$inp
  1907. addi $inp,$inp,16
  1908. lvx $rndkey0,0,$key1
  1909. lvx $rndkey1,$idx,$key1
  1910. addi $idx,$idx,16
  1911. subic r0,$len,32
  1912. subfe r0,r0,r0
  1913. and r0,r0,$taillen
  1914. add $inp,$inp,r0
  1915. vsrab $tmp,$tweak,$seven # next tweak value
  1916. vaddubm $tweak,$tweak,$tweak
  1917. vsldoi $tmp,$tmp,$tmp,15
  1918. vand $tmp,$tmp,$eighty7
  1919. vxor $tweak,$tweak,$tmp
  1920. vperm $inout,$inout,$inptail,$inpperm
  1921. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  1922. vxor $inout,$inout,$tweak
  1923. vxor $output,$output,$rndkey0 # just in case $len<16
  1924. vxor $inout,$inout,$rndkey0
  1925. lvx $rndkey0,$idx,$key1
  1926. addi $idx,$idx,16
  1927. mtctr $rounds
  1928. ${UCMP}i $len,16
  1929. bge Loop_xts_enc
  1930. vxor $output,$output,$tweak
  1931. lvsr $inpperm,0,$len # $inpperm is no longer needed
  1932. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  1933. vspltisb $tmp,-1
  1934. vperm $inptail,$inptail,$tmp,$inpperm
  1935. vsel $inout,$inout,$output,$inptail
  1936. subi r11,$out,17
  1937. subi $out,$out,16
  1938. mtctr $len
  1939. li $len,16
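# (Added note.) Ciphertext stealing: the byte loop below copies the leading
# $len bytes of the full ciphertext block just written so that they become
# the short final output block, while the merged block assembled in $inout
# above is encrypted once more ("b Loop_xts_enc" below) under the final
# tweak.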
  1940. Loop_xts_enc_steal:
  1941. lbzu r0,1(r11)
  1942. stb r0,16(r11)
  1943. bdnz Loop_xts_enc_steal
  1944. mtctr $rounds
  1945. b Loop_xts_enc # one more time...
  1946. Lxts_enc_done:
  1947. ${UCMP}i $ivp,0
  1948. beq Lxts_enc_ret
  1949. vsrab $tmp,$tweak,$seven # next tweak value
  1950. vaddubm $tweak,$tweak,$tweak
  1951. vsldoi $tmp,$tmp,$tmp,15
  1952. vand $tmp,$tmp,$eighty7
  1953. vxor $tweak,$tweak,$tmp
  1954. le?vperm $tweak,$tweak,$tweak,$leperm
  1955. stvx_u $tweak,0,$ivp
  1956. Lxts_enc_ret:
  1957. mtspr 256,r12 # restore vrsave
  1958. li r3,0
  1959. blr
  1960. .long 0
  1961. .byte 0,12,0x04,0,0x80,6,6,0
  1962. .long 0
  1963. .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
  1964. .globl .${prefix}_xts_decrypt
  1965. mr $inp,r3 # reassign
  1966. li r3,-1
  1967. ${UCMP}i $len,16
  1968. bltlr-
  1969. lis r0,0xfff8
  1970. mfspr r12,256 # save vrsave
  1971. li r11,0
  1972. mtspr 256,r0
  1973. andi. r0,$len,15
  1974. neg r0,r0
  1975. andi. r0,r0,16
  1976. sub $len,$len,r0
  1977. vspltisb $seven,0x07 # 0x070707..07
  1978. le?lvsl $leperm,r11,r11
  1979. le?vspltisb $tmp,0x0f
  1980. le?vxor $leperm,$leperm,$seven
  1981. li $idx,15
  1982. lvx $tweak,0,$ivp # load [unaligned] iv
  1983. lvsl $inpperm,0,$ivp
  1984. lvx $inptail,$idx,$ivp
  1985. le?vxor $inpperm,$inpperm,$tmp
  1986. vperm $tweak,$tweak,$inptail,$inpperm
  1987. neg r11,$inp
  1988. lvsr $inpperm,0,r11 # prepare for unaligned load
  1989. lvx $inout,0,$inp
  1990. addi $inp,$inp,15 # 15 is not a typo
  1991. le?vxor $inpperm,$inpperm,$tmp
  1992. ${UCMP}i $key2,0 # key2==NULL?
  1993. beq Lxts_dec_no_key2
  1994. ?lvsl $keyperm,0,$key2 # prepare for unaligned key
  1995. lwz $rounds,240($key2)
  1996. srwi $rounds,$rounds,1
  1997. subi $rounds,$rounds,1
  1998. li $idx,16
  1999. lvx $rndkey0,0,$key2
  2000. lvx $rndkey1,$idx,$key2
  2001. addi $idx,$idx,16
  2002. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2003. vxor $tweak,$tweak,$rndkey0
  2004. lvx $rndkey0,$idx,$key2
  2005. addi $idx,$idx,16
  2006. mtctr $rounds
  2007. Ltweak_xts_dec:
  2008. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2009. vcipher $tweak,$tweak,$rndkey1
  2010. lvx $rndkey1,$idx,$key2
  2011. addi $idx,$idx,16
  2012. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2013. vcipher $tweak,$tweak,$rndkey0
  2014. lvx $rndkey0,$idx,$key2
  2015. addi $idx,$idx,16
  2016. bdnz Ltweak_xts_dec
  2017. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2018. vcipher $tweak,$tweak,$rndkey1
  2019. lvx $rndkey1,$idx,$key2
  2020. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2021. vcipherlast $tweak,$tweak,$rndkey0
  2022. li $ivp,0 # don't chain the tweak
  2023. b Lxts_dec
  2024. Lxts_dec_no_key2:
  2025. neg $idx,$len
  2026. andi. $idx,$idx,15
  2027. add $len,$len,$idx # in "tweak chaining"
  2028. # mode only complete
  2029. # blocks are processed
  2030. Lxts_dec:
  2031. lvx $inptail,0,$inp
  2032. addi $inp,$inp,16
  2033. ?lvsl $keyperm,0,$key1 # prepare for unaligned key
  2034. lwz $rounds,240($key1)
  2035. srwi $rounds,$rounds,1
  2036. subi $rounds,$rounds,1
  2037. li $idx,16
  2038. vslb $eighty7,$seven,$seven # 0x808080..80
  2039. vor $eighty7,$eighty7,$seven # 0x878787..87
  2040. vspltisb $tmp,1 # 0x010101..01
  2041. vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
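# (Added note.) Same 0x870101..01 constant as on the encrypt side; the
# vsrab/vaddubm/vsldoi/vand/vxor pattern below again computes T <- T*alpha
# in GF(2^128).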
  2042. ${UCMP}i $len,96
  2043. bge _aesp8_xts_decrypt6x
  2044. lvx $rndkey0,0,$key1
  2045. lvx $rndkey1,$idx,$key1
  2046. addi $idx,$idx,16
  2047. vperm $inout,$inout,$inptail,$inpperm
  2048. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2049. vxor $inout,$inout,$tweak
  2050. vxor $inout,$inout,$rndkey0
  2051. lvx $rndkey0,$idx,$key1
  2052. addi $idx,$idx,16
  2053. mtctr $rounds
  2054. ${UCMP}i $len,16
  2055. blt Ltail_xts_dec
  2056. be?b Loop_xts_dec
  2057. .align 5
  2058. Loop_xts_dec:
  2059. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2060. vncipher $inout,$inout,$rndkey1
  2061. lvx $rndkey1,$idx,$key1
  2062. addi $idx,$idx,16
  2063. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2064. vncipher $inout,$inout,$rndkey0
  2065. lvx $rndkey0,$idx,$key1
  2066. addi $idx,$idx,16
  2067. bdnz Loop_xts_dec
  2068. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2069. vncipher $inout,$inout,$rndkey1
  2070. lvx $rndkey1,$idx,$key1
  2071. li $idx,16
  2072. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2073. vxor $rndkey0,$rndkey0,$tweak
  2074. vncipherlast $output,$inout,$rndkey0
  2075. le?vperm $tmp,$output,$output,$leperm
  2076. be?nop
  2077. le?stvx_u $tmp,0,$out
  2078. be?stvx_u $output,0,$out
  2079. addi $out,$out,16
  2080. subic. $len,$len,16
  2081. beq Lxts_dec_done
  2082. vmr $inout,$inptail
  2083. lvx $inptail,0,$inp
  2084. addi $inp,$inp,16
  2085. lvx $rndkey0,0,$key1
  2086. lvx $rndkey1,$idx,$key1
  2087. addi $idx,$idx,16
  2088. vsrab $tmp,$tweak,$seven # next tweak value
  2089. vaddubm $tweak,$tweak,$tweak
  2090. vsldoi $tmp,$tmp,$tmp,15
  2091. vand $tmp,$tmp,$eighty7
  2092. vxor $tweak,$tweak,$tmp
  2093. vperm $inout,$inout,$inptail,$inpperm
  2094. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2095. vxor $inout,$inout,$tweak
  2096. vxor $inout,$inout,$rndkey0
  2097. lvx $rndkey0,$idx,$key1
  2098. addi $idx,$idx,16
  2099. mtctr $rounds
  2100. ${UCMP}i $len,16
  2101. bge Loop_xts_dec
  2102. Ltail_xts_dec:
  2103. vsrab $tmp,$tweak,$seven # next tweak value
  2104. vaddubm $tweak1,$tweak,$tweak
  2105. vsldoi $tmp,$tmp,$tmp,15
  2106. vand $tmp,$tmp,$eighty7
  2107. vxor $tweak1,$tweak1,$tmp
  2108. subi $inp,$inp,16
  2109. add $inp,$inp,$len
  2110. vxor $inout,$inout,$tweak # :-(
  2111. vxor $inout,$inout,$tweak1 # :-)
  2112. Loop_xts_dec_short:
  2113. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2114. vncipher $inout,$inout,$rndkey1
  2115. lvx $rndkey1,$idx,$key1
  2116. addi $idx,$idx,16
  2117. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2118. vncipher $inout,$inout,$rndkey0
  2119. lvx $rndkey0,$idx,$key1
  2120. addi $idx,$idx,16
  2121. bdnz Loop_xts_dec_short
  2122. ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
  2123. vncipher $inout,$inout,$rndkey1
  2124. lvx $rndkey1,$idx,$key1
  2125. li $idx,16
  2126. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2127. vxor $rndkey0,$rndkey0,$tweak1
  2128. vncipherlast $output,$inout,$rndkey0
  2129. le?vperm $tmp,$output,$output,$leperm
  2130. be?nop
  2131. le?stvx_u $tmp,0,$out
  2132. be?stvx_u $output,0,$out
  2133. vmr $inout,$inptail
  2134. lvx $inptail,0,$inp
  2135. #addi $inp,$inp,16
  2136. lvx $rndkey0,0,$key1
  2137. lvx $rndkey1,$idx,$key1
  2138. addi $idx,$idx,16
  2139. vperm $inout,$inout,$inptail,$inpperm
  2140. ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
  2141. lvsr $inpperm,0,$len # $inpperm is no longer needed
  2142. vxor $inptail,$inptail,$inptail # $inptail is no longer needed
  2143. vspltisb $tmp,-1
  2144. vperm $inptail,$inptail,$tmp,$inpperm
  2145. vsel $inout,$inout,$output,$inptail
  2146. vxor $rndkey0,$rndkey0,$tweak
  2147. vxor $inout,$inout,$rndkey0
  2148. lvx $rndkey0,$idx,$key1
  2149. addi $idx,$idx,16
  2150. subi r11,$out,1
  2151. mtctr $len
  2152. li $len,16
  2153. Loop_xts_dec_steal:
  2154. lbzu r0,1(r11)
  2155. stb r0,16(r11)
  2156. bdnz Loop_xts_dec_steal
  2157. mtctr $rounds
  2158. b Loop_xts_dec # one more time...
  2159. Lxts_dec_done:
  2160. ${UCMP}i $ivp,0
  2161. beq Lxts_dec_ret
  2162. vsrab $tmp,$tweak,$seven # next tweak value
  2163. vaddubm $tweak,$tweak,$tweak
  2164. vsldoi $tmp,$tmp,$tmp,15
  2165. vand $tmp,$tmp,$eighty7
  2166. vxor $tweak,$tweak,$tmp
  2167. le?vperm $tweak,$tweak,$tweak,$leperm
  2168. stvx_u $tweak,0,$ivp
  2169. Lxts_dec_ret:
  2170. mtspr 256,r12 # restore vrsave
  2171. li r3,0
  2172. blr
  2173. .long 0
  2174. .byte 0,12,0x04,0,0x80,6,6,0
  2175. .long 0
  2176. .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
  2177. ___
  2178. #########################################################################
  2179. {{ # Optimized XTS procedures #
  2180. my $key_=$key2;
  2181. my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
  2182. $x00=0 if ($flavour =~ /osx/);
  2183. my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
  2184. my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
  2185. my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
  2186. my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
  2187. # v26-v31 last 6 round keys
  2188. my ($keyperm)=($out0); # aliases with "caller", redundant assignment
  2189. my $taillen=$x70;
  2190. $code.=<<___;
  2191. .align 5
  2192. _aesp8_xts_encrypt6x:
  2193. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2194. mflr r11
  2195. li r7,`$FRAME+8*16+15`
  2196. li r3,`$FRAME+8*16+31`
  2197. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2198. stvx v20,r7,$sp # ABI says so
  2199. addi r7,r7,32
  2200. stvx v21,r3,$sp
  2201. addi r3,r3,32
  2202. stvx v22,r7,$sp
  2203. addi r7,r7,32
  2204. stvx v23,r3,$sp
  2205. addi r3,r3,32
  2206. stvx v24,r7,$sp
  2207. addi r7,r7,32
  2208. stvx v25,r3,$sp
  2209. addi r3,r3,32
  2210. stvx v26,r7,$sp
  2211. addi r7,r7,32
  2212. stvx v27,r3,$sp
  2213. addi r3,r3,32
  2214. stvx v28,r7,$sp
  2215. addi r7,r7,32
  2216. stvx v29,r3,$sp
  2217. addi r3,r3,32
  2218. stvx v30,r7,$sp
  2219. stvx v31,r3,$sp
  2220. li r0,-1
  2221. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2222. li $x10,0x10
  2223. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2224. li $x20,0x20
  2225. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2226. li $x30,0x30
  2227. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2228. li $x40,0x40
  2229. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2230. li $x50,0x50
  2231. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2232. li $x60,0x60
  2233. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2234. li $x70,0x70
  2235. mtspr 256,r0
  2236. subi $rounds,$rounds,3 # -4 in total
  2237. lvx $rndkey0,$x00,$key1 # load key schedule
  2238. lvx v30,$x10,$key1
  2239. addi $key1,$key1,0x20
  2240. lvx v31,$x00,$key1
  2241. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2242. addi $key_,$sp,$FRAME+15
  2243. mtctr $rounds
  2244. Load_xts_enc_key:
  2245. ?vperm v24,v30,v31,$keyperm
  2246. lvx v30,$x10,$key1
  2247. addi $key1,$key1,0x20
  2248. stvx v24,$x00,$key_ # off-load round[1]
  2249. ?vperm v25,v31,v30,$keyperm
  2250. lvx v31,$x00,$key1
  2251. stvx v25,$x10,$key_ # off-load round[2]
  2252. addi $key_,$key_,0x20
  2253. bdnz Load_xts_enc_key
  2254. lvx v26,$x10,$key1
  2255. ?vperm v24,v30,v31,$keyperm
  2256. lvx v27,$x20,$key1
  2257. stvx v24,$x00,$key_ # off-load round[3]
  2258. ?vperm v25,v31,v26,$keyperm
  2259. lvx v28,$x30,$key1
  2260. stvx v25,$x10,$key_ # off-load round[4]
  2261. addi $key_,$sp,$FRAME+15 # rewind $key_
  2262. ?vperm v26,v26,v27,$keyperm
  2263. lvx v29,$x40,$key1
  2264. ?vperm v27,v27,v28,$keyperm
  2265. lvx v30,$x50,$key1
  2266. ?vperm v28,v28,v29,$keyperm
  2267. lvx v31,$x60,$key1
  2268. ?vperm v29,v29,v30,$keyperm
  2269. lvx $twk5,$x70,$key1 # borrow $twk5
  2270. ?vperm v30,v30,v31,$keyperm
  2271. lvx v24,$x00,$key_ # pre-load round[1]
  2272. ?vperm v31,v31,$twk5,$keyperm
  2273. lvx v25,$x10,$key_ # pre-load round[2]
  2274. vperm $in0,$inout,$inptail,$inpperm
  2275. subi $inp,$inp,31 # undo "caller"
  2276. vxor $twk0,$tweak,$rndkey0
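# (Added note.) Each $twkN below is the tweak for block N already XORed with
# round key 0, so "vxor $outN,$inN,$twkN" applies the tweak and the initial
# AddRoundKey in a single vxor; v31 is XORed with $rndkey0 further down, so
# the value fed to vcipherlast is (last round key XOR tweak), which both
# finishes AES and applies the output-side tweak XOR.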
  2277. vsrab $tmp,$tweak,$seven # next tweak value
  2278. vaddubm $tweak,$tweak,$tweak
  2279. vsldoi $tmp,$tmp,$tmp,15
  2280. vand $tmp,$tmp,$eighty7
  2281. vxor $out0,$in0,$twk0
  2282. vxor $tweak,$tweak,$tmp
  2283. lvx_u $in1,$x10,$inp
  2284. vxor $twk1,$tweak,$rndkey0
  2285. vsrab $tmp,$tweak,$seven # next tweak value
  2286. vaddubm $tweak,$tweak,$tweak
  2287. vsldoi $tmp,$tmp,$tmp,15
  2288. le?vperm $in1,$in1,$in1,$leperm
  2289. vand $tmp,$tmp,$eighty7
  2290. vxor $out1,$in1,$twk1
  2291. vxor $tweak,$tweak,$tmp
  2292. lvx_u $in2,$x20,$inp
  2293. andi. $taillen,$len,15
  2294. vxor $twk2,$tweak,$rndkey0
  2295. vsrab $tmp,$tweak,$seven # next tweak value
  2296. vaddubm $tweak,$tweak,$tweak
  2297. vsldoi $tmp,$tmp,$tmp,15
  2298. le?vperm $in2,$in2,$in2,$leperm
  2299. vand $tmp,$tmp,$eighty7
  2300. vxor $out2,$in2,$twk2
  2301. vxor $tweak,$tweak,$tmp
  2302. lvx_u $in3,$x30,$inp
  2303. sub $len,$len,$taillen
  2304. vxor $twk3,$tweak,$rndkey0
  2305. vsrab $tmp,$tweak,$seven # next tweak value
  2306. vaddubm $tweak,$tweak,$tweak
  2307. vsldoi $tmp,$tmp,$tmp,15
  2308. le?vperm $in3,$in3,$in3,$leperm
  2309. vand $tmp,$tmp,$eighty7
  2310. vxor $out3,$in3,$twk3
  2311. vxor $tweak,$tweak,$tmp
  2312. lvx_u $in4,$x40,$inp
  2313. subi $len,$len,0x60
  2314. vxor $twk4,$tweak,$rndkey0
  2315. vsrab $tmp,$tweak,$seven # next tweak value
  2316. vaddubm $tweak,$tweak,$tweak
  2317. vsldoi $tmp,$tmp,$tmp,15
  2318. le?vperm $in4,$in4,$in4,$leperm
  2319. vand $tmp,$tmp,$eighty7
  2320. vxor $out4,$in4,$twk4
  2321. vxor $tweak,$tweak,$tmp
  2322. lvx_u $in5,$x50,$inp
  2323. addi $inp,$inp,0x60
  2324. vxor $twk5,$tweak,$rndkey0
  2325. vsrab $tmp,$tweak,$seven # next tweak value
  2326. vaddubm $tweak,$tweak,$tweak
  2327. vsldoi $tmp,$tmp,$tmp,15
  2328. le?vperm $in5,$in5,$in5,$leperm
  2329. vand $tmp,$tmp,$eighty7
  2330. vxor $out5,$in5,$twk5
  2331. vxor $tweak,$tweak,$tmp
  2332. vxor v31,v31,$rndkey0
  2333. mtctr $rounds
  2334. b Loop_xts_enc6x
  2335. .align 5
  2336. Loop_xts_enc6x:
  2337. vcipher $out0,$out0,v24
  2338. vcipher $out1,$out1,v24
  2339. vcipher $out2,$out2,v24
  2340. vcipher $out3,$out3,v24
  2341. vcipher $out4,$out4,v24
  2342. vcipher $out5,$out5,v24
  2343. lvx v24,$x20,$key_ # round[3]
  2344. addi $key_,$key_,0x20
  2345. vcipher $out0,$out0,v25
  2346. vcipher $out1,$out1,v25
  2347. vcipher $out2,$out2,v25
  2348. vcipher $out3,$out3,v25
  2349. vcipher $out4,$out4,v25
  2350. vcipher $out5,$out5,v25
  2351. lvx v25,$x10,$key_ # round[4]
  2352. bdnz Loop_xts_enc6x
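# (Added note.) From here down to the vcipherlast/store group the last AES
# rounds are interleaved with computing the next six tweak values and
# loading the next six input blocks, so the GF(2^128) tweak arithmetic and
# the loads hide behind the cipher latency.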
  2353. subic $len,$len,96 # $len-=96
  2354. vxor $in0,$twk0,v31 # xor with last round key
  2355. vcipher $out0,$out0,v24
  2356. vcipher $out1,$out1,v24
  2357. vsrab $tmp,$tweak,$seven # next tweak value
  2358. vxor $twk0,$tweak,$rndkey0
  2359. vaddubm $tweak,$tweak,$tweak
  2360. vcipher $out2,$out2,v24
  2361. vcipher $out3,$out3,v24
  2362. vsldoi $tmp,$tmp,$tmp,15
  2363. vcipher $out4,$out4,v24
  2364. vcipher $out5,$out5,v24
  2365. subfe. r0,r0,r0 # borrow?-1:0
  2366. vand $tmp,$tmp,$eighty7
  2367. vcipher $out0,$out0,v25
  2368. vcipher $out1,$out1,v25
  2369. vxor $tweak,$tweak,$tmp
  2370. vcipher $out2,$out2,v25
  2371. vcipher $out3,$out3,v25
  2372. vxor $in1,$twk1,v31
  2373. vsrab $tmp,$tweak,$seven # next tweak value
  2374. vxor $twk1,$tweak,$rndkey0
  2375. vcipher $out4,$out4,v25
  2376. vcipher $out5,$out5,v25
  2377. and r0,r0,$len
  2378. vaddubm $tweak,$tweak,$tweak
  2379. vsldoi $tmp,$tmp,$tmp,15
  2380. vcipher $out0,$out0,v26
  2381. vcipher $out1,$out1,v26
  2382. vand $tmp,$tmp,$eighty7
  2383. vcipher $out2,$out2,v26
  2384. vcipher $out3,$out3,v26
  2385. vxor $tweak,$tweak,$tmp
  2386. vcipher $out4,$out4,v26
  2387. vcipher $out5,$out5,v26
  2388. add $inp,$inp,r0 # $inp is adjusted in such a
  2389. # way that at exit from the
  2390. # loop inX-in5 are loaded
  2391. # with last "words"
  2392. vxor $in2,$twk2,v31
  2393. vsrab $tmp,$tweak,$seven # next tweak value
  2394. vxor $twk2,$tweak,$rndkey0
  2395. vaddubm $tweak,$tweak,$tweak
  2396. vcipher $out0,$out0,v27
  2397. vcipher $out1,$out1,v27
  2398. vsldoi $tmp,$tmp,$tmp,15
  2399. vcipher $out2,$out2,v27
  2400. vcipher $out3,$out3,v27
  2401. vand $tmp,$tmp,$eighty7
  2402. vcipher $out4,$out4,v27
  2403. vcipher $out5,$out5,v27
  2404. addi $key_,$sp,$FRAME+15 # rewind $key_
  2405. vxor $tweak,$tweak,$tmp
  2406. vcipher $out0,$out0,v28
  2407. vcipher $out1,$out1,v28
  2408. vxor $in3,$twk3,v31
  2409. vsrab $tmp,$tweak,$seven # next tweak value
  2410. vxor $twk3,$tweak,$rndkey0
  2411. vcipher $out2,$out2,v28
  2412. vcipher $out3,$out3,v28
  2413. vaddubm $tweak,$tweak,$tweak
  2414. vsldoi $tmp,$tmp,$tmp,15
  2415. vcipher $out4,$out4,v28
  2416. vcipher $out5,$out5,v28
  2417. lvx v24,$x00,$key_ # re-pre-load round[1]
  2418. vand $tmp,$tmp,$eighty7
  2419. vcipher $out0,$out0,v29
  2420. vcipher $out1,$out1,v29
  2421. vxor $tweak,$tweak,$tmp
  2422. vcipher $out2,$out2,v29
  2423. vcipher $out3,$out3,v29
  2424. vxor $in4,$twk4,v31
  2425. vsrab $tmp,$tweak,$seven # next tweak value
  2426. vxor $twk4,$tweak,$rndkey0
  2427. vcipher $out4,$out4,v29
  2428. vcipher $out5,$out5,v29
  2429. lvx v25,$x10,$key_ # re-pre-load round[2]
  2430. vaddubm $tweak,$tweak,$tweak
  2431. vsldoi $tmp,$tmp,$tmp,15
  2432. vcipher $out0,$out0,v30
  2433. vcipher $out1,$out1,v30
  2434. vand $tmp,$tmp,$eighty7
  2435. vcipher $out2,$out2,v30
  2436. vcipher $out3,$out3,v30
  2437. vxor $tweak,$tweak,$tmp
  2438. vcipher $out4,$out4,v30
  2439. vcipher $out5,$out5,v30
  2440. vxor $in5,$twk5,v31
  2441. vsrab $tmp,$tweak,$seven # next tweak value
  2442. vxor $twk5,$tweak,$rndkey0
  2443. vcipherlast $out0,$out0,$in0
  2444. lvx_u $in0,$x00,$inp # load next input block
  2445. vaddubm $tweak,$tweak,$tweak
  2446. vsldoi $tmp,$tmp,$tmp,15
  2447. vcipherlast $out1,$out1,$in1
  2448. lvx_u $in1,$x10,$inp
  2449. vcipherlast $out2,$out2,$in2
  2450. le?vperm $in0,$in0,$in0,$leperm
  2451. lvx_u $in2,$x20,$inp
  2452. vand $tmp,$tmp,$eighty7
  2453. vcipherlast $out3,$out3,$in3
  2454. le?vperm $in1,$in1,$in1,$leperm
  2455. lvx_u $in3,$x30,$inp
  2456. vcipherlast $out4,$out4,$in4
  2457. le?vperm $in2,$in2,$in2,$leperm
  2458. lvx_u $in4,$x40,$inp
  2459. vxor $tweak,$tweak,$tmp
  2460. vcipherlast $tmp,$out5,$in5 # last block might be needed
  2461. # in stealing mode
  2462. le?vperm $in3,$in3,$in3,$leperm
  2463. lvx_u $in5,$x50,$inp
  2464. addi $inp,$inp,0x60
  2465. le?vperm $in4,$in4,$in4,$leperm
  2466. le?vperm $in5,$in5,$in5,$leperm
  2467. le?vperm $out0,$out0,$out0,$leperm
  2468. le?vperm $out1,$out1,$out1,$leperm
  2469. stvx_u $out0,$x00,$out # store output
  2470. vxor $out0,$in0,$twk0
  2471. le?vperm $out2,$out2,$out2,$leperm
  2472. stvx_u $out1,$x10,$out
  2473. vxor $out1,$in1,$twk1
  2474. le?vperm $out3,$out3,$out3,$leperm
  2475. stvx_u $out2,$x20,$out
  2476. vxor $out2,$in2,$twk2
  2477. le?vperm $out4,$out4,$out4,$leperm
  2478. stvx_u $out3,$x30,$out
  2479. vxor $out3,$in3,$twk3
  2480. le?vperm $out5,$tmp,$tmp,$leperm
  2481. stvx_u $out4,$x40,$out
  2482. vxor $out4,$in4,$twk4
  2483. le?stvx_u $out5,$x50,$out
  2484. be?stvx_u $tmp, $x50,$out
  2485. vxor $out5,$in5,$twk5
  2486. addi $out,$out,0x60
  2487. mtctr $rounds
  2488. beq Loop_xts_enc6x # did $len-=96 borrow?
  2489. addic. $len,$len,0x60
  2490. beq Lxts_enc6x_zero
  2491. cmpwi $len,0x20
  2492. blt Lxts_enc6x_one
  2493. nop
  2494. beq Lxts_enc6x_two
  2495. cmpwi $len,0x40
  2496. blt Lxts_enc6x_three
  2497. nop
  2498. beq Lxts_enc6x_four
  2499. Lxts_enc6x_five:
  2500. vxor $out0,$in1,$twk0
  2501. vxor $out1,$in2,$twk1
  2502. vxor $out2,$in3,$twk2
  2503. vxor $out3,$in4,$twk3
  2504. vxor $out4,$in5,$twk4
  2505. bl _aesp8_xts_enc5x
  2506. le?vperm $out0,$out0,$out0,$leperm
  2507. vmr $twk0,$twk5 # unused tweak
  2508. le?vperm $out1,$out1,$out1,$leperm
  2509. stvx_u $out0,$x00,$out # store output
  2510. le?vperm $out2,$out2,$out2,$leperm
  2511. stvx_u $out1,$x10,$out
  2512. le?vperm $out3,$out3,$out3,$leperm
  2513. stvx_u $out2,$x20,$out
  2514. vxor $tmp,$out4,$twk5 # last block prep for stealing
  2515. le?vperm $out4,$out4,$out4,$leperm
  2516. stvx_u $out3,$x30,$out
  2517. stvx_u $out4,$x40,$out
  2518. addi $out,$out,0x50
  2519. bne Lxts_enc6x_steal
  2520. b Lxts_enc6x_done
  2521. .align 4
  2522. Lxts_enc6x_four:
  2523. vxor $out0,$in2,$twk0
  2524. vxor $out1,$in3,$twk1
  2525. vxor $out2,$in4,$twk2
  2526. vxor $out3,$in5,$twk3
  2527. vxor $out4,$out4,$out4
  2528. bl _aesp8_xts_enc5x
  2529. le?vperm $out0,$out0,$out0,$leperm
  2530. vmr $twk0,$twk4 # unused tweak
  2531. le?vperm $out1,$out1,$out1,$leperm
  2532. stvx_u $out0,$x00,$out # store output
  2533. le?vperm $out2,$out2,$out2,$leperm
  2534. stvx_u $out1,$x10,$out
  2535. vxor $tmp,$out3,$twk4 # last block prep for stealing
  2536. le?vperm $out3,$out3,$out3,$leperm
  2537. stvx_u $out2,$x20,$out
  2538. stvx_u $out3,$x30,$out
  2539. addi $out,$out,0x40
  2540. bne Lxts_enc6x_steal
  2541. b Lxts_enc6x_done
  2542. .align 4
  2543. Lxts_enc6x_three:
  2544. vxor $out0,$in3,$twk0
  2545. vxor $out1,$in4,$twk1
  2546. vxor $out2,$in5,$twk2
  2547. vxor $out3,$out3,$out3
  2548. vxor $out4,$out4,$out4
  2549. bl _aesp8_xts_enc5x
  2550. le?vperm $out0,$out0,$out0,$leperm
  2551. vmr $twk0,$twk3 # unused tweak
  2552. le?vperm $out1,$out1,$out1,$leperm
  2553. stvx_u $out0,$x00,$out # store output
  2554. vxor $tmp,$out2,$twk3 # last block prep for stealing
  2555. le?vperm $out2,$out2,$out2,$leperm
  2556. stvx_u $out1,$x10,$out
  2557. stvx_u $out2,$x20,$out
  2558. addi $out,$out,0x30
  2559. bne Lxts_enc6x_steal
  2560. b Lxts_enc6x_done
  2561. .align 4
  2562. Lxts_enc6x_two:
  2563. vxor $out0,$in4,$twk0
  2564. vxor $out1,$in5,$twk1
  2565. vxor $out2,$out2,$out2
  2566. vxor $out3,$out3,$out3
  2567. vxor $out4,$out4,$out4
  2568. bl _aesp8_xts_enc5x
  2569. le?vperm $out0,$out0,$out0,$leperm
  2570. vmr $twk0,$twk2 # unused tweak
  2571. vxor $tmp,$out1,$twk2 # last block prep for stealing
  2572. le?vperm $out1,$out1,$out1,$leperm
  2573. stvx_u $out0,$x00,$out # store output
  2574. stvx_u $out1,$x10,$out
  2575. addi $out,$out,0x20
  2576. bne Lxts_enc6x_steal
  2577. b Lxts_enc6x_done
  2578. .align 4
  2579. Lxts_enc6x_one:
  2580. vxor $out0,$in5,$twk0
  2581. nop
  2582. Loop_xts_enc1x:
  2583. vcipher $out0,$out0,v24
  2584. lvx v24,$x20,$key_ # round[3]
  2585. addi $key_,$key_,0x20
  2586. vcipher $out0,$out0,v25
  2587. lvx v25,$x10,$key_ # round[4]
  2588. bdnz Loop_xts_enc1x
  2589. add $inp,$inp,$taillen
  2590. cmpwi $taillen,0
  2591. vcipher $out0,$out0,v24
  2592. subi $inp,$inp,16
  2593. vcipher $out0,$out0,v25
  2594. lvsr $inpperm,0,$taillen
  2595. vcipher $out0,$out0,v26
  2596. lvx_u $in0,0,$inp
  2597. vcipher $out0,$out0,v27
  2598. addi $key_,$sp,$FRAME+15 # rewind $key_
  2599. vcipher $out0,$out0,v28
  2600. lvx v24,$x00,$key_ # re-pre-load round[1]
  2601. vcipher $out0,$out0,v29
  2602. lvx v25,$x10,$key_ # re-pre-load round[2]
  2603. vxor $twk0,$twk0,v31
  2604. le?vperm $in0,$in0,$in0,$leperm
  2605. vcipher $out0,$out0,v30
  2606. vperm $in0,$in0,$in0,$inpperm
  2607. vcipherlast $out0,$out0,$twk0
  2608. vmr $twk0,$twk1 # unused tweak
  2609. vxor $tmp,$out0,$twk1 # last block prep for stealing
  2610. le?vperm $out0,$out0,$out0,$leperm
  2611. stvx_u $out0,$x00,$out # store output
  2612. addi $out,$out,0x10
  2613. bne Lxts_enc6x_steal
  2614. b Lxts_enc6x_done
  2615. .align 4
  2616. Lxts_enc6x_zero:
  2617. cmpwi $taillen,0
  2618. beq Lxts_enc6x_done
  2619. add $inp,$inp,$taillen
  2620. subi $inp,$inp,16
  2621. lvx_u $in0,0,$inp
  2622. lvsr $inpperm,0,$taillen # $in5 is no more
  2623. le?vperm $in0,$in0,$in0,$leperm
  2624. vperm $in0,$in0,$in0,$inpperm
  2625. vxor $tmp,$tmp,$twk0
  2626. Lxts_enc6x_steal:
  2627. vxor $in0,$in0,$twk0
  2628. vxor $out0,$out0,$out0
  2629. vspltisb $out1,-1
  2630. vperm $out0,$out0,$out1,$inpperm
  2631. vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
  2632. subi r30,$out,17
  2633. subi $out,$out,16
  2634. mtctr $taillen
  2635. Loop_xts_enc6x_steal:
  2636. lbzu r0,1(r30)
  2637. stb r0,16(r30)
  2638. bdnz Loop_xts_enc6x_steal
  2639. li $taillen,0
  2640. mtctr $rounds
  2641. b Loop_xts_enc1x # one more time...
  2642. .align 4
  2643. Lxts_enc6x_done:
  2644. ${UCMP}i $ivp,0
  2645. beq Lxts_enc6x_ret
  2646. vxor $tweak,$twk0,$rndkey0
  2647. le?vperm $tweak,$tweak,$tweak,$leperm
  2648. stvx_u $tweak,0,$ivp
  2649. Lxts_enc6x_ret:
  2650. mtlr r11
  2651. li r10,`$FRAME+15`
  2652. li r11,`$FRAME+31`
  2653. stvx $seven,r10,$sp # wipe copies of round keys
  2654. addi r10,r10,32
  2655. stvx $seven,r11,$sp
  2656. addi r11,r11,32
  2657. stvx $seven,r10,$sp
  2658. addi r10,r10,32
  2659. stvx $seven,r11,$sp
  2660. addi r11,r11,32
  2661. stvx $seven,r10,$sp
  2662. addi r10,r10,32
  2663. stvx $seven,r11,$sp
  2664. addi r11,r11,32
  2665. stvx $seven,r10,$sp
  2666. addi r10,r10,32
  2667. stvx $seven,r11,$sp
  2668. addi r11,r11,32
  2669. mtspr 256,$vrsave
  2670. lvx v20,r10,$sp # ABI says so
  2671. addi r10,r10,32
  2672. lvx v21,r11,$sp
  2673. addi r11,r11,32
  2674. lvx v22,r10,$sp
  2675. addi r10,r10,32
  2676. lvx v23,r11,$sp
  2677. addi r11,r11,32
  2678. lvx v24,r10,$sp
  2679. addi r10,r10,32
  2680. lvx v25,r11,$sp
  2681. addi r11,r11,32
  2682. lvx v26,r10,$sp
  2683. addi r10,r10,32
  2684. lvx v27,r11,$sp
  2685. addi r11,r11,32
  2686. lvx v28,r10,$sp
  2687. addi r10,r10,32
  2688. lvx v29,r11,$sp
  2689. addi r11,r11,32
  2690. lvx v30,r10,$sp
  2691. lvx v31,r11,$sp
  2692. $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2693. $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2694. $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2695. $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2696. $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2697. $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2698. addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
  2699. blr
  2700. .long 0
  2701. .byte 0,12,0x04,1,0x80,6,6,0
  2702. .long 0
  2703. .align 5
  2704. _aesp8_xts_enc5x:
  2705. vcipher $out0,$out0,v24
  2706. vcipher $out1,$out1,v24
  2707. vcipher $out2,$out2,v24
  2708. vcipher $out3,$out3,v24
  2709. vcipher $out4,$out4,v24
  2710. lvx v24,$x20,$key_ # round[3]
  2711. addi $key_,$key_,0x20
  2712. vcipher $out0,$out0,v25
  2713. vcipher $out1,$out1,v25
  2714. vcipher $out2,$out2,v25
  2715. vcipher $out3,$out3,v25
  2716. vcipher $out4,$out4,v25
  2717. lvx v25,$x10,$key_ # round[4]
  2718. bdnz _aesp8_xts_enc5x
  2719. add $inp,$inp,$taillen
  2720. cmpwi $taillen,0
  2721. vcipher $out0,$out0,v24
  2722. vcipher $out1,$out1,v24
  2723. vcipher $out2,$out2,v24
  2724. vcipher $out3,$out3,v24
  2725. vcipher $out4,$out4,v24
  2726. subi $inp,$inp,16
  2727. vcipher $out0,$out0,v25
  2728. vcipher $out1,$out1,v25
  2729. vcipher $out2,$out2,v25
  2730. vcipher $out3,$out3,v25
  2731. vcipher $out4,$out4,v25
  2732. vxor $twk0,$twk0,v31
  2733. vcipher $out0,$out0,v26
  2734. lvsr $inpperm,r0,$taillen # $in5 is no more
  2735. vcipher $out1,$out1,v26
  2736. vcipher $out2,$out2,v26
  2737. vcipher $out3,$out3,v26
  2738. vcipher $out4,$out4,v26
  2739. vxor $in1,$twk1,v31
  2740. vcipher $out0,$out0,v27
  2741. lvx_u $in0,0,$inp
  2742. vcipher $out1,$out1,v27
  2743. vcipher $out2,$out2,v27
  2744. vcipher $out3,$out3,v27
  2745. vcipher $out4,$out4,v27
  2746. vxor $in2,$twk2,v31
  2747. addi $key_,$sp,$FRAME+15 # rewind $key_
  2748. vcipher $out0,$out0,v28
  2749. vcipher $out1,$out1,v28
  2750. vcipher $out2,$out2,v28
  2751. vcipher $out3,$out3,v28
  2752. vcipher $out4,$out4,v28
  2753. lvx v24,$x00,$key_ # re-pre-load round[1]
  2754. vxor $in3,$twk3,v31
  2755. vcipher $out0,$out0,v29
  2756. le?vperm $in0,$in0,$in0,$leperm
  2757. vcipher $out1,$out1,v29
  2758. vcipher $out2,$out2,v29
  2759. vcipher $out3,$out3,v29
  2760. vcipher $out4,$out4,v29
  2761. lvx v25,$x10,$key_ # re-pre-load round[2]
  2762. vxor $in4,$twk4,v31
  2763. vcipher $out0,$out0,v30
  2764. vperm $in0,$in0,$in0,$inpperm
  2765. vcipher $out1,$out1,v30
  2766. vcipher $out2,$out2,v30
  2767. vcipher $out3,$out3,v30
  2768. vcipher $out4,$out4,v30
  2769. vcipherlast $out0,$out0,$twk0
  2770. vcipherlast $out1,$out1,$in1
  2771. vcipherlast $out2,$out2,$in2
  2772. vcipherlast $out3,$out3,$in3
  2773. vcipherlast $out4,$out4,$in4
  2774. blr
  2775. .long 0
  2776. .byte 0,12,0x14,0,0,0,0,0
  2777. .align 5
  2778. _aesp8_xts_decrypt6x:
  2779. $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  2780. mflr r11
  2781. li r7,`$FRAME+8*16+15`
  2782. li r3,`$FRAME+8*16+31`
  2783. $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
  2784. stvx v20,r7,$sp # ABI says so
  2785. addi r7,r7,32
  2786. stvx v21,r3,$sp
  2787. addi r3,r3,32
  2788. stvx v22,r7,$sp
  2789. addi r7,r7,32
  2790. stvx v23,r3,$sp
  2791. addi r3,r3,32
  2792. stvx v24,r7,$sp
  2793. addi r7,r7,32
  2794. stvx v25,r3,$sp
  2795. addi r3,r3,32
  2796. stvx v26,r7,$sp
  2797. addi r7,r7,32
  2798. stvx v27,r3,$sp
  2799. addi r3,r3,32
  2800. stvx v28,r7,$sp
  2801. addi r7,r7,32
  2802. stvx v29,r3,$sp
  2803. addi r3,r3,32
  2804. stvx v30,r7,$sp
  2805. stvx v31,r3,$sp
  2806. li r0,-1
  2807. stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
  2808. li $x10,0x10
  2809. $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  2810. li $x20,0x20
  2811. $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  2812. li $x30,0x30
  2813. $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  2814. li $x40,0x40
  2815. $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  2816. li $x50,0x50
  2817. $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  2818. li $x60,0x60
  2819. $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  2820. li $x70,0x70
  2821. mtspr 256,r0
  2822. subi $rounds,$rounds,3 # -4 in total
  2823. lvx $rndkey0,$x00,$key1 # load key schedule
  2824. lvx v30,$x10,$key1
  2825. addi $key1,$key1,0x20
  2826. lvx v31,$x00,$key1
  2827. ?vperm $rndkey0,$rndkey0,v30,$keyperm
  2828. addi $key_,$sp,$FRAME+15
  2829. mtctr $rounds
  2830. Load_xts_dec_key:
  2831. ?vperm v24,v30,v31,$keyperm
  2832. lvx v30,$x10,$key1
  2833. addi $key1,$key1,0x20
  2834. stvx v24,$x00,$key_ # off-load round[1]
  2835. ?vperm v25,v31,v30,$keyperm
  2836. lvx v31,$x00,$key1
  2837. stvx v25,$x10,$key_ # off-load round[2]
  2838. addi $key_,$key_,0x20
  2839. bdnz Load_xts_dec_key
  2840. lvx v26,$x10,$key1
  2841. ?vperm v24,v30,v31,$keyperm
  2842. lvx v27,$x20,$key1
  2843. stvx v24,$x00,$key_ # off-load round[3]
  2844. ?vperm v25,v31,v26,$keyperm
  2845. lvx v28,$x30,$key1
  2846. stvx v25,$x10,$key_ # off-load round[4]
  2847. addi $key_,$sp,$FRAME+15 # rewind $key_
  2848. ?vperm v26,v26,v27,$keyperm
  2849. lvx v29,$x40,$key1
  2850. ?vperm v27,v27,v28,$keyperm
  2851. lvx v30,$x50,$key1
  2852. ?vperm v28,v28,v29,$keyperm
  2853. lvx v31,$x60,$key1
  2854. ?vperm v29,v29,v30,$keyperm
  2855. lvx $twk5,$x70,$key1 # borrow $twk5
  2856. ?vperm v30,v30,v31,$keyperm
  2857. lvx v24,$x00,$key_ # pre-load round[1]
  2858. ?vperm v31,v31,$twk5,$keyperm
  2859. lvx v25,$x10,$key_ # pre-load round[2]
  2860. vperm $in0,$inout,$inptail,$inpperm
  2861. subi $inp,$inp,31 # undo "caller"
  2862. vxor $twk0,$tweak,$rndkey0
  2863. vsrab $tmp,$tweak,$seven # next tweak value
  2864. vaddubm $tweak,$tweak,$tweak
  2865. vsldoi $tmp,$tmp,$tmp,15
  2866. vand $tmp,$tmp,$eighty7
  2867. vxor $out0,$in0,$twk0
  2868. vxor $tweak,$tweak,$tmp
  2869. lvx_u $in1,$x10,$inp
  2870. vxor $twk1,$tweak,$rndkey0
  2871. vsrab $tmp,$tweak,$seven # next tweak value
  2872. vaddubm $tweak,$tweak,$tweak
  2873. vsldoi $tmp,$tmp,$tmp,15
  2874. le?vperm $in1,$in1,$in1,$leperm
  2875. vand $tmp,$tmp,$eighty7
  2876. vxor $out1,$in1,$twk1
  2877. vxor $tweak,$tweak,$tmp
  2878. lvx_u $in2,$x20,$inp
  2879. andi. $taillen,$len,15
  2880. vxor $twk2,$tweak,$rndkey0
  2881. vsrab $tmp,$tweak,$seven # next tweak value
  2882. vaddubm $tweak,$tweak,$tweak
  2883. vsldoi $tmp,$tmp,$tmp,15
  2884. le?vperm $in2,$in2,$in2,$leperm
  2885. vand $tmp,$tmp,$eighty7
  2886. vxor $out2,$in2,$twk2
  2887. vxor $tweak,$tweak,$tmp
  2888. lvx_u $in3,$x30,$inp
  2889. sub $len,$len,$taillen
  2890. vxor $twk3,$tweak,$rndkey0
  2891. vsrab $tmp,$tweak,$seven # next tweak value
  2892. vaddubm $tweak,$tweak,$tweak
  2893. vsldoi $tmp,$tmp,$tmp,15
  2894. le?vperm $in3,$in3,$in3,$leperm
  2895. vand $tmp,$tmp,$eighty7
  2896. vxor $out3,$in3,$twk3
  2897. vxor $tweak,$tweak,$tmp
  2898. lvx_u $in4,$x40,$inp
  2899. subi $len,$len,0x60
  2900. vxor $twk4,$tweak,$rndkey0
  2901. vsrab $tmp,$tweak,$seven # next tweak value
  2902. vaddubm $tweak,$tweak,$tweak
  2903. vsldoi $tmp,$tmp,$tmp,15
  2904. le?vperm $in4,$in4,$in4,$leperm
  2905. vand $tmp,$tmp,$eighty7
  2906. vxor $out4,$in4,$twk4
  2907. vxor $tweak,$tweak,$tmp
  2908. lvx_u $in5,$x50,$inp
  2909. addi $inp,$inp,0x60
  2910. vxor $twk5,$tweak,$rndkey0
  2911. vsrab $tmp,$tweak,$seven # next tweak value
  2912. vaddubm $tweak,$tweak,$tweak
  2913. vsldoi $tmp,$tmp,$tmp,15
  2914. le?vperm $in5,$in5,$in5,$leperm
  2915. vand $tmp,$tmp,$eighty7
  2916. vxor $out5,$in5,$twk5
  2917. vxor $tweak,$tweak,$tmp
  2918. vxor v31,v31,$rndkey0
  2919. mtctr $rounds
  2920. b Loop_xts_dec6x
  2921. .align 5
  2922. Loop_xts_dec6x:
  2923. vncipher $out0,$out0,v24
  2924. vncipher $out1,$out1,v24
  2925. vncipher $out2,$out2,v24
  2926. vncipher $out3,$out3,v24
  2927. vncipher $out4,$out4,v24
  2928. vncipher $out5,$out5,v24
  2929. lvx v24,$x20,$key_ # round[3]
  2930. addi $key_,$key_,0x20
  2931. vncipher $out0,$out0,v25
  2932. vncipher $out1,$out1,v25
  2933. vncipher $out2,$out2,v25
  2934. vncipher $out3,$out3,v25
  2935. vncipher $out4,$out4,v25
  2936. vncipher $out5,$out5,v25
  2937. lvx v25,$x10,$key_ # round[4]
  2938. bdnz Loop_xts_dec6x
	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0
	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm
	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60
	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?
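
	# Tail processing: restore the remaining byte count and dispatch on how
	# many full blocks (one to five) are left; zero means only a possible
	# ciphertext-stealing tail remains.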
	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four
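
	# Five full blocks remain (fall-through from the dispatch above).  Each
	# tail path below whitens the leftover inputs with the pending tweaks
	# and finishes them via _aesp8_xts_dec5x.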
Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done
.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done
.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done
.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done
.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0
	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done
.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
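
	# Ciphertext stealing for a partial final block: the last full block is
	# decrypted under the following tweak, its leading $taillen bytes are
	# copied forward as the final partial plaintext, its remaining bytes
	# are spliced onto the partial ciphertext block, and the spliced block
	# is run through Loop_xts_dec1x once more under the held-back tweak
	# ($twk0).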
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20
	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...
.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret
	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp
Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0
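
	# _aesp8_xts_dec5x: shared tail helper that runs the remaining rounds
	# on five blocks already whitened by the caller (unused slots are
	# zeroed), finishes them with tweak^last-round-key operands and resets
	# the round counter before returning.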
.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}
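
# Post-processing of $code: evaluate `...` expressions and translate the
# endian-neutral source into flavour-specific assembly ('?'-prefixed
# instructions and the constants table are converted below).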
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;