12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761 |
- /*
- Convection Texture Tools
- Copyright (c) 2018-2019 Eric Lasota
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject
- to the following conditions:
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- -------------------------------------------------------------------------------------
- Portions based on DirectX Texture Library (DirectXTex)
- Copyright (c) Microsoft Corporation. All rights reserved.
- Licensed under the MIT License.
- http://go.microsoft.com/fwlink/?LinkId=248926
- */
- #include "ConvectionKernels_Config.h"
- #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
- #include "ConvectionKernels_BC67.h"
- #include "ConvectionKernels_AggregatedError.h"
- #include "ConvectionKernels_BCCommon.h"
- #include "ConvectionKernels_BC7_Prio.h"
- #include "ConvectionKernels_BC7_SingleColor.h"
- #include "ConvectionKernels_BC6H_IO.h"
- #include "ConvectionKernels_EndpointRefiner.h"
- #include "ConvectionKernels_EndpointSelector.h"
- #include "ConvectionKernels_IndexSelectorHDR.h"
- #include "ConvectionKernels_ParallelMath.h"
- #include "ConvectionKernels_UnfinishedEndpoints.h"
- namespace cvtt
- {
- namespace Internal
- {
- namespace BC67
- {
- typedef ParallelMath::Float MFloat;
- typedef ParallelMath::UInt15 MUInt15;
- struct WorkInfo
- {
- MUInt15 m_mode;
- MFloat m_error;
- MUInt15 m_ep[3][2][4];
- MUInt15 m_indexes[16];
- MUInt15 m_indexes2[16];
- union
- {
- MUInt15 m_partition;
- struct IndexSelectorAndRotation
- {
- MUInt15 m_indexSelector;
- MUInt15 m_rotation;
- } m_isr;
- } m_u;
- };
- }
- namespace BC6HData
- {
- enum EField
- {
- NA, // N/A
- M, // Mode
- D, // Shape
- RW,
- RX,
- RY,
- RZ,
- GW,
- GX,
- GY,
- GZ,
- BW,
- BX,
- BY,
- BZ,
- };
- struct ModeDescriptor
- {
- EField m_eField;
- uint8_t m_uBit;
- };
- const ModeDescriptor g_modeDescriptors[14][82] =
- {
- { // Mode 1 (0x00) - 10 5 5 5
- { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 2 (0x01) - 7 6 6 6
- { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 3 (0x02) - 11 5 4 4
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 4 (0x06) - 11 4 5 4
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
- { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 5 (0x0a) - 11 4 4 5
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
- { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 6 (0x0e) - 9 5 5 5
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 7 (0x12) - 8 6 5 5
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 8 (0x16) - 8 5 6 5
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 9 (0x1a) - 8 5 5 6
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 10 (0x1e) - 6 6 6 6
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
- { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
- { D, 3 },{ D, 4 },
- },
- { // Mode 11 (0x03) - 10 10
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },
- },
- { // Mode 12 (0x07) - 11 9
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },
- },
- { // Mode 13 (0x0b) - 12 8
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
- { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
- { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
- { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },
- },
- { // Mode 14 (0x0f) - 16 4
- { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
- { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
- { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
- { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },
- { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },
- { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },
- { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
- { NA, 0 },{ NA, 0 },
- },
- };
- }
- namespace BC7Data
- {
// Alpha handling variants referenced by BC7ModeInfo::m_alphaMode.
enum AlphaMode
{
    AlphaMode_Combined,
    AlphaMode_Separate,
    AlphaMode_None,
};

// P-bit variants referenced by BC7ModeInfo::m_pBitMode.
enum PBitMode
{
    PBitMode_PerEndpoint,
    PBitMode_PerSubset,
    PBitMode_None
};

// Static description of one BC7 mode; g_modes holds one of these per mode.
struct BC7ModeInfo
{
    PBitMode m_pBitMode;
    AlphaMode m_alphaMode;
    int m_rgbBits;
    int m_alphaBits;
    int m_partitionBits;
    int m_numSubsets;
    int m_indexBits;
    int m_alphaIndexBits;
    bool m_hasIndexSelector;
};

// Mode table indexed by BC7 mode number (0-7, see the trailing comments).
// Each initializer follows the member order of BC7ModeInfo:
// P-bit mode, alpha mode, RGB bits, alpha bits, partition bits, subset count,
// index bits, alpha index bits, has-index-selector.
// NOTE(review): nothing in view writes to this table; it could be made const
// if no other code does -- confirm before changing.
BC7ModeInfo g_modes[] =
{
    { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false },     // 0
    { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false },       // 1
    { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false },            // 2
    { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false },     // 3 (Mode reference has an error, P-bit is really per-endpoint)
    { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true },         // 4
    { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false },        // 5
    { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6
    { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false }  // 7
};
// Weight tables with 4, 8 and 16 entries; every table runs from 0 to 64.
const int g_weight2[] = { 0, 21, 43, 64 };
const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };

// Weight table lookup where slot N holds the table with 2^N entries.
// Slots 0 and 1 have no table and are NULL.
const int *g_weightTables[] =
{
    NULL,
    NULL,
    g_weight2,
    g_weight3,
    g_weight4
};
// Static description of one BC6H mode; g_hdrModes holds one of these per mode.
struct BC6HModeInfo
{
    uint16_t m_modeID;
    bool m_partitioned;
    bool m_transformed;
    int m_aPrec;
    int m_bPrec[3];
};

// Largest m_aPrec value that appears in g_hdrModes.
const int g_maxHDRPrecision = 16;

// [partitioned][precision]
// True where g_hdrModes contains a mode with that m_partitioned flag and
// that m_aPrec value. The second dimension covers 0..g_maxHDRPrecision.
bool g_hdrModesExistForPrecision[2][g_maxHDRPrecision + 1] =
{
    //0      1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16
    { false, false, false, false, false, false, false, false, false, false, true,  true,  true,  false, false, false, true },
    { false, false, false, false, false, false, true,  true,  true,  true,  true,  true,  false, false, false, false, false },
};

// Mode list in the order: mode ID, partitioned, transformed, m_aPrec,
// { m_bPrec[0..2] }.
BC6HModeInfo g_hdrModes[] =
{
    { 0x00, true, true, 10,{ 5, 5, 5 } },
    { 0x01, true, true, 7,{ 6, 6, 6 } },
    { 0x02, true, true, 11,{ 5, 4, 4 } },
    { 0x06, true, true, 11,{ 4, 5, 4 } },
    { 0x0a, true, true, 11,{ 4, 4, 5 } },
    { 0x0e, true, true, 9,{ 5, 5, 5 } },
    { 0x12, true, true, 8,{ 6, 5, 5 } },
    { 0x16, true, true, 8,{ 5, 6, 5 } },
    { 0x1a, true, true, 8,{ 5, 5, 6 } },
    { 0x1e, true, false, 6,{ 6, 6, 6 } },
    { 0x03, false, false, 10,{ 10, 10, 10 } },
    { 0x07, false, true, 11,{ 9, 9, 9 } },
    { 0x0b, false, true, 12,{ 8, 8, 8 } },
    { 0x0f, false, true, 16,{ 4, 4, 4 } },
};

// Number of entries in g_hdrModes.
static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]);
// 64 partition patterns, each stored as a 16-bit mask.
// NOTE(review): presumably one bit per pixel of a 4x4 block, selecting between
// two subsets -- confirm against the code that decodes these masks.
static uint16_t g_partitionMap[64] =
{
    0xCCCC, 0x8888, 0xEEEE, 0xECC8,
    0xC880, 0xFEEC, 0xFEC8, 0xEC80,
    0xC800, 0xFFEC, 0xFE80, 0xE800,
    0xFFE8, 0xFF00, 0xFFF0, 0xF000,
    0xF710, 0x008E, 0x7100, 0x08CE,
    0x008C, 0x7310, 0x3100, 0x8CCE,
    0x088C, 0x3110, 0x6666, 0x366C,
    0x17E8, 0x0FF0, 0x718E, 0x399C,
    0xaaaa, 0xf0f0, 0x5a5a, 0x33cc,
    0x3c3c, 0x55aa, 0x9696, 0xa55a,
    0x73ce, 0x13c8, 0x324c, 0x3bdc,
    0x6996, 0xc33c, 0x9966, 0x660,
    0x272, 0x4e4, 0x4e40, 0x2720,
    0xc936, 0x936c, 0x39c6, 0x639c,
    0x9336, 0x9cc6, 0x817e, 0xe718,
    0xccf0, 0xfcc, 0x7744, 0xee22,
};
// 64 32-bit partition masks (presumably two bits per pixel of a 4x4 block,
// selecting between three subsets -- confirm against the users of this table).
static uint32_t g_partitionMap2[64] =
{
    0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
    0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
    0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
    0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
    0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
    0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
    0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
    0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
    0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
    0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
    0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
    0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
    0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
    0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
    0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
    0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
};
// One "fix-up" pixel index (0..15) per entry, 64 entries
// (presumably one per two-subset partition -- confirm against callers).
static int g_fixupIndexes2[64] =
{
    // 0 - 15
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    // 16 - 31
    15,  2,  8,  2,  2,  8,  8, 15,  2,  8,  2,  2,  8,  8,  2,  2,
    // 32 - 47
    15, 15,  6,  8,  2,  8, 15, 15,  2,  8,  2,  2,  2, 15, 15,  6,
    // 48 - 63
     6,  2,  6,  8, 15, 15,  2,  2, 15, 15, 15, 15, 15,  2,  2, 15,
};
// Two "fix-up" pixel indexes (0..15) per entry, 64 entries
// (presumably one pair per three-subset partition -- confirm against callers).
static int g_fixupIndexes3[64][2] =
{
    { 3, 15 },  { 3, 8 },   { 15, 8 },  { 15, 3 },
    { 8, 15 },  { 3, 15 },  { 15, 3 },  { 15, 8 },
    { 8, 15 },  { 8, 15 },  { 6, 15 },  { 6, 15 },
    { 6, 15 },  { 5, 15 },  { 3, 15 },  { 3, 8 },
    { 3, 15 },  { 3, 8 },   { 8, 15 },  { 15, 3 },
    { 3, 15 },  { 3, 8 },   { 6, 15 },  { 10, 8 },
    { 5, 3 },   { 8, 15 },  { 8, 6 },   { 6, 10 },
    { 8, 15 },  { 5, 15 },  { 15, 10 }, { 15, 8 },
    { 8, 15 },  { 15, 3 },  { 3, 15 },  { 5, 10 },
    { 6, 10 },  { 10, 8 },  { 8, 9 },   { 15, 10 },
    { 15, 6 },  { 3, 15 },  { 15, 8 },  { 5, 15 },
    { 15, 3 },  { 15, 6 },  { 15, 6 },  { 15, 8 },
    { 3, 15 },  { 15, 3 },  { 5, 15 },  { 5, 15 },
    { 5, 15 },  { 8, 15 },  { 5, 15 },  { 10, 15 },
    { 5, 15 },  { 10, 15 }, { 8, 15 },  { 13, 15 },
    { 15, 3 },  { 12, 15 }, { 3, 15 },  { 3, 8 },
};
// Flattened lists of pixel indexes (0..15), one list per shape.
// Each row is one shape; its trailing comment reads "start offset, length",
// which is the same pair stored for that shape in g_shapeRanges.
static const unsigned char g_fragments[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16
    0, 1, 2, 3, // 16, 4
    0, 1, 4, // 20, 3
    0, 1, 2, 4, // 23, 4
    2, 3, 7, // 27, 3
    1, 2, 3, 7, // 30, 4
    0, 1, 2, 3, 4, 5, 6, 7, // 34, 8
    0, 1, 4, 8, // 42, 4
    0, 1, 2, 4, 5, 8, // 46, 6
    0, 1, 2, 3, 4, 5, 6, 8, // 52, 8
    1, 4, 5, 6, 9, // 60, 5
    2, 5, 6, 7, 10, // 65, 5
    5, 6, 9, 10, // 70, 4
    2, 3, 7, 11, // 74, 4
    1, 2, 3, 6, 7, 11, // 78, 6
    0, 1, 2, 3, 5, 6, 7, 11, // 84, 8
    0, 1, 2, 3, 8, 9, 10, 11, // 92, 8
    2, 3, 6, 7, 8, 9, 10, 11, // 100, 8
    4, 5, 6, 7, 8, 9, 10, 11, // 108, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12
    0, 4, 8, 12, // 128, 4
    0, 2, 3, 4, 6, 7, 8, 12, // 132, 8
    0, 1, 2, 4, 5, 8, 9, 12, // 140, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10
    3, 6, 7, 8, 9, 12, // 158, 6
    3, 5, 6, 7, 8, 9, 10, 12, // 164, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12
    0, 1, 2, 5, 6, 7, 11, 12, // 184, 8
    5, 8, 9, 10, 13, // 192, 5
    8, 12, 13, // 197, 3
    4, 8, 12, 13, // 200, 4
    2, 3, 6, 9, 12, 13, // 204, 6
    0, 1, 2, 3, 8, 9, 12, 13, // 210, 8
    0, 1, 4, 5, 8, 9, 12, 13, // 218, 8
    2, 3, 6, 7, 8, 9, 12, 13, // 226, 8
    2, 3, 5, 6, 9, 10, 12, 13, // 234, 8
    0, 3, 6, 7, 9, 10, 12, 13, // 242, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13
    2, 3, 4, 7, 8, 11, 12, 13, // 275, 8
    1, 2, 6, 7, 8, 11, 12, 13, // 283, 8
    2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10
    2, 3, 4, 5, 10, 11, 12, 13, // 301, 8
    0, 1, 6, 7, 10, 11, 12, 13, // 309, 8
    6, 9, 10, 11, 14, // 317, 5
    0, 2, 4, 6, 8, 10, 12, 14, // 322, 8
    1, 3, 5, 7, 8, 10, 12, 14, // 330, 8
    1, 3, 4, 6, 9, 11, 12, 14, // 338, 8
    0, 2, 5, 7, 9, 11, 12, 14, // 346, 8
    0, 3, 4, 5, 8, 9, 13, 14, // 354, 8
    2, 3, 4, 7, 8, 9, 13, 14, // 362, 8
    1, 2, 5, 6, 9, 10, 13, 14, // 370, 8
    0, 3, 4, 7, 9, 10, 13, 14, // 378, 8
    0, 3, 5, 6, 8, 11, 13, 14, // 386, 8
    1, 2, 4, 7, 8, 11, 13, 14, // 394, 8
    0, 1, 4, 7, 10, 11, 13, 14, // 402, 8
    0, 3, 6, 7, 10, 11, 13, 14, // 410, 8
    8, 12, 13, 14, // 418, 4
    1, 2, 3, 7, 8, 12, 13, 14, // 422, 8
    4, 8, 9, 12, 13, 14, // 430, 6
    0, 4, 5, 8, 9, 12, 13, 14, // 436, 8
    1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10
    2, 6, 8, 9, 10, 12, 13, 14, // 454, 8
    0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12
    0, 7, 9, 10, 11, 12, 13, 14, // 474, 8
    1, 2, 3, 4, 5, 6, 8, 15, // 482, 8
    3, 7, 11, 15, // 490, 4
    0, 1, 3, 4, 5, 7, 11, 15, // 494, 8
    0, 4, 5, 10, 11, 15, // 502, 6
    1, 2, 3, 6, 7, 10, 11, 15, // 508, 8
    0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10
    0, 4, 5, 6, 9, 10, 11, 15, // 526, 8
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12
    1, 2, 4, 5, 8, 9, 12, 15, // 546, 8
    2, 3, 5, 6, 8, 9, 12, 15, // 554, 8
    0, 3, 5, 6, 9, 10, 12, 15, // 562, 8
    1, 2, 4, 7, 9, 10, 12, 15, // 570, 8
    1, 2, 5, 6, 8, 11, 12, 15, // 578, 8
    0, 3, 4, 7, 8, 11, 12, 15, // 586, 8
    0, 1, 5, 6, 10, 11, 12, 15, // 594, 8
    1, 2, 6, 7, 10, 11, 12, 15, // 602, 8
    1, 3, 4, 6, 8, 10, 13, 15, // 610, 8
    0, 2, 5, 7, 8, 10, 13, 15, // 618, 8
    0, 2, 4, 6, 9, 11, 13, 15, // 626, 8
    1, 3, 5, 7, 9, 11, 13, 15, // 634, 8
    0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11
    2, 3, 4, 5, 8, 9, 14, 15, // 653, 8
    0, 1, 6, 7, 8, 9, 14, 15, // 661, 8
    0, 1, 5, 10, 14, 15, // 669, 6
    0, 3, 4, 5, 9, 10, 14, 15, // 675, 8
    0, 1, 5, 6, 9, 10, 14, 15, // 683, 8
    11, 14, 15, // 691, 3
    7, 11, 14, 15, // 694, 4
    1, 2, 4, 5, 8, 11, 14, 15, // 698, 8
    0, 1, 4, 7, 8, 11, 14, 15, // 706, 8
    0, 1, 4, 5, 10, 11, 14, 15, // 714, 8
    2, 3, 6, 7, 10, 11, 14, 15, // 722, 8
    4, 5, 6, 7, 10, 11, 14, 15, // 730, 8
    0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10
    0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13
    0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11
    3, 4, 8, 9, 10, 13, 14, 15, // 784, 8
    11, 13, 14, 15, // 792, 4
    0, 1, 2, 4, 11, 13, 14, 15, // 796, 8
    0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10
    7, 10, 11, 13, 14, 15, // 814, 6
    3, 6, 7, 10, 11, 13, 14, 15, // 820, 8
    1, 5, 9, 10, 11, 13, 14, 15, // 828, 8
    1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12
    12, 13, 14, 15, // 848, 4
    0, 1, 2, 3, 12, 13, 14, 15, // 852, 8
    0, 1, 4, 5, 12, 13, 14, 15, // 860, 8
    4, 5, 6, 7, 12, 13, 14, 15, // 868, 8
    4, 8, 9, 10, 12, 13, 14, 15, // 876, 8
    0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10
    0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12
    0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12
    0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11
    0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11
    7, 9, 10, 11, 12, 13, 14, 15, // 940, 8
    3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10
    2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12
    8, 9, 10, 11, 12, 13, 14, 15, // 970, 8
    0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12
    0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13
    3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12
    2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13
    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12
    0, 2, // 1040, 2
    1, 3, // 1042, 2
    0, 1, 4, 5, // 1044, 4
    0, 1, 2, 4, 5, // 1048, 5
    2, 3, 6, // 1053, 3
    0, 2, 4, 6, // 1056, 4
    1, 2, 5, 6, // 1060, 4
    0, 1, 2, 3, 5, 6, // 1064, 6
    0, 1, 2, 4, 5, 6, // 1070, 6
    0, 1, 2, 3, 4, 5, 6, // 1076, 7
    0, 3, 4, 7, // 1083, 4
    0, 1, 2, 3, 4, 7, // 1087, 6
    1, 3, 5, 7, // 1093, 4
    2, 3, 6, 7, // 1097, 4
    1, 2, 3, 6, 7, // 1101, 5
    1, 2, 3, 5, 6, 7, // 1106, 6
    0, 1, 2, 3, 5, 6, 7, // 1112, 7
    4, 5, 6, 7, // 1119, 4
    0, 8, // 1123, 2
    0, 1, 4, 5, 8, // 1125, 5
    0, 1, 8, 9, // 1130, 4
    4, 5, 8, 9, // 1134, 4
    0, 1, 4, 5, 8, 9, // 1138, 6
    2, 6, 8, 9, // 1144, 4
    6, 7, 8, 9, // 1148, 4
    0, 2, 4, 6, 8, 10, // 1152, 6
    1, 2, 5, 6, 9, 10, // 1158, 6
    0, 3, 4, 7, 9, 10, // 1164, 6
    0, 1, 2, 8, 9, 10, // 1170, 6
    4, 5, 6, 8, 9, 10, // 1176, 6
    3, 11, // 1182, 2
    2, 3, 6, 7, 11, // 1184, 5
    0, 3, 8, 11, // 1189, 4
    0, 3, 4, 7, 8, 11, // 1193, 6
    1, 3, 5, 7, 9, 11, // 1199, 6
    2, 3, 10, 11, // 1205, 4
    1, 5, 10, 11, // 1209, 4
    4, 5, 10, 11, // 1213, 4
    6, 7, 10, 11, // 1217, 4
    2, 3, 6, 7, 10, 11, // 1221, 6
    1, 2, 3, 9, 10, 11, // 1227, 6
    5, 6, 7, 9, 10, 11, // 1233, 6
    8, 9, 10, 11, // 1239, 4
    4, 12, // 1243, 2
    0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8
    8, 9, 12, // 1253, 3
    0, 4, 5, 8, 9, 12, // 1256, 6
    0, 1, 4, 5, 8, 9, 12, // 1262, 7
    2, 3, 5, 6, 8, 9, 12, // 1269, 7
    1, 5, 9, 13, // 1276, 4
    6, 7, 9, 13, // 1280, 4
    1, 4, 7, 10, 13, // 1284, 5
    1, 6, 8, 11, 13, // 1289, 5
    0, 1, 12, 13, // 1294, 4
    4, 5, 12, 13, // 1298, 4
    0, 1, 6, 7, 12, 13, // 1302, 6
    0, 1, 4, 8, 12, 13, // 1308, 6
    8, 9, 12, 13, // 1314, 4
    4, 8, 9, 12, 13, // 1318, 5
    4, 5, 8, 9, 12, 13, // 1323, 6
    0, 4, 5, 8, 9, 12, 13, // 1329, 7
    0, 1, 6, 10, 12, 13, // 1336, 6
    3, 6, 7, 9, 10, 12, 13, // 1342, 7
    0, 1, 10, 11, 12, 13, // 1349, 6
    2, 4, 7, 9, 14, // 1355, 5
    4, 5, 10, 14, // 1360, 4
    2, 6, 10, 14, // 1364, 4
    2, 5, 8, 11, 14, // 1368, 5
    0, 2, 12, 14, // 1373, 4
    8, 10, 12, 14, // 1377, 4
    4, 6, 8, 10, 12, 14, // 1381, 6
    13, 14, // 1387, 2
    9, 10, 13, 14, // 1389, 4
    5, 6, 9, 10, 13, 14, // 1393, 6
    0, 1, 2, 12, 13, 14, // 1399, 6
    4, 5, 6, 12, 13, 14, // 1405, 6
    8, 9, 12, 13, 14, // 1411, 5
    8, 9, 10, 12, 13, 14, // 1416, 6
    7, 15, // 1422, 2
    0, 5, 10, 15, // 1424, 4
    0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8
    10, 11, 15, // 1436, 3
    0, 1, 5, 6, 10, 11, 15, // 1439, 7
    3, 6, 7, 10, 11, 15, // 1446, 6
    12, 15, // 1452, 2
    0, 3, 12, 15, // 1454, 4
    4, 7, 12, 15, // 1458, 4
    0, 3, 6, 9, 12, 15, // 1462, 6
    0, 3, 5, 10, 12, 15, // 1468, 6
    8, 11, 12, 15, // 1474, 4
    5, 6, 8, 11, 12, 15, // 1478, 6
    4, 7, 8, 11, 12, 15, // 1484, 6
    1, 3, 13, 15, // 1490, 4
    9, 11, 13, 15, // 1494, 4
    5, 7, 9, 11, 13, 15, // 1498, 6
    2, 3, 14, 15, // 1504, 4
    2, 3, 4, 5, 14, 15, // 1508, 6
    6, 7, 14, 15, // 1514, 4
    2, 3, 5, 9, 14, 15, // 1518, 6
    2, 3, 8, 9, 14, 15, // 1524, 6
    10, 14, 15, // 1530, 3
    0, 4, 5, 9, 10, 14, 15, // 1533, 7
    2, 3, 7, 11, 14, 15, // 1540, 6
    10, 11, 14, 15, // 1546, 4
    7, 10, 11, 14, 15, // 1550, 5
    6, 7, 10, 11, 14, 15, // 1555, 6
    1, 2, 3, 13, 14, 15, // 1561, 6
    5, 6, 7, 13, 14, 15, // 1567, 6
    10, 11, 13, 14, 15, // 1573, 5
    9, 10, 11, 13, 14, 15, // 1578, 6
    0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8
    9, 10, 12, 13, 14, 15, // 1592, 6
    8, 11, 12, 13, 14, 15, // 1598, 6
    3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8
};
// Per-shape { start offset, length } into g_fragments, indexed by shape number.
// The ranges are contiguous: each start equals the previous start plus length.
static const int g_shapeRanges[][2] =
{
    { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
    { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
    { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
    { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
    { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
    { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
    { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
    { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
    { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
    { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
    { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
    { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
    { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
    { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
    { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
    { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
    { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
    { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
    { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
    { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
    { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
    { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
    { 1604, 8 },
};
// Single-entry shape table (the one-subset counterpart of g_shapes2 / g_shapes3).
// NOTE(review): the meaning of the second value is not evident from this section.
static const int g_shapes1[][2] =
{
    { 0, 16 }
};
// 64 pairs of shape numbers (presumably the two subset shapes of each
// two-subset partition; values index g_shapeRanges -- confirm against callers).
static const int g_shapes2[64][2] =
{
    { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },
    { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },
    { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },
    { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },
    { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },
    { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },
    { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },
    { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },
};
// 64 triples of shape numbers (presumably the three subset shapes of each
// three-subset partition; values index g_shapeRanges -- confirm against callers).
static const int g_shapes3[64][3] =
{
    { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },
    { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },
    { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },
    { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },
    { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },
    { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },
    { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },
    { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },
};
// Shape numbers in the one-entry list: only shape 0.
static const int g_shapeList1[] = { 0 };
// Shape numbers 1 through 128 in ascending order.
static const int g_shapeList2[] =
{
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
    81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
    97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
    113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,
};
// Shape numbers 0 through 128 in ascending order
// (the contents of g_shapeList1 followed by those of g_shapeList2).
static const int g_shapeList12[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128,
};
// Ascending list of 140 shape numbers: 26 values at or below 128,
// followed by every number from 129 through 242.
static const int g_shapeList3[] =
{
    1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,
    33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,
    110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,
    136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
    147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
    158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
    169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
    202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
    213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
    235, 236, 237, 238, 239, 240, 241, 242,
};
// Shorter ascending list of 36 shape numbers; every value also appears in g_shapeList3.
static const int g_shapeList3Short[] =
{
    1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,
    106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,
    171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,
    233, 237, 240,
};
// Every shape number, 0 through 242, in ascending order.
static const int g_shapeListAll[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242,
};
- static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]);
- static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]);
- static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]);
- static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]);
- static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]);
- static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]);
- static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]);
- }
- struct PackingVector
- {
- uint32_t m_vector[4];
- int m_offset;
- void Init()
- {
- for (int i = 0; i < 4; i++)
- m_vector[i] = 0;
- m_offset = 0;
- }
- void InitPacked(const uint32_t *v, int bits)
- {
- for (int b = 0; b < bits; b += 32)
- m_vector[b / 32] = v[b / 32];
- m_offset = bits;
- }
- inline void Pack(ParallelMath::ScalarUInt16 value, int bits)
- {
- int vOffset = m_offset >> 5;
- int bitOffset = m_offset & 0x1f;
- m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff);
- int overflowBits = bitOffset + bits - 32;
- if (overflowBits > 0)
- m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits));
- m_offset += bits;
- }
- inline void Flush(uint8_t* output)
- {
- assert(m_offset == 128);
- for (int v = 0; v < 4; v++)
- {
- uint32_t chunk = m_vector[v];
- for (int b = 0; b < 4; b++)
- output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff);
- }
- }
- };
- struct UnpackingVector
- {
- uint32_t m_vector[4];
- void Init(const uint8_t *bytes)
- {
- for (int i = 0; i < 4; i++)
- m_vector[i] = 0;
- for (int b = 0; b < 16; b++)
- m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8));
- }
- inline void UnpackStart(uint32_t *v, int bits)
- {
- for (int b = 0; b < bits; b += 32)
- v[b / 32] = m_vector[b / 32];
- int entriesShifted = bits / 32;
- int carry = bits % 32;
- for (int i = entriesShifted; i < 4; i++)
- m_vector[i - entriesShifted] = m_vector[i];
- int entriesRemaining = 4 - entriesShifted;
- if (carry)
- {
- uint32_t bitMask = (1 << carry) - 1;
- for (int i = 0; i < entriesRemaining; i++)
- {
- m_vector[i] >>= carry;
- if (i != entriesRemaining - 1)
- m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry);
- }
- }
- }
- inline ParallelMath::ScalarUInt16 Unpack(int bits)
- {
- uint32_t bitMask = (1 << bits) - 1;
- ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask);
- for (int i = 0; i < 4; i++)
- {
- m_vector[i] >>= bits;
- if (i != 3)
- m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits);
- }
- return result;
- }
- };
- ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned)
- {
- if (isSigned)
- {
- ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f));
- return (v * 32.0f + offset) / 31.0f;
- }
- else
- return (v * 64.0f + 30.0f) / 31.0f;
- }
- ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v)
- {
- #ifdef CVTT_ENABLE_ASSERTS
- for (int i = 0; i < ParallelMath::ParallelSize; i++)
- assert(ParallelMath::Extract(v, i) != -32768)
- #endif
- ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0));
- ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v));
- ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31));
- ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5);
- ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted);
- ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768));
- return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits;
- }
- ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)
- {
- return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6));
- }
- void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned)
- {
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- if (isSigned)
- outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch])));
- else
- outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch])));
- }
- }
- }
- struct SinglePlaneTemporaries
- {
- UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];
- UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];
- ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];
- ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];
- ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];
- };
- }
- }
- void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2])
- {
- ParallelMath::RoundTowardNearestForScope roundingMode;
- float tf[2];
- Util::ComputeTweakFactors(tweak, range, tf);
- MFloat base = ParallelMath::ToFloat(original[0]);
- MFloat offs = ParallelMath::ToFloat(original[1]) - base;
- result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode);
- result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode);
- }
- void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels)
- {
- for (int ch = 0; ch < channels; ch++)
- color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8);
- }
- void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels)
- {
- int16_t addend;
- if (p)
- addend = ((1 << (8 - bits)) - 1);
- else
- addend = 255;
- for (int ch = 0; ch < channels; ch++)
- {
- MUInt16 ch16 = ParallelMath::LosslessCast<MUInt16>::Cast(color[ch]);
- ch16 = ParallelMath::RightShift((ch16 << (bits + 1)) - ch16 + addend, 9);
- ch16 = (ch16 << 1) | ParallelMath::MakeUInt16(p);
- color[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ch16);
- }
- }
- void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels)
- {
- for (int ch = 0; ch < channels; ch++)
- {
- MUInt15 clr = color[ch];
- clr = clr << (8 - bits);
- color[ch] = clr | ParallelMath::RightShift(clr, bits);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2])
- {
- for (int j = 0; j < 2; j++)
- {
- QuantizeP(ep[j], 4, p[j], 3);
- Unquantize(ep[j], 5, 3);
- ep[j][3] = ParallelMath::MakeUInt15(255);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p)
- {
- for (int j = 0; j < 2; j++)
- {
- QuantizeP(ep[j], 6, p, 3);
- Unquantize(ep[j], 7, 3);
- ep[j][3] = ParallelMath::MakeUInt15(255);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4])
- {
- for (int j = 0; j < 2; j++)
- {
- Quantize(ep[j], 5, 3);
- Unquantize(ep[j], 5, 3);
- ep[j][3] = ParallelMath::MakeUInt15(255);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2])
- {
- for (int j = 0; j < 2; j++)
- {
- QuantizeP(ep[j], 7, p[j], 3);
- ep[j][3] = ParallelMath::MakeUInt15(255);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2])
- {
- for (int j = 0; j < 2; j++)
- {
- Quantize(epRGB[j], 5, 3);
- Unquantize(epRGB[j], 5, 3);
- Quantize(epA + j, 6, 1);
- Unquantize(epA + j, 6, 1);
- }
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2])
- {
- for (int j = 0; j < 2; j++)
- {
- Quantize(epRGB[j], 7, 3);
- Unquantize(epRGB[j], 7, 3);
- }
- // Alpha is full precision
- (void)epA;
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2])
- {
- for (int j = 0; j < 2; j++)
- QuantizeP(ep[j], 7, p[j], 4);
- }
- void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2])
- {
- for (int j = 0; j < 2; j++)
- {
- QuantizeP(ep[j], 5, p[j], 4);
- Unquantize(ep[j], 6, 4);
- }
- }
- void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn)
- {
- MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX);
- MUInt15 intAverage[4];
- for (int ch = 0; ch < 4; ch++)
- intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn);
- MUInt15 eps[2][4];
- MUInt15 reconstructed[4];
- MUInt15 index = ParallelMath::MakeUInt15(0);
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- eps[epi][ch] = ParallelMath::MakeUInt15(0);
- eps[epi][3] = ParallelMath::MakeUInt15(255);
- }
- for (int ch = 0; ch < 3; ch++)
- reconstructed[ch] = ParallelMath::MakeUInt15(0);
- reconstructed[3] = ParallelMath::MakeUInt15(255);
- // Depending on the target index and parity bits, there are multiple valid solid colors.
- // We want to find the one closest to the actual average.
- MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX);
- for (int t = 0; t < numTables; t++)
- {
- const cvtt::Tables::BC7SC::Table& table = *(tables[t]);
- ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits];
- MUInt15 candidateReconstructed[4];
- MUInt15 candidateEPs[2][4];
- for (int i = 0; i < ParallelMath::ParallelSize; i++)
- {
- for (int ch = 0; ch < numRealChannels; ch++)
- {
- ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i);
- assert(avgValue >= 0 && avgValue <= 255);
- const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue];
- ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min);
- ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max);
- ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor);
- }
- }
- MFloat avgError = ParallelMath::MakeFloatZero();
- for (int ch = 0; ch < numRealChannels; ch++)
- {
- MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch];
- avgError = avgError + delta * delta * channelWeightsSq[ch];
- }
- ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError));
- better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations
- if (ParallelMath::AnySet(better))
- {
- ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError);
- MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index);
- ParallelMath::ConditionalSet(index, better, candidateIndex);
- for (int ch = 0; ch < numRealChannels; ch++)
- ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]);
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < numRealChannels; ch++)
- ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]);
- }
- }
- AggregatedError<4> aggError;
- for (int pxi = 0; pxi < shapeLength; pxi++)
- {
- int px = fragmentStart[pxi];
- BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
- }
- MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError;
- ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError));
- if (ParallelMath::AnySet(better))
- {
- shapeBestError = ParallelMath::Min(shapeBestError, error);
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < numRealChannels; ch++)
- ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]);
- }
- for (int pxi = 0; pxi < shapeLength; pxi++)
- ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index);
- }
- }
- void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
- {
- if (numRefineRounds < 1)
- numRefineRounds = 1;
- float channelWeightsSq[4];
- for (int ch = 0; ch < 4; ch++)
- channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
- SinglePlaneTemporaries temps;
- MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);
- MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
- ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true);
- for (int px = 0; px < 16; px++)
- {
- MUInt15 a = pixels[px][3];
- maxAlpha = ParallelMath::Max(maxAlpha, a);
- minAlpha = ParallelMath::Min(minAlpha, a);
- isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255))));
- }
- ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255));
- ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha);
- bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha);
- // Try RGB modes if any block has a min alpha 251 or higher
- bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha));
- // Try mode 7 if any block has alpha.
- // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints
- // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific
- // situations, and only by at most 1 unit of error per pixel.
- bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0);
- MFloat preWeightedPixels[16][4];
- BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);
- // Get initial RGB endpoints
- if (allowRGBModes)
- {
- const uint8_t *shapeList = encodingPlan.rgbShapeList;
- int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate;
- for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
- {
- int shape = shapeList[shapeIter];
- int shapeStart = BC7Data::g_shapeRanges[shape][0];
- int shapeSize = BC7Data::g_shapeRanges[shape][1];
- EndpointSelector<3, 8> epSelector;
- for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
- {
- for (int spx = 0; spx < shapeSize; spx++)
- {
- int px = BC7Data::g_fragments[shapeStart + spx];
- epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
- }
- epSelector.FinishPass(epPass);
- }
- temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights);
- }
- }
- // Get initial RGBA endpoints
- {
- const uint8_t *shapeList = encodingPlan.rgbaShapeList;
- int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate;
- for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
- {
- int shape = shapeList[shapeIter];
- if (anyBlockHasAlpha || !allowRGBModes)
- {
- int shapeStart = BC7Data::g_shapeRanges[shape][0];
- int shapeSize = BC7Data::g_shapeRanges[shape][1];
- EndpointSelector<4, 8> epSelector;
- for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
- {
- for (int spx = 0; spx < shapeSize; spx++)
- {
- int px = BC7Data::g_fragments[shapeStart + spx];
- epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
- }
- epSelector.FinishPass(epPass);
- }
- temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights);
- }
- else
- {
- temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255);
- }
- }
- }
- for (uint16_t mode = 0; mode <= 7; mode++)
- {
- if (mode == 4 || mode == 5)
- continue;
- if (mode < 4 && !allowRGBModes)
- continue;
- if (mode == 7 && !allowMode7)
- continue;
- uint64_t partitionEnabledBits = 0;
- switch (mode)
- {
- case 0:
- partitionEnabledBits = encodingPlan.mode0PartitionEnabled;
- break;
- case 1:
- partitionEnabledBits = encodingPlan.mode1PartitionEnabled;
- break;
- case 2:
- partitionEnabledBits = encodingPlan.mode2PartitionEnabled;
- break;
- case 3:
- partitionEnabledBits = encodingPlan.mode3PartitionEnabled;
- break;
- case 6:
- partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;
- break;
- case 7:
- if (anyBlockHasAlpha)
- partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;
- else
- partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;
- break;
- default:
- break;
- }
- bool isRGB = (mode < 4);
- unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits;
- int numSubsets = BC7Data::g_modes[mode].m_numSubsets;
- int indexPrec = BC7Data::g_modes[mode].m_indexBits;
- int parityBitMax = 1;
- if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint)
- parityBitMax = 4;
- else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset)
- parityBitMax = 2;
- int numRealChannels = isRGB ? 3 : 4;
- int numShapes;
- const int *shapeList;
- if (numSubsets == 1)
- {
- numShapes = BC7Data::g_numShapes1;
- shapeList = BC7Data::g_shapeList1;
- }
- else if (numSubsets == 2)
- {
- numShapes = BC7Data::g_numShapes2;
- shapeList = BC7Data::g_shapeList2;
- }
- else
- {
- assert(numSubsets == 3);
- if (numPartitions == 16)
- {
- numShapes = BC7Data::g_numShapes3Short;
- shapeList = BC7Data::g_shapeList3Short;
- }
- else
- {
- assert(numPartitions == 64);
- numShapes = BC7Data::g_numShapes3;
- shapeList = BC7Data::g_shapeList3;
- }
- }
- for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++)
- temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX);
- for (int shapeIter = 0; shapeIter < numShapes; shapeIter++)
- {
- int shape = shapeList[shapeIter];
- int numTweakRounds = 0;
- if (isRGB)
- numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape];
- else
- numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape];
- if (numTweakRounds == 0)
- continue;
- if (numTweakRounds > MaxTweakRounds)
- numTweakRounds = MaxTweakRounds;
- int shapeStart = BC7Data::g_shapeRanges[shape][0];
- int shapeLength = BC7Data::g_shapeRanges[shape][1];
- AggregatedError<1> alphaAggError;
- if (isRGB && anyBlockHasAlpha)
- {
- MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) };
- for (int pxi = 0; pxi < shapeLength; pxi++)
- {
- int px = BC7Data::g_fragments[shapeStart + pxi];
- MUInt15 original[1] = { pixels[px][3] };
- BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError);
- }
- }
- float alphaWeightsSq[1] = { channelWeightsSq[3] };
- MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq);
- MUInt15 tweakBaseEP[MaxTweakRounds][2][4];
- for (int tweak = 0; tweak < numTweakRounds; tweak++)
- {
- if (isRGB)
- {
- temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
- tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255);
- }
- else
- {
- temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
- }
- }
- ParallelMath::Int16CompFlag punchThroughInvalid[4];
- for (int pIter = 0; pIter < parityBitMax; pIter++)
- {
- punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false);
- if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7))
- {
- // Modes 6 and 7 have parity bits that affect alpha
- if (pIter == 0)
- punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha);
- else if (pIter == parityBitMax - 1)
- punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha);
- else
- punchThroughInvalid[pIter] = isPunchThrough;
- }
- }
- for (int pIter = 0; pIter < parityBitMax; pIter++)
- {
- if (ParallelMath::AllSet(punchThroughInvalid[pIter]))
- continue;
- bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]);
- for (int tweak = 0; tweak < numTweakRounds; tweak++)
- {
- uint16_t p[2];
- p[0] = (pIter & 1);
- p[1] = ((pIter >> 1) & 1);
- MUInt15 ep[2][4];
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < 4; ch++)
- ep[epi][ch] = tweakBaseEP[tweak][epi][ch];
- for (int refine = 0; refine < numRefineRounds; refine++)
- {
- switch (mode)
- {
- case 0:
- CompressEndpoints0(ep, p);
- break;
- case 1:
- CompressEndpoints1(ep, p[0]);
- break;
- case 2:
- CompressEndpoints2(ep);
- break;
- case 3:
- CompressEndpoints3(ep, p);
- break;
- case 6:
- CompressEndpoints6(ep, p);
- break;
- case 7:
- CompressEndpoints7(ep, p);
- break;
- default:
- assert(false);
- break;
- };
- MFloat shapeError = ParallelMath::MakeFloatZero();
- IndexSelector<4> indexSelector;
- indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec);
- EndpointRefiner<4> epRefiner;
- epRefiner.Init(1 << indexPrec, channelWeights);
- MUInt15 indexes[16];
- AggregatedError<4> aggError;
- for (int pxi = 0; pxi < shapeLength; pxi++)
- {
- int px = BC7Data::g_fragments[shapeStart + pxi];
- MUInt15 index;
- MUInt15 reconstructed[4];
- index = indexSelector.SelectIndexLDR(floatPixels[px], rtn);
- indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels);
- if (flags & cvtt::Flags::BC7_FastIndexing)
- BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
- else
- {
- MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
- MUInt15 altIndexes[2];
- altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
- altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1)));
- for (int ii = 0; ii < 2; ii++)
- {
- indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels);
- MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
- ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error));
- error = ParallelMath::Min(error, altError);
- ParallelMath::ConditionalSet(index, better, altIndexes[ii]);
- }
- shapeError = shapeError + error;
- }
- if (refine != numRefineRounds - 1)
- epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels);
- indexes[pxi] = index;
- }
- if (flags & cvtt::Flags::BC7_FastIndexing)
- shapeError = aggError.Finalize(flags, channelWeightsSq);
- if (isRGB)
- shapeError = shapeError + staticAlphaError;
- ParallelMath::FloatCompFlag shapeErrorBetter;
- ParallelMath::Int16CompFlag shapeErrorBetter16;
- shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]);
- shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter);
- if (ParallelMath::AnySet(shapeErrorBetter16))
- {
- bool punchThroughOK = true;
- if (needPunchThroughCheck)
- {
- shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16);
- shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16);
- if (!ParallelMath::AnySet(shapeErrorBetter16))
- punchThroughOK = false;
- }
- if (punchThroughOK)
- {
- ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError);
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < numRealChannels; ch++)
- ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]);
- for (int pxi = 0; pxi < shapeLength; pxi++)
- ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]);
- }
- }
- if (refine != numRefineRounds - 1)
- epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn);
- } // refine
- } // tweak
- } // p
- if (flags & cvtt::Flags::BC7_TrySingleColor)
- {
- MUInt15 total[4];
- for (int ch = 0; ch < 4; ch++)
- total[ch] = ParallelMath::MakeUInt15(0);
- for (int pxi = 0; pxi < shapeLength; pxi++)
- {
- int px = BC7Data::g_fragments[shapeStart + pxi];
- for (int ch = 0; ch < 4; ch++)
- total[ch] = total[ch] + pixels[pxi][ch];
- }
- MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength));
- MFloat average[4];
- for (int ch = 0; ch < 4; ch++)
- average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength;
- const uint8_t *fragment = BC7Data::g_fragments + shapeStart;
- MFloat &shapeBestError = temps.shapeBestError[shape];
- MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape];
- MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart;
- const cvtt::Tables::BC7SC::Table **scTables = NULL;
- int numSCTables = 0;
- const cvtt::Tables::BC7SC::Table *tables0[] =
- {
- &cvtt::Tables::BC7SC::g_mode0_p00_i1,
- &cvtt::Tables::BC7SC::g_mode0_p00_i2,
- &cvtt::Tables::BC7SC::g_mode0_p00_i3,
- &cvtt::Tables::BC7SC::g_mode0_p01_i1,
- &cvtt::Tables::BC7SC::g_mode0_p01_i2,
- &cvtt::Tables::BC7SC::g_mode0_p01_i3,
- &cvtt::Tables::BC7SC::g_mode0_p10_i1,
- &cvtt::Tables::BC7SC::g_mode0_p10_i2,
- &cvtt::Tables::BC7SC::g_mode0_p10_i3,
- &cvtt::Tables::BC7SC::g_mode0_p11_i1,
- &cvtt::Tables::BC7SC::g_mode0_p11_i2,
- &cvtt::Tables::BC7SC::g_mode0_p11_i3,
- };
- const cvtt::Tables::BC7SC::Table *tables1[] =
- {
- &cvtt::Tables::BC7SC::g_mode1_p0_i1,
- &cvtt::Tables::BC7SC::g_mode1_p0_i2,
- &cvtt::Tables::BC7SC::g_mode1_p0_i3,
- &cvtt::Tables::BC7SC::g_mode1_p1_i1,
- &cvtt::Tables::BC7SC::g_mode1_p1_i2,
- &cvtt::Tables::BC7SC::g_mode1_p1_i3,
- };
- const cvtt::Tables::BC7SC::Table *tables2[] =
- {
- &cvtt::Tables::BC7SC::g_mode2,
- };
- const cvtt::Tables::BC7SC::Table *tables3[] =
- {
- &cvtt::Tables::BC7SC::g_mode3_p0,
- &cvtt::Tables::BC7SC::g_mode3_p1,
- };
- const cvtt::Tables::BC7SC::Table *tables6[] =
- {
- &cvtt::Tables::BC7SC::g_mode6_p0_i1,
- &cvtt::Tables::BC7SC::g_mode6_p0_i2,
- &cvtt::Tables::BC7SC::g_mode6_p0_i3,
- &cvtt::Tables::BC7SC::g_mode6_p0_i4,
- &cvtt::Tables::BC7SC::g_mode6_p0_i5,
- &cvtt::Tables::BC7SC::g_mode6_p0_i6,
- &cvtt::Tables::BC7SC::g_mode6_p0_i7,
- &cvtt::Tables::BC7SC::g_mode6_p1_i1,
- &cvtt::Tables::BC7SC::g_mode6_p1_i2,
- &cvtt::Tables::BC7SC::g_mode6_p1_i3,
- &cvtt::Tables::BC7SC::g_mode6_p1_i4,
- &cvtt::Tables::BC7SC::g_mode6_p1_i5,
- &cvtt::Tables::BC7SC::g_mode6_p1_i6,
- &cvtt::Tables::BC7SC::g_mode6_p1_i7,
- };
- const cvtt::Tables::BC7SC::Table *tables7[] =
- {
- &cvtt::Tables::BC7SC::g_mode7_p00,
- &cvtt::Tables::BC7SC::g_mode7_p01,
- &cvtt::Tables::BC7SC::g_mode7_p10,
- &cvtt::Tables::BC7SC::g_mode7_p11,
- };
- switch (mode)
- {
- case 0:
- {
- scTables = tables0;
- numSCTables = sizeof(tables0) / sizeof(tables0[0]);
- }
- break;
- case 1:
- {
- scTables = tables1;
- numSCTables = sizeof(tables1) / sizeof(tables1[0]);
- }
- break;
- case 2:
- {
- scTables = tables2;
- numSCTables = sizeof(tables2) / sizeof(tables2[0]);
- }
- break;
- case 3:
- {
- scTables = tables3;
- numSCTables = sizeof(tables3) / sizeof(tables3[0]);
- }
- break;
- case 6:
- {
- scTables = tables6;
- numSCTables = sizeof(tables6) / sizeof(tables6[0]);
- }
- break;
- case 7:
- {
- scTables = tables7;
- numSCTables = sizeof(tables7) / sizeof(tables7[0]);
- }
- break;
- default:
- assert(false);
- break;
- }
- TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn);
- }
- } // shapeIter
- uint64_t partitionsEnabledBits = 0xffffffffffffffffULL;
- switch (mode)
- {
- case 0:
- partitionsEnabledBits = encodingPlan.mode0PartitionEnabled;
- break;
- case 1:
- partitionsEnabledBits = encodingPlan.mode1PartitionEnabled;
- break;
- case 2:
- partitionsEnabledBits = encodingPlan.mode2PartitionEnabled;
- break;
- case 3:
- partitionsEnabledBits = encodingPlan.mode3PartitionEnabled;
- break;
- case 6:
- partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;
- break;
- case 7:
- if (anyBlockHasAlpha)
- partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;
- else
- partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;
- break;
- default:
- break;
- };
- for (uint16_t partition = 0; partition < numPartitions; partition++)
- {
- if (((partitionsEnabledBits >> partition) & 1) == 0)
- continue;
- const int *partitionShapes;
- if (numSubsets == 1)
- partitionShapes = BC7Data::g_shapes1[partition];
- else if (numSubsets == 2)
- partitionShapes = BC7Data::g_shapes2[partition];
- else
- {
- assert(numSubsets == 3);
- partitionShapes = BC7Data::g_shapes3[partition];
- }
- MFloat totalError = ParallelMath::MakeFloatZero();
- for (int subset = 0; subset < numSubsets; subset++)
- totalError = totalError + temps.shapeBestError[partitionShapes[subset]];
- ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error);
- ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
- if (mode == 7 && anyBlockHasAlpha)
- {
- // Some lanes could be better, but we filter them out to ensure consistency with scalar
- bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0);
- if (!isRGBAllowedForThisPartition)
- {
- errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha);
- errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16);
- }
- }
- if (ParallelMath::AnySet(errorBetter16))
- {
- for (int subset = 0; subset < numSubsets; subset++)
- {
- int shape = partitionShapes[subset];
- int shapeStart = BC7Data::g_shapeRanges[shape][0];
- int shapeLength = BC7Data::g_shapeRanges[shape][1];
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < 4; ch++)
- ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]);
- for (int pxi = 0; pxi < shapeLength; pxi++)
- {
- int px = BC7Data::g_fragments[shapeStart + pxi];
- ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]);
- }
- }
- ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError);
- ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
- ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition));
- }
- }
- }
- }
- void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
- {
- // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that.
- // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to
- // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases:
- // - Separate alpha channel, then weighted RGB
- // - Alpha+2 other channels, then the independent channel
- if (numRefineRounds < 1)
- numRefineRounds = 1;
- float channelWeightsSq[4];
- for (int ch = 0; ch < 4; ch++)
- channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
- for (uint16_t mode = 4; mode <= 5; mode++)
- {
- int numSP[2] = { 0, 0 };
- for (uint16_t rotation = 0; rotation < 4; rotation++)
- {
- if (mode == 4)
- {
- numSP[0] = encodingPlan.mode4SP[rotation][0];
- numSP[1] = encodingPlan.mode4SP[rotation][1];
- }
- else
- numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation];
- if (numSP[0] == 0 && numSP[1] == 0)
- continue;
- int alphaChannel = (rotation + 3) & 3;
- int redChannel = (rotation == 1) ? 3 : 0;
- int greenChannel = (rotation == 2) ? 3 : 1;
- int blueChannel = (rotation == 3) ? 3 : 2;
- MUInt15 rotatedRGB[16][3];
- MFloat floatRotatedRGB[16][3];
- for (int px = 0; px < 16; px++)
- {
- rotatedRGB[px][0] = pixels[px][redChannel];
- rotatedRGB[px][1] = pixels[px][greenChannel];
- rotatedRGB[px][2] = pixels[px][blueChannel];
- for (int ch = 0; ch < 3; ch++)
- floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]);
- }
- uint16_t maxIndexSelector = (mode == 4) ? 2 : 1;
- float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] };
- float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] };
- float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] };
- float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] };
- float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error
- MFloat preWeightedRotatedRGB[16][3];
- BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights);
- for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++)
- {
- int numTweakRounds = numSP[indexSelector];
- if (numTweakRounds <= 0)
- continue;
- if (numTweakRounds > MaxTweakRounds)
- numTweakRounds = MaxTweakRounds;
- EndpointSelector<3, 8> rgbSelector;
- for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
- {
- for (int px = 0; px < 16; px++)
- rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f));
- rgbSelector.FinishPass(epPass);
- }
- MUInt15 alphaRange[2];
- alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel];
- for (int px = 1; px < 16; px++)
- {
- alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]);
- alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]);
- }
- int rgbPrec = 0;
- int alphaPrec = 0;
- if (mode == 4)
- {
- rgbPrec = indexSelector ? 3 : 2;
- alphaPrec = indexSelector ? 2 : 3;
- }
- else
- rgbPrec = alphaPrec = 2;
- UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights);
- MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX);
- MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX);
- MUInt15 bestRGBIndexes[16];
- MUInt15 bestAlphaIndexes[16];
- MUInt15 bestEP[2][4];
- for (int px = 0; px < 16; px++)
- bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0);
- for (int tweak = 0; tweak < numTweakRounds; tweak++)
- {
- MUInt15 rgbEP[2][3];
- MUInt15 alphaEP[2];
- unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]);
- TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP);
- for (int refine = 0; refine < numRefineRounds; refine++)
- {
- if (mode == 4)
- CompressEndpoints4(rgbEP, alphaEP);
- else
- CompressEndpoints5(rgbEP, alphaEP);
- IndexSelector<1> alphaIndexSelector;
- IndexSelector<3> rgbIndexSelector;
- {
- MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } };
- alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec);
- }
- rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec);
- EndpointRefiner<3> rgbRefiner;
- EndpointRefiner<1> alphaRefiner;
- rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights);
- alphaRefiner.Init(1 << alphaPrec, uniformWeight);
- MFloat errorRGB = ParallelMath::MakeFloatZero();
- MFloat errorA = ParallelMath::MakeFloatZero();
- MUInt15 rgbIndexes[16];
- MUInt15 alphaIndexes[16];
- AggregatedError<3> rgbAggError;
- AggregatedError<1> alphaAggError;
- for (int px = 0; px < 16; px++)
- {
- MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn);
- MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn);
- MUInt15 reconstructedRGB[3];
- MUInt15 reconstructedAlpha[1];
- rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB);
- alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha);
- if (flags & cvtt::Flags::BC7_FastIndexing)
- {
- BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError);
- BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError);
- }
- else
- {
- AggregatedError<3> baseRGBAggError;
- AggregatedError<1> baseAlphaAggError;
- BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError);
- BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError);
- MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
- MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
- MUInt15 altRGBIndexes[2];
- MUInt15 altAlphaIndexes[2];
- altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
- altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1)));
- altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
- altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1)));
- for (int ii = 0; ii < 2; ii++)
- {
- rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB);
- alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha);
- AggregatedError<3> altRGBAggError;
- AggregatedError<1> altAlphaAggError;
- BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError);
- BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError);
- MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
- MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
- ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError));
- ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError));
- rgbError = ParallelMath::Min(altRGBError, rgbError);
- alphaError = ParallelMath::Min(altAlphaError, alphaError);
- ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]);
- ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]);
- }
- errorRGB = errorRGB + rgbError;
- errorA = errorA + alphaError;
- }
- if (refine != numRefineRounds - 1)
- {
- rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex);
- alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex);
- }
- if (flags & Flags::BC7_FastIndexing)
- {
- errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq);
- errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq);
- }
- rgbIndexes[px] = rgbIndex;
- alphaIndexes[px] = alphaIndex;
- }
- ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError);
- ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError);
- ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter);
- ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter);
- if (ParallelMath::AnySet(rgbBetterInt16))
- {
- bestRGBError = ParallelMath::Min(errorRGB, bestRGBError);
- for (int px = 0; px < 16; px++)
- ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]);
- for (int ep = 0; ep < 2; ep++)
- {
- for (int ch = 0; ch < 3; ch++)
- ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]);
- }
- }
- if (ParallelMath::AnySet(alphaBetterInt16))
- {
- bestAlphaError = ParallelMath::Min(errorA, bestAlphaError);
- for (int px = 0; px < 16; px++)
- ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]);
- for (int ep = 0; ep < 2; ep++)
- ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]);
- }
- if (refine != numRefineRounds - 1)
- {
- rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn);
- MUInt15 alphaEPTemp[2][1];
- alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn);
- for (int i = 0; i < 2; i++)
- alphaEP[i] = alphaEPTemp[i][0];
- }
- } // refine
- } // tweak
- MFloat combinedError = bestRGBError + bestAlphaError;
- ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error);
- ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
- work.m_error = ParallelMath::Min(combinedError, work.m_error);
- ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
- ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation));
- ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector));
- for (int px = 0; px < 16; px++)
- {
- ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]);
- ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]);
- }
- for (int ep = 0; ep < 2; ep++)
- for (int ch = 0; ch < 4; ch++)
- ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]);
- }
- }
- }
- }
- template<class T>
- void cvtt::Internal::BC7Computer::Swap(T& a, T& b)
- {
- T temp = a;
- a = b;
- b = temp;
- }
- void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds)
- {
- MUInt15 pixels[16][4];
- MFloat floatPixels[16][4];
- for (int px = 0; px < 16; px++)
- {
- for (int ch = 0; ch < 4; ch++)
- ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
- }
- for (int px = 0; px < 16; px++)
- {
- for (int ch = 0; ch < 4; ch++)
- floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);
- }
- BC67::WorkInfo work;
- memset(&work, 0, sizeof(work));
- work.m_error = ParallelMath::MakeFloat(FLT_MAX);
- {
- ParallelMath::RoundTowardNearestForScope rtn;
- TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
- TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
- }
- for (int block = 0; block < ParallelMath::ParallelSize; block++)
- {
- PackingVector pv;
- pv.Init();
- ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block);
- ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block);
- ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block);
- const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode];
- ParallelMath::ScalarUInt16 indexes[16];
- ParallelMath::ScalarUInt16 indexes2[16];
- ParallelMath::ScalarUInt16 endPoints[3][2][4];
- for (int i = 0; i < 16; i++)
- {
- indexes[i] = ParallelMath::Extract(work.m_indexes[i], block);
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block);
- }
- for (int subset = 0; subset < 3; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- for (int ch = 0; ch < 4; ch++)
- endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block);
- }
- }
- int fixups[3] = { 0, 0, 0 };
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- {
- bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0);
- bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0);
- if (flipRGB)
- {
- uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
- for (int px = 0; px < 16; px++)
- indexes[px] = highIndex - indexes[px];
- }
- if (flipAlpha)
- {
- uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1;
- for (int px = 0; px < 16; px++)
- indexes2[px] = highIndex - indexes2[px];
- }
- if (indexSelector)
- Swap(flipRGB, flipAlpha);
- if (flipRGB)
- {
- for (int ch = 0; ch < 3; ch++)
- Swap(endPoints[0][0][ch], endPoints[0][1][ch]);
- }
- if (flipAlpha)
- Swap(endPoints[0][0][3], endPoints[0][1][3]);
- }
- else
- {
- if (modeInfo.m_numSubsets == 2)
- fixups[1] = BC7Data::g_fixupIndexes2[partition];
- else if (modeInfo.m_numSubsets == 3)
- {
- fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
- fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
- }
- bool flip[3] = { false, false, false };
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0);
- if (flip[0] || flip[1] || flip[2])
- {
- uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
- for (int px = 0; px < 16; px++)
- {
- int subset = 0;
- if (modeInfo.m_numSubsets == 2)
- subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
- else if (modeInfo.m_numSubsets == 3)
- subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
- if (flip[subset])
- indexes[px] = highIndex - indexes[px];
- }
- int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3;
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- if (flip[subset])
- for (int ch = 0; ch < maxCH; ch++)
- Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]);
- }
- }
- }
- pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1);
- if (modeInfo.m_partitionBits)
- pv.Pack(partition, modeInfo.m_partitionBits);
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- {
- ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block);
- pv.Pack(rotation, 2);
- }
- if (modeInfo.m_hasIndexSelector)
- pv.Pack(indexSelector, 1);
- // Encode RGB
- for (int ch = 0; ch < 3; ch++)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch];
- epPart >>= (8 - modeInfo.m_rgbBits);
- pv.Pack(epPart, modeInfo.m_rgbBits);
- }
- }
- }
- // Encode alpha
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3];
- epPart >>= (8 - modeInfo.m_alphaBits);
- pv.Pack(epPart, modeInfo.m_alphaBits);
- }
- }
- }
- // Encode parity bits
- if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0];
- epPart >>= (7 - modeInfo.m_rgbBits);
- epPart &= 1;
- pv.Pack(epPart, 1);
- }
- }
- else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0];
- epPart >>= (7 - modeInfo.m_rgbBits);
- epPart &= 1;
- pv.Pack(epPart, 1);
- }
- }
- }
- // Encode indexes
- for (int px = 0; px < 16; px++)
- {
- int bits = modeInfo.m_indexBits;
- if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
- bits--;
- pv.Pack(indexes[px], bits);
- }
- // Encode secondary indexes
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- {
- for (int px = 0; px < 16; px++)
- {
- int bits = modeInfo.m_alphaIndexBits;
- if (px == 0)
- bits--;
- pv.Pack(indexes2[px], bits);
- }
- }
- pv.Flush(packedBlocks);
- packedBlocks += 16;
- }
- }
- void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock)
- {
- UnpackingVector pv;
- pv.Init(packedBlock);
- int mode = 8;
- for (int i = 0; i < 8; i++)
- {
- if (pv.Unpack(1) == 1)
- {
- mode = i;
- break;
- }
- }
- if (mode > 7)
- {
- for (int px = 0; px < 16; px++)
- for (int ch = 0; ch < 4; ch++)
- output.m_pixels[px][ch] = 0;
- return;
- }
- const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode];
- int partition = 0;
- if (modeInfo.m_partitionBits)
- partition = pv.Unpack(modeInfo.m_partitionBits);
- int rotation = 0;
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- rotation = pv.Unpack(2);
- int indexSelector = 0;
- if (modeInfo.m_hasIndexSelector)
- indexSelector = pv.Unpack(1);
- // Resolve fixups
- int fixups[3] = { 0, 0, 0 };
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate)
- {
- if (modeInfo.m_numSubsets == 2)
- fixups[1] = BC7Data::g_fixupIndexes2[partition];
- else if (modeInfo.m_numSubsets == 3)
- {
- fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
- fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
- }
- }
- int endPoints[3][2][4];
- // Decode RGB
- for (int ch = 0; ch < 3; ch++)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits));
- }
- }
- // Decode alpha
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits));
- }
- }
- else
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- endPoints[subset][ep][3] = 255;
- }
- }
- int parityBits = 0;
- // Decode parity bits
- if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- int p = pv.Unpack(1);
- for (int ep = 0; ep < 2; ep++)
- {
- for (int ch = 0; ch < 3; ch++)
- endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
- }
- }
- parityBits = 1;
- }
- else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
- {
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- int p = pv.Unpack(1);
- for (int ch = 0; ch < 3; ch++)
- endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
- }
- }
- parityBits = 1;
- }
- // Fill endpoint bits
- for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
- {
- for (int ep = 0; ep < 2; ep++)
- {
- for (int ch = 0; ch < 3; ch++)
- endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits));
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits));
- }
- }
- int indexes[16];
- int indexes2[16];
- // Decode indexes
- for (int px = 0; px < 16; px++)
- {
- int bits = modeInfo.m_indexBits;
- if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
- bits--;
- indexes[px] = pv.Unpack(bits);
- }
- // Decode secondary indexes
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- {
- for (int px = 0; px < 16; px++)
- {
- int bits = modeInfo.m_alphaIndexBits;
- if (px == 0)
- bits--;
- indexes2[px] = pv.Unpack(bits);
- }
- }
- else
- {
- for (int px = 0; px < 16; px++)
- indexes2[px] = 0;
- }
- const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits];
- const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits];
- // Decode each pixel
- for (int px = 0; px < 16; px++)
- {
- int rgbWeight = 0;
- int alphaWeight = 0;
- int rgbIndex = indexes[px];
- rgbWeight = rgbWeights[indexes[px]];
- if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined)
- alphaWeight = rgbWeight;
- else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
- alphaWeight = alphaWeights[indexes2[px]];
- if (indexSelector == 1)
- {
- int temp = rgbWeight;
- rgbWeight = alphaWeight;
- alphaWeight = temp;
- }
- int pixel[4] = { 0, 0, 0, 255 };
- int subset = 0;
- if (modeInfo.m_numSubsets == 2)
- subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
- else if (modeInfo.m_numSubsets == 3)
- subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
- for (int ch = 0; ch < 3; ch++)
- pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6;
- if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
- pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6;
- if (rotation != 0)
- {
- int ch = rotation - 1;
- int temp = pixel[ch];
- pixel[ch] = pixel[3];
- pixel[3] = temp;
- }
- for (int ch = 0; ch < 4; ch++)
- output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]);
- }
- }
- cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru)
- {
- assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744))));
- assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL)));
- // Expand to full range
- ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0));
- MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL));
- absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision);
- MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem);
- return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16);
- }
- cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru)
- {
- MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru);
- return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision));
- }
- void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL)
- {
- MSInt16 zero = ParallelMath::MakeSInt16(0);
- ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero);
- MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp));
- MSInt16 unq;
- MUInt15 absUnq;
- if (precision >= 16)
- {
- unq = comp;
- absUnq = absComp;
- }
- else
- {
- MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2));
- ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
- ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
- absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1)));
- ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0));
- ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff));
- unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq));
- }
- outUnquantized = unq;
- MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5));
- outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq));
- }
- void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished)
- {
- MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp);
- if (precision < 15)
- {
- MUInt15 zero = ParallelMath::MakeUInt15(0);
- MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2));
- ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
- ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
- unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision));
- ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0));
- ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff));
- }
- outUnquantized = unq;
- outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6));
- }
- void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
- {
- MSInt16 unquantizedEP[2][3];
- MSInt16 finishedUnquantizedEP[2][3];
- {
- ParallelMath::RoundUpForScope ru;
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru);
- UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
- quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
- }
- }
- }
- indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
- indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights);
- MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
- MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
- ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
- if (ParallelMath::AnySet(invert))
- {
- ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
- indexSelector.ConditionalInvert(invert);
- for (int ch = 0; ch < 3; ch++)
- {
- MAInt16 firstEP = quantizedEndPoints[0][ch];
- MAInt16 secondEP = quantizedEndPoints[1][ch];
- quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
- quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
- }
- }
- indexes[fixupIndex] = index;
- }
- void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
- {
- MUInt16 unquantizedEP[2][3];
- MUInt16 finishedUnquantizedEP[2][3];
- {
- ParallelMath::RoundUpForScope ru;
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru);
- UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
- quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
- }
- }
- }
- indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
- indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights);
- MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
- MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
- ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
- if (ParallelMath::AnySet(invert))
- {
- ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
- indexSelector.ConditionalInvert(invert);
- for (int ch = 0; ch < 3; ch++)
- {
- MAInt16 firstEP = quantizedEndPoints[0][ch];
- MAInt16 secondEP = quantizedEndPoints[1][ch];
- quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
- quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
- }
- }
- indexes[fixupIndex] = index;
- }
- void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal)
- {
- ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
- MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
- for (int ch = 0; ch < 3; ch++)
- {
- outEncodedEPs[0][0][ch] = ep0[0][ch];
- outEncodedEPs[0][1][ch] = ep0[1][ch];
- outEncodedEPs[1][0][ch] = ep1[0][ch];
- outEncodedEPs[1][1][ch] = ep1[1][ch];
- if (isTransformed)
- {
- for (int subset = 0; subset < 2; subset++)
- {
- for (int epi = 0; epi < 2; epi++)
- {
- if (epi == 0 && subset == 0)
- continue;
- MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask);
- MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]);
- outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
- MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask);
- allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
- }
- }
- }
- if (!ParallelMath::AnySet(allLegal))
- break;
- }
- outIsLegal = allLegal;
- }
- void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal)
- {
- ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
- MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
- for (int ch = 0; ch < 3; ch++)
- {
- outEncodedEPs[0][ch] = ep[0][ch];
- outEncodedEPs[1][ch] = ep[1][ch];
- if (isTransformed)
- {
- MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask);
- MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]);
- outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
- MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask);
- allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
- }
- }
- outIsLegal = allLegal;
- }
- void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds)
- {
- if (numTweakRounds < 1)
- numTweakRounds = 1;
- else if (numTweakRounds > MaxTweakRounds)
- numTweakRounds = MaxTweakRounds;
- if (numRefineRounds < 1)
- numRefineRounds = 1;
- else if (numRefineRounds > MaxRefineRounds)
- numRefineRounds = MaxRefineRounds;
- bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0);
- float channelWeightsSq[3];
- ParallelMath::RoundTowardNearestForScope rtn;
- MSInt16 pixels[16][3];
- MFloat floatPixels2CL[16][3];
- MFloat floatPixelsLinearWeighted[16][3];
- MSInt16 low15Bits = ParallelMath::MakeSInt16(32767);
- for (int ch = 0; ch < 3; ch++)
- channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
- for (int px = 0; px < 16; px++)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- MSInt16 pixelValue;
- ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue);
- // Convert from sign+magnitude to 2CL
- if (isSigned)
- {
- ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0));
- MSInt16 magnitude = (pixelValue & low15Bits);
- ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude);
- pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743));
- }
- else
- pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0));
- pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743));
- pixels[px][ch] = pixelValue;
- floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue);
- floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch];
- }
- }
- MFloat preWeightedPixels[16][3];
- BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights);
- MAInt16 bestEndPoints[2][2][3];
- MUInt15 bestIndexes[16];
- MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
- MUInt15 bestMode = ParallelMath::MakeUInt15(0);
- MUInt15 bestPartition = ParallelMath::MakeUInt15(0);
- for (int px = 0; px < 16; px++)
- bestIndexes[px] = ParallelMath::MakeUInt15(0);
- for (int subset = 0; subset < 2; subset++)
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < 3; ch++)
- bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0);
- UnfinishedEndpoints<3> partitionedUFEP[32][2];
- UnfinishedEndpoints<3> singleUFEP;
- // Generate UFEP for partitions
- for (int p = 0; p < 32; p++)
- {
- int partitionMask = BC7Data::g_partitionMap[p];
- EndpointSelector<3, 8> epSelectors[2];
- for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
- {
- for (int px = 0; px < 16; px++)
- {
- int subset = (partitionMask >> px) & 1;
- epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
- }
- for (int subset = 0; subset < 2; subset++)
- epSelectors[subset].FinishPass(pass);
- }
- for (int subset = 0; subset < 2; subset++)
- partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights);
- }
- // Generate UFEP for single
- {
- EndpointSelector<3, 8> epSelector;
- for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
- {
- for (int px = 0; px < 16; px++)
- epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
- epSelector.FinishPass(pass);
- }
- singleUFEP = epSelector.GetEndpoints(channelWeights);
- }
- for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++)
- {
- bool partitioned = (partitionedInt == 1);
- for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--)
- {
- if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec])
- continue;
- int numPartitions = partitioned ? 32 : 1;
- int numSubsets = partitioned ? 2 : 1;
- int indexBits = partitioned ? 3 : 4;
- int indexRange = (1 << indexBits);
- for (int p = 0; p < numPartitions; p++)
- {
- int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0;
- const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds;
- MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3];
- MUInt15 metaIndexes[MaxMetaRounds][16];
- MFloat metaError[MaxMetaRounds][2];
- bool roundValid[MaxMetaRounds][2];
- for (int r = 0; r < MaxMetaRounds; r++)
- for (int subset = 0; subset < 2; subset++)
- roundValid[r][subset] = true;
- for (int subset = 0; subset < numSubsets; subset++)
- {
- for (int tweak = 0; tweak < MaxTweakRounds; tweak++)
- {
- EndpointRefiner<3> refiners[2];
- bool abortRemainingRefines = false;
- for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++)
- {
- int metaRound = tweak * MaxRefineRounds + refinePass;
- if (tweak >= numTweakRounds || refinePass >= numRefineRounds)
- abortRemainingRefines = true;
- if (abortRemainingRefines)
- {
- roundValid[metaRound][subset] = false;
- continue;
- }
- MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound];
- MUInt15(&mrIndexes)[16] = metaIndexes[metaRound];
- MSInt16 endPointsColorSpace[2][3];
- if (refinePass == 0)
- {
- UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP;
- if (isSigned)
- ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
- else
- ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
- }
- else
- refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn);
- refiners[subset].Init(indexRange, channelWeights);
- int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p];
- IndexSelectorHDR<3> indexSelector;
- if (isSigned)
- QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
- else
- QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
- if (metaRound > 0)
- {
- ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false);
- for (int prevRound = 0; prevRound < metaRound; prevRound++)
- {
- MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset];
- ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true);
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < 3; ch++)
- same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch]));
- anySame = (anySame | same);
- if (ParallelMath::AllSet(anySame))
- break;
- }
- if (ParallelMath::AllSet(anySame))
- {
- roundValid[metaRound][subset] = false;
- continue;
- }
- }
- MFloat subsetError = ParallelMath::MakeFloatZero();
- {
- for (int px = 0; px < 16; px++)
- {
- if (subset != ((partitionMask >> px) & 1))
- continue;
- MUInt15 index;
- if (px == fixupIndex)
- index = mrIndexes[px];
- else
- {
- index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn);
- mrIndexes[px] = index;
- }
- MSInt16 reconstructed[3];
- if (isSigned)
- indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed);
- else
- indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed);
- subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq));
- if (refinePass != numRefineRounds - 1)
- refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index);
- }
- }
- metaError[metaRound][subset] = subsetError;
- }
- }
- }
- // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme
- int numMeta1 = partitioned ? MaxMetaRounds : 1;
- for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++)
- {
- if (!roundValid[meta0][0])
- continue;
- for (int meta1 = 0; meta1 < numMeta1; meta1++)
- {
- MFloat combinedError = metaError[meta0][0];
- if (partitioned)
- {
- if (!roundValid[meta1][1])
- continue;
- combinedError = combinedError + metaError[meta1][1];
- }
- ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError);
- if (!ParallelMath::AnySet(errorBetter))
- continue;
- ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter);
- // Figure out if this is encodable
- for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++)
- {
- const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode];
- if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec)
- continue;
- MAInt16 encodedEPs[2][2][3];
- ParallelMath::Int16CompFlag isLegal;
- if (partitioned)
- EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal);
- else
- EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal);
- ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal);
- if (!ParallelMath::AnySet(isLegalAndBetter))
- continue;
- ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter);
- ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError);
- ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode)));
- ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p)));
- for (int subset = 0; subset < numSubsets; subset++)
- {
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]);
- }
- }
- for (int px = 0; px < 16; px++)
- {
- int subset = ((partitionMask >> px) & 1);
- if (subset == 0)
- ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]);
- else
- ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]);
- }
- needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter);
- if (!ParallelMath::AnySet(needsCommit))
- break;
- }
- }
- }
- }
- }
- }
- // At this point, everything should be set
- for (int block = 0; block < ParallelMath::ParallelSize; block++)
- {
- ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block);
- ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block);
- int32_t eps[2][2][3];
- ParallelMath::ScalarUInt16 indexes[16];
- const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
- const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];
- const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
- for (int subset = 0; subset < 2; subset++)
- {
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block);
- }
- }
- for (int px = 0; px < 16; px++)
- indexes[px] = ParallelMath::Extract(bestIndexes[px], block);
- uint16_t modeID = modeInfo.m_modeID;
- PackingVector pv;
- pv.Init();
- for (size_t i = 0; i < headerBits; i++) {
- int32_t codedValue = 0;
- switch (desc[i].m_eField) {
- case BC6HData::M:
- codedValue = modeID;
- break;
- case BC6HData::D:
- codedValue = partition;
- break;
- case BC6HData::RW:
- codedValue = eps[0][0][0];
- break;
- case BC6HData::RX:
- codedValue = eps[0][1][0];
- break;
- case BC6HData::RY:
- codedValue = eps[1][0][0];
- break;
- case BC6HData::RZ:
- codedValue = eps[1][1][0];
- break;
- case BC6HData::GW:
- codedValue = eps[0][0][1];
- break;
- case BC6HData::GX:
- codedValue = eps[0][1][1];
- break;
- case BC6HData::GY:
- codedValue = eps[1][0][1];
- break;
- case BC6HData::GZ:
- codedValue = eps[1][1][1];
- break;
- case BC6HData::BW:
- codedValue = eps[0][0][2];
- break;
- case BC6HData::BX:
- codedValue = eps[0][1][2];
- break;
- case BC6HData::BY:
- codedValue = eps[1][0][2];
- break;
- case BC6HData::BZ:
- codedValue = eps[1][1][2];
- break;
- default:
- assert(false);
- break;
- }
- pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1);
- }
- int fixupIndex1 = 0;
- int indexBits = 4;
- if (modeInfo.m_partitioned)
- {
- fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
- indexBits = 3;
- }
- for (int px = 0; px < 16; px++)
- {
- ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block);
- if (px == 0 || px == fixupIndex1)
- pv.Pack(index, indexBits - 1);
- else
- pv.Pack(index, indexBits);
- }
- pv.Flush(packedBlocks + 16 * block);
- }
- }
- void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits)
- {
- if (v & (1 << (bits - 1)))
- v |= -(1 << bits);
- }
- void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned)
- {
- UnpackingVector pv;
- pv.Init(pBC);
- int numModeBits = 2;
- int modeBits = pv.Unpack(2);
- if (modeBits != 0 && modeBits != 1)
- {
- modeBits |= pv.Unpack(3) << 2;
- numModeBits += 3;
- }
- int mode = -1;
- for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++)
- {
- if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits)
- {
- mode = possibleMode;
- break;
- }
- }
- if (mode < 0)
- {
- for (int px = 0; px < 16; px++)
- {
- for (int ch = 0; ch < 3; ch++)
- output.m_pixels[px][ch] = 0;
- output.m_pixels[px][3] = 0x3c00; // 1.0
- }
- return;
- }
- const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
- const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
- const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];
- int32_t partition = 0;
- int32_t eps[2][2][3];
- for (int subset = 0; subset < 2; subset++)
- for (int epi = 0; epi < 2; epi++)
- for (int ch = 0; ch < 3; ch++)
- eps[subset][epi][ch] = 0;
- for (size_t i = numModeBits; i < headerBits; i++) {
- int32_t *pCodedValue = NULL;
- switch (desc[i].m_eField) {
- case BC6HData::D:
- pCodedValue = &partition;
- break;
- case BC6HData::RW:
- pCodedValue = &eps[0][0][0];
- break;
- case BC6HData::RX:
- pCodedValue = &eps[0][1][0];
- break;
- case BC6HData::RY:
- pCodedValue = &eps[1][0][0];
- break;
- case BC6HData::RZ:
- pCodedValue = &eps[1][1][0];
- break;
- case BC6HData::GW:
- pCodedValue = &eps[0][0][1];
- break;
- case BC6HData::GX:
- pCodedValue = &eps[0][1][1];
- break;
- case BC6HData::GY:
- pCodedValue = &eps[1][0][1];
- break;
- case BC6HData::GZ:
- pCodedValue = &eps[1][1][1];
- break;
- case BC6HData::BW:
- pCodedValue = &eps[0][0][2];
- break;
- case BC6HData::BX:
- pCodedValue = &eps[0][1][2];
- break;
- case BC6HData::BY:
- pCodedValue = &eps[1][0][2];
- break;
- case BC6HData::BZ:
- pCodedValue = &eps[1][1][2];
- break;
- default:
- assert(false);
- break;
- }
- (*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit;
- }
- uint16_t modeID = modeInfo.m_modeID;
- int fixupIndex1 = 0;
- int indexBits = 4;
- int numSubsets = 1;
- if (modeInfo.m_partitioned)
- {
- fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
- indexBits = 3;
- numSubsets = 2;
- }
- int indexes[16];
- for (int px = 0; px < 16; px++)
- {
- if (px == 0 || px == fixupIndex1)
- indexes[px] = pv.Unpack(indexBits - 1);
- else
- indexes[px] = pv.Unpack(indexBits);
- }
- if (modeInfo.m_partitioned)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- if (isSigned)
- SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
- if (modeInfo.m_transformed || isSigned)
- {
- SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
- SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]);
- SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]);
- }
- }
- }
- else
- {
- for (int ch = 0; ch < 3; ch++)
- {
- if (isSigned)
- SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
- if (modeInfo.m_transformed || isSigned)
- SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
- }
- }
- int aPrec = modeInfo.m_aPrec;
- if (modeInfo.m_transformed)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- int wrapMask = (1 << aPrec) - 1;
- eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask);
- if (isSigned)
- SignExtendSingle(eps[0][1][ch], aPrec);
- if (modeInfo.m_partitioned)
- {
- eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask);
- eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask);
- if (isSigned)
- {
- SignExtendSingle(eps[1][0][ch], aPrec);
- SignExtendSingle(eps[1][1][ch], aPrec);
- }
- }
- }
- }
- // Unquantize endpoints
- for (int subset = 0; subset < numSubsets; subset++)
- {
- for (int epi = 0; epi < 2; epi++)
- {
- for (int ch = 0; ch < 3; ch++)
- {
- int &v = eps[subset][epi][ch];
- if (isSigned)
- {
- if (aPrec >= 16)
- {
- // Nothing
- }
- else
- {
- bool s = false;
- int comp = v;
- if (v < 0)
- {
- s = true;
- comp = -comp;
- }
- int unq = 0;
- if (comp == 0)
- unq = 0;
- else if (comp >= ((1 << (aPrec - 1)) - 1))
- unq = 0x7fff;
- else
- unq = ((comp << 15) + 0x4000) >> (aPrec - 1);
- if (s)
- unq = -unq;
- v = unq;
- }
- }
- else
- {
- if (aPrec >= 15)
- {
- // Nothing
- }
- else if (v == 0)
- {
- // Nothing
- }
- else if (v == ((1 << aPrec) - 1))
- v = 0xffff;
- else
- v = ((v << 16) + 0x8000) >> aPrec;
- }
- }
- }
- }
- const int *weights = BC7Data::g_weightTables[indexBits];
- for (int px = 0; px < 16; px++)
- {
- int subset = 0;
- if (modeInfo.m_partitioned)
- subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
- int w = weights[indexes[px]];
- for (int ch = 0; ch < 3; ch++)
- {
- int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6;
- if (isSigned)
- {
- if (comp < 0)
- comp = -(((-comp) * 31) >> 5);
- else
- comp = (comp * 31) >> 5;
- int s = 0;
- if (comp < 0)
- {
- s = 0x8000;
- comp = -comp;
- }
- output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp);
- }
- else
- {
- comp = (comp * 31) >> 6;
- output.m_pixels[px][ch] = static_cast<uint16_t>(comp);
- }
- }
- output.m_pixels[px][3] = 0x3c00; // 1.0
- }
- }
- void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality)
- {
- static const int kMaxQuality = 100;
- if (quality < 1)
- quality = 1;
- else if (quality > kMaxQuality)
- quality = kMaxQuality;
- const int numRGBModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality;
- const int numRGBAModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality;
- const uint16_t *prioLists[] = { cvtt::Tables::BC7Prio::g_bc7PrioCodesRGB, cvtt::Tables::BC7Prio::g_bc7PrioCodesRGBA };
- const int prioListSizes[] = { numRGBModes, numRGBAModes };
- BC7FineTuningParams ftParams;
- memset(&ftParams, 0, sizeof(ftParams));
- for (int listIndex = 0; listIndex < 2; listIndex++)
- {
- int prioListSize = prioListSizes[listIndex];
- const uint16_t *prioList = prioLists[listIndex];
- for (int prioIndex = 0; prioIndex < prioListSize; prioIndex++)
- {
- const uint16_t packedMode = prioList[prioIndex];
- uint8_t seedPoints = static_cast<uint8_t>(cvtt::Tables::BC7Prio::UnpackSeedPointCount(packedMode));
- int mode = cvtt::Tables::BC7Prio::UnpackMode(packedMode);
- switch (mode)
- {
- case 0:
- ftParams.mode0SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;
- break;
- case 1:
- ftParams.mode1SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;
- break;
- case 2:
- ftParams.mode2SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;
- break;
- case 3:
- ftParams.mode3SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;
- break;
- case 4:
- ftParams.mode4SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)][cvtt::Tables::BC7Prio::UnpackIndexSelector(packedMode)] = seedPoints;
- break;
- case 5:
- ftParams.mode5SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)] = seedPoints;
- break;
- case 6:
- ftParams.mode6SP = seedPoints;
- break;
- case 7:
- ftParams.mode7SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;
- break;
- }
- }
- }
- ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, ftParams);
- }
- // Generates a BC7 encoding plan from fine-tuning parameters.
- bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams ¶ms)
- {
- memset(&encodingPlan, 0, sizeof(encodingPlan));
- // Mode 0
- for (int partition = 0; partition < 16; partition++)
- {
- uint8_t sp = params.mode0SP[partition];
- if (sp == 0)
- continue;
- encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition;
- for (int subset = 0; subset < 3; subset++)
- {
- int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];
- encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
- }
- }
- // Mode 1
- for (int partition = 0; partition < 64; partition++)
- {
- uint8_t sp = params.mode1SP[partition];
- if (sp == 0)
- continue;
- encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition;
- for (int subset = 0; subset < 2; subset++)
- {
- int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
- encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
- }
- }
- // Mode 2
- for (int partition = 0; partition < 64; partition++)
- {
- uint8_t sp = params.mode2SP[partition];
- if (sp == 0)
- continue;
- encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition;
- for (int subset = 0; subset < 3; subset++)
- {
- int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];
- encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
- }
- }
- // Mode 3
- for (int partition = 0; partition < 64; partition++)
- {
- uint8_t sp = params.mode3SP[partition];
- if (sp == 0)
- continue;
- encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition;
- for (int subset = 0; subset < 2; subset++)
- {
- int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
- encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
- }
- }
- // Mode 4
- for (int rotation = 0; rotation < 4; rotation++)
- {
- for (int indexMode = 0; indexMode < 2; indexMode++)
- encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode];
- }
- // Mode 5
- for (int rotation = 0; rotation < 4; rotation++)
- encodingPlan.mode5SP[rotation] = params.mode5SP[rotation];
- // Mode 6
- {
- uint8_t sp = params.mode6SP;
- if (sp != 0)
- {
- encodingPlan.mode6Enabled = true;
- int shape = cvtt::Internal::BC7Data::g_shapes1[0][0];
- encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);
- }
- }
- // Mode 7
- for (int partition = 0; partition < 64; partition++)
- {
- uint8_t sp = params.mode7SP[partition];
- if (sp == 0)
- continue;
- encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition;
- for (int subset = 0; subset < 2; subset++)
- {
- int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
- encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);
- }
- }
- for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++)
- {
- if (encodingPlan.seedPointsForShapeRGB[i] > 0)
- {
- encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i;
- encodingPlan.rgbNumShapesToEvaluate++;
- }
- }
- for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++)
- {
- if (encodingPlan.seedPointsForShapeRGBA[i] > 0)
- {
- encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i;
- encodingPlan.rgbaNumShapesToEvaluate++;
- }
- }
- encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled);
- return true;
- }
- #endif
|