/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2018-2020 Evan Nemerson <evan@nemerson.com>
 *   2019-2020 Michael R. Crusoe <crusoe@debian.org>
 *   2020      Himanshi Mathur <himanshi18037@iiitd.ac.in>
 *   2020      Hidayat Khan <huk2209@gmail.com>
 */

#if !defined(SIMDE_X86_AVX2_H)
#define SIMDE_X86_AVX2_H

#include "avx.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
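
/* Implementation note: every wrapper below follows the same dispatch
 * pattern.  When SIMDE_X86_AVX2_NATIVE is defined the native intrinsic
 * is called directly; otherwise the 256-bit vector is processed either
 * as two 128-bit halves (when SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128))
 * or with a portable scalar loop that SIMDE_VECTORIZE encourages the
 * compiler to auto-vectorize.
 *
 * A minimal usage sketch (a hypothetical caller, not part of this
 * file), which behaves the same with or without AVX2 hardware:
 *
 *   #include "simde/x86/avx2.h"
 *   simde__m256i sum = simde_mm256_add_epi32(a, b);
 */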

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi8 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi8(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi8
  #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi16 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi16(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi16
  #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi32 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi32(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi32
  #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
#endif
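
/* Element-wise addition.  As with the native intrinsics, these wrap on
 * overflow (two's complement); the adds_* family further down provides
 * the saturating variants. */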

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 + b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] + b_.i8[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi8
  #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 + b_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[i] + b_.i16[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi16
  #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
#endif
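
/* Horizontal add: pairwise sums within each operand.  The fallback
 * expresses this as a vertical add of the even-indexed and odd-indexed
 * elements, gathered by the simde_x_mm256_deinterleave{even,odd}_*
 * helpers provided elsewhere in SIMDe. */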

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_hadd_epi16(a, b);
  #else
    return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hadd_epi16
  #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 + b_.i32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] + b_.i32[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi32
  #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_hadd_epi32(a, b);
  #else
    return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hadd_epi32
  #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS)
      r_.i64 = a_.i64 + b_.i64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] + b_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi64
  #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b)
#endif
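
/* alignr concatenates each 128-bit lane of a onto the corresponding
 * lane of b, shifts right by `count` bytes, and keeps the low 16 bytes
 * of each lane.  For example, with count == 1 a lane of the result is
 * b[1..15] followed by a[0]. */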

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count)
    SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a),
    b_ = simde__m256i_to_private(b);

  if (HEDLEY_UNLIKELY(count > 31))
    return simde_mm256_setzero_si256();

  for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) {
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
      const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
      if (srcpos > 31) {
        r_.m128i_private[h].i8[i] = 0;
      } else if (srcpos > 15) {
        r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15];
      } else {
        r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos];
      }
    }
  }

  return simde__m256i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106)
#  define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count)
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
#  define simde_mm256_alignr_epi8(a, b, count) \
      simde_mm256_set_m128i( \
          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \
          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count)))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_alignr_epi8
  #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_and_si256 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_and_si256(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] & b_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_and_si256
  #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b)
#endif
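
/* Note the operand order: andnot computes (~a) & b, not a & (~b). */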

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_andnot_si256(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_andnot_si256
  #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b)
#endif
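
/* Saturating addition: results are clamped to the representable range
 * instead of wrapping, e.g. INT8_MAX + 1 saturates to INT8_MAX and
 * INT8_MIN + (-1) saturates to INT8_MIN. */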

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_adds_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_adds_epi8
  #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_adds_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_adds_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_adds_epi16
  #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_hadds_epi16(a, b);
  #else
    return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hadds_epi16
  #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_adds_epu8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_adds_epu8
  #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_adds_epu16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_adds_epu16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_adds_epu16
  #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)
#endif
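
/* Rounded average: (a + b + 1) >> 1, computed in a wider type thanks
 * to integer promotion, matching the round-half-up behavior of the
 * native instructions. */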

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_avg_epu8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
      r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_avg_epu8
  #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_avg_epu16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
      r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_avg_epu16
  #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)
#endif
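
/* Immediate blends: bit i of imm8 selects element i from b (when set)
 * or from a (when clear).  For the 256-bit epi16 variant only eight
 * bits are available, so they are reused for both 128-bit lanes
 * (hence the i % 8 below). */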

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_blend_epi32 (simde__m128i a, simde__m128i b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
    r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
  }

  return simde__m128i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8)
#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128)
#  define simde_mm_blend_epi32(a, b, imm8) \
      simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8)))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_blend_epi32
  #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_blend_epi16 (simde__m256i a, simde__m256i b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a),
    b_ = simde__m256i_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
    r_.i16[i] = ((imm8 >> (i % 8)) & 1) ? b_.i16[i] : a_.i16[i];
  }

  return simde__m256i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560)
#  define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8))
#elif defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8)
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
#  define simde_mm256_blend_epi16(a, b, imm8) \
      simde_mm256_set_m128i( \
          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \
          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8)))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blend_epi16
  #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_blend_epi32 (simde__m256i a, simde__m256i b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a),
    b_ = simde__m256i_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
    r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i];
  }

  return simde__m256i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8)
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
#  define simde_mm256_blend_epi32(a, b, imm8) \
      simde_mm256_set_m128i( \
          simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \
          simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blend_epi32
  #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8)
#endif
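
/* Variable blend: the high bit of each byte of mask selects the byte
 * from b (set) or from a (clear).  The vector fallback materializes a
 * full-width mask by arithmetic-shifting each mask byte right by 7. */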

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_blendv_epi8 (simde__m256i a, simde__m256i b, simde__m256i mask) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_blendv_epi8(a, b, mask);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b),
      mask_ = simde__m256i_to_private(mask);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]);
      r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
      __typeof__(mask_.i8) tmp = mask_.i8 >> 7;
      r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        int8_t tmp = mask_.i8[i] >> 7;
        r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8)
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blendv_epi8
  #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask)
#endif
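
/* The broadcast family replicates element 0 of the source vector
 * across every element of the destination. */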

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_broadcastb_epi8 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm_broadcastb_epi8(a);
  #else
    simde__m128i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      r_.i8[i] = a_.i8[0];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastb_epi8
  #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastb_epi8 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastb_epi8(a);
  #else
    simde__m256i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      r_.i8[i] = a_.i8[0];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastb_epi8
  #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_broadcastw_epi16 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm_broadcastw_epi16(a);
  #else
    simde__m128i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
      r_.i16[i] = a_.i16[0];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastw_epi16
  #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastw_epi16 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastw_epi16(a);
  #else
    simde__m256i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
      r_.i16[i] = a_.i16[0];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastw_epi16
  #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_broadcastd_epi32 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm_broadcastd_epi32(a);
  #else
    simde__m128i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
      r_.i32[i] = a_.i32[0];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastd_epi32
  #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastd_epi32 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastd_epi32(a);
  #else
    simde__m256i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
      r_.i32[i] = a_.i32[0];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastd_epi32
  #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_broadcastq_epi64 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm_broadcastq_epi64(a);
  #else
    simde__m128i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = a_.i64[0];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastq_epi64
  #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastq_epi64 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastq_epi64(a);
  #else
    simde__m256i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = a_.i64[0];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastq_epi64
  #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_broadcastss_ps (simde__m128 a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm_broadcastss_ps(a);
  #elif defined(SIMDE_X86_SSE_NATIVE)
    return simde_mm_shuffle_ps(a, a, 0);
  #else
    simde__m128_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[0];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastss_ps
  #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_broadcastss_ps (simde__m128 a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastss_ps(a);
  #else
    simde__m256_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    #if defined(SIMDE_X86_AVX_NATIVE)
      __m128 tmp = _mm_permute_ps(a_.n, 0);
      r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1);
    #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
      r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0);
    #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128)
      r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[0];
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastss_ps
  #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_broadcastsd_pd (simde__m128d a) {
  return simde_mm_movedup_pd(a);
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm_broadcastsd_pd
  #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_broadcastsd_pd (simde__m128d a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_broadcastsd_pd(a);
  #else
    simde__m256d_private r_;
    simde__m128d_private a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = a_.f64[0];
    }

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastsd_pd
  #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastsi128_si256 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE) && \
      (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0))
    return _mm256_broadcastsi128_si256(a);
  #else
    simde__m256i_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i_private[0] = a_;
      r_.m128i_private[1] = a_;
    #else
      r_.i64[0] = a_.i64[0];
      r_.i64[1] = a_.i64[1];
      r_.i64[2] = a_.i64[0];
      r_.i64[3] = a_.i64[1];
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_broadcastsi128_si256
  #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)
  #undef _mm_broadcastsi128_si256
  #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a)
#endif
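
/* Byte shifts: each 128-bit lane is shifted independently, so bytes
 * never cross the lane boundary, and an imm8 greater than 15 zeroes
 * the entire lane. */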

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_bslli_epi128 (simde__m256i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a);
  const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0])));

  SIMDE_VECTORIZE
  for (int i = 0 ; i < ssize ; i++) {
    const int e = i - imm8;
    if (i >= (ssize/2)) {
      if (e >= (ssize/2) && e < ssize)
        r_.i8[i] = a_.i8[e];
      else
        r_.i8[i] = 0;
    }
    else {
      if (e >= 0 && e < (ssize/2))
        r_.i8[i] = a_.i8[e];
      else
        r_.i8[i] = 0;
    }
  }

  return simde__m256i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE) && \
    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \
    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)
  #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8)
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_bslli_epi128
  #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a);
  const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0])));

  SIMDE_VECTORIZE
  for (int i = 0 ; i < ssize ; i++) {
    const int e = i + imm8;
    if (i < (ssize/2)) {
      if (e >= 0 && e < (ssize/2))
        r_.i8[i] = a_.i8[e];
      else
        r_.i8[i] = 0;
    }
    else {
      if (e >= (ssize/2) && e < ssize)
        r_.i8[i] = a_.i8[e];
      else
        r_.i8[i] = 0;
    }
  }

  return simde__m256i_from_private(r_);
}
#if defined(SIMDE_X86_AVX2_NATIVE) && \
    (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \
    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)
  #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8)
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_bsrli_epi128
  #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8)
#endif
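
/* Comparisons produce a mask vector: every bit of an element is set
 * (e.g. ~INT8_C(0)) when the predicate holds and cleared otherwise, so
 * the result can be fed directly into and/andnot/blendv. */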

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_cmpeq_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cmpeq_epi8
  #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_cmpeq_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cmpeq_epi16
  #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_cmpeq_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cmpeq_epi32
  #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_cmpeq_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cmpeq_epi64
  #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b)
#endif
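
/* cmpgt compares *signed* elements; AVX2 offers no unsigned
 * greater-than, and neither do these wrappers. */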
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cmpgt_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cmpgt_epi8
- #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cmpgt_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cmpgt_epi16
- #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cmpgt_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cmpgt_epi32
- #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cmpgt_epi64(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cmpgt_epi64
- #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b)
- #endif
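- /* Note that the cmpgt_* family is a *signed* comparison, matching x86. A
-  * sketch of an unsigned byte compare built on top of it by flipping the
-  * sign bit of both operands (illustrative only; `a' and `b' are assumed
-  * inputs):
-  *
-  *   simde__m256i bias = simde_mm256_set1_epi8(INT8_MIN);
-  *   simde__m256i gtu  = simde_mm256_cmpgt_epi8(simde_mm256_xor_si256(a, bias),
-  *                                              simde_mm256_xor_si256(b, bias));
-  */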
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi8_epi16 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi8_epi16(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.i8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi8_epi16
- #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi8_epi32 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi8_epi32(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi8_epi32
- #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi8_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi8_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
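- /* No SIMDE_CONVERT_VECTOR_ fast path here: only the low four bytes of `a'
-  * participate, and the private union has no four-element int8 view with a
-  * matching element count for the conversion, so a plain widening loop is
-  * used instead. */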
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i8[i];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi8_epi64
- #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi16_epi32 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi16_epi32(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi16_epi32
- #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi16_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi16_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi16_epi64
- #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepi32_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepi32_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepi32_epi64
- #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu8_epi16 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu8_epi16(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.u8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu8_epi16
- #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu8_epi32 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu8_epi32(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.u8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu8_epi32
- #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu8_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu8_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.u8[i];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu8_epi64
- #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu16_epi32 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu16_epi32(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.u16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu16_epi32
- #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu16_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu16_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.u16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu16_epi64
- #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_cvtepu32_epi64 (simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_cvtepu32_epi64(a);
- #else
- simde__m256i_private r_;
- simde__m128i_private a_ = simde__m128i_to_private(a);
- #if defined(SIMDE_CONVERT_VECTOR_)
- SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.u32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_cvtepu32_epi64
- #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a)
- #endif
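- /* Widening sketch (illustrative; `bytes' and `acc' are assumed inputs):
-  * the cvtepi* conversions sign-extend while the cvtepu* conversions
-  * zero-extend, e.g. summing unsigned bytes without overflow by first
-  * widening to 16 bits:
-  *
-  *   simde__m256i w = simde_mm256_cvtepu8_epi16(bytes); // 16 x u8 -> 16 x i16
-  *   acc            = simde_mm256_add_epi16(acc, w);
-  */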
- SIMDE_FUNCTION_ATTRIBUTES
- int
- simde_mm256_extract_epi8 (simde__m256i a, const int index)
- SIMDE_REQUIRE_RANGE(index, 0, 31) {
- simde__m256i_private a_ = simde__m256i_to_private(a);
- return a_.i8[index];
- }
- #if defined(SIMDE_X86_AVX2_NATIVE) && \
- (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))
- #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_extract_epi8
- #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index)
- #endif
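- /* The HEDLEY_MSVC_VERSION_CHECK(19,10,0) guard above (and on the epi16
-  * variant below) reflects that older MSVC releases do not provide
-  * _mm256_extract_epi8/_mm256_extract_epi16; those compilers take the
-  * portable function instead, which simply indexes the lane array. */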
- SIMDE_FUNCTION_ATTRIBUTES
- int
- simde_mm256_extract_epi16 (simde__m256i a, const int index)
- SIMDE_REQUIRE_RANGE(index, 0, 15) {
- simde__m256i_private a_ = simde__m256i_to_private(a);
- return a_.i16[index];
- }
- #if defined(SIMDE_X86_AVX2_NATIVE) && \
- (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0))
- #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_extract_epi16
- #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm256_extracti128_si256 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
- simde__m256i_private a_ = simde__m256i_to_private(a);
- return a_.m128i[imm8];
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_extracti128_si256
- #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8)
- #endif
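- /* Usage sketch (illustrative; `v' is an assumed input): `imm8' selects a
-  * 128-bit half (0 = low, 1 = high) and must be a compile-time constant on
-  * the native path:
-  *
-  *   simde__m128i hi = simde_mm256_extracti128_si256(v, 1);
-  */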
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i32[i] = dst;
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i32gather_epi32
- #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)
- #endif
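- /* What the gather computes, as a sketch (`data' and `idx' are illustrative
-  * names): lane i loads 32 bits from base_addr + vindex[i] * scale, with
-  * `scale' measured in bytes. simde_memcpy is used above so unaligned and
-  * strict-aliasing-hostile sources stay well defined:
-  *
-  *   int32_t      data[] = { 10, 20, 30, 40 };
-  *   simde__m128i idx    = simde_mm_set_epi32(0, 3, 1, 2);
-  *   simde__m128i r      = simde_mm_i32gather_epi32(data, idx, 4); // {30,20,40,10}
-  */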
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- src_ = simde__m128i_to_private(src),
- mask_ = simde__m128i_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i32[i] = dst;
- }
- else {
- r_.i32[i] = src_.i32[i];
- }
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i32gather_epi32
- #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)
- #endif
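- /* Only the most significant bit of each mask lane is consulted; lanes with
-  * the bit clear take their value from `src' and their index is never
-  * dereferenced, so masked-off out-of-range indices are safe. (The hardware
-  * also clears the mask register after a gather, a side effect the portable
-  * fallback has no need to model.) */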
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i32[i] = dst;
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i32gather_epi32
- #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex),
- src_ = simde__m256i_to_private(src),
- mask_ = simde__m256i_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i32[i] = dst;
- }
- else {
- r_.i32[i] = src_.i32[i];
- }
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i32gather_epi32
- #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i32[i] = dst;
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i64gather_epi32
- #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- src_ = simde__m128i_to_private(src),
- mask_ = simde__m128i_to_private(mask),
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i32[i] = dst;
- }
- else {
- r_.i32[i] = src_.i32[i];
- }
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i64gather_epi32
- #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m128i_private
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i32[i] = dst;
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i64gather_epi32
- #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m128i_private
- src_ = simde__m128i_to_private(src),
- mask_ = simde__m128i_to_private(mask),
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int32_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i32[i] = dst;
- }
- else {
- r_.i32[i] = src_.i32[i];
- }
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i64gather_epi32
- #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale)
- #endif
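- /* The i64gather_epi32 variants consume indices twice as wide as the
-  * elements they load, so the 128-bit-index forms above fill only the low
-  * two lanes of the result (the 256-bit-index forms fill all four). r_ is
-  * initialized from setzero so any unwritten upper lanes are well defined,
-  * matching the native intrinsics, which zero them. */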
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i64[i] = dst;
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #else
- #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i32gather_epi64
- #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #endif
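- /* The clang version check above (repeated in the other *gather_epi64
-  * wrappers) only chooses the spelling of the base-pointer cast: compiler
-  * headers disagree on whether these intrinsics take `int64_t const*' or
-  * `long long const*', and the mismatched spelling trips pointer-type
-  * warnings even though the representations are identical here. */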
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- src_ = simde__m128i_to_private(src),
- mask_ = simde__m128i_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i64[i] = dst;
- }
- else {
- r_.i64[i] = src_.i64[i];
- }
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #else
- #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i32gather_epi64
- #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m256i_private
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i64[i] = dst;
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #else
- #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i32gather_epi64
- #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- src_ = simde__m256i_to_private(src),
- mask_ = simde__m256i_to_private(mask),
- r_;
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i64[i] = dst;
- }
- else {
- r_.i64[i] = src_.i64[i];
- }
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #else
- #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i32gather_epi64
- #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i64[i] = dst;
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #else
- #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i64gather_epi64
- #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex),
- src_ = simde__m128i_to_private(src),
- mask_ = simde__m128i_to_private(mask),
- r_ = simde__m128i_to_private(simde_mm_setzero_si128());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i64[i] = dst;
- }
- else {
- r_.i64[i] = src_.i64[i];
- }
- }
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #else
- #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i64gather_epi64
- #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex),
- r_ = simde__m256i_to_private(simde_mm256_setzero_si256());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.i64[i] = dst;
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #else
- #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i64gather_epi64
- #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex),
- src_ = simde__m256i_to_private(src),
- mask_ = simde__m256i_to_private(mask),
- r_ = simde__m256i_to_private(simde_mm256_setzero_si256());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- int64_t dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.i64[i] = dst;
- }
- else {
- r_.i64[i] = src_.i64[i];
- }
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #else
- #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale)
- #endif
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i64gather_epi64
- #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128_private
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f32[i] = dst;
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i32gather_ps
- #define _mm_i32gather_ps(base_addr, vindex, scale) simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128_private
- src_ = simde__m128_to_private(src),
- mask_ = simde__m128_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f32[i] = dst;
- }
- else {
- r_.f32[i] = src_.f32[i];
- }
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i32gather_ps
- #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)
- #endif
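- /* In the *_ps variants the mask is itself a float vector; the fallback
-  * reads each lane's sign bit through the union's i32 view
-  * ((mask_.i32[i] >> 31) & 1), which is exactly the bit the hardware
-  * consults. Such a mask is typically produced by a comparison, e.g.
-  * simde_mm_cmplt_ps. */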
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256
- simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m256_private
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f32[i] = dst;
- }
- return simde__m256_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, (base_addr)), (vindex), (scale))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i32gather_ps
- #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, (base_addr)), (vindex), (scale))
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256
- simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m256_private
- src_ = simde__m256_to_private(src),
- mask_ = simde__m256_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f32[i] = dst;
- }
- else {
- r_.f32[i] = src_.f32[i];
- }
- }
- return simde__m256_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i32gather_ps
- #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128_private
- r_ = simde__m128_to_private(simde_mm_setzero_ps());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f32[i] = dst;
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i64gather_ps
- #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128_private
- src_ = simde__m128_to_private(src),
- mask_ = simde__m128_to_private(mask),
- r_ = simde__m128_to_private(simde_mm_setzero_ps());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f32[i] = dst;
- }
- else {
- r_.f32[i] = src_.f32[i];
- }
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i64gather_ps
- #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m128_private
- r_ = simde__m128_to_private(simde_mm_setzero_ps());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f32[i] = dst;
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i64gather_ps
- #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128
- simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m128_private
- src_ = simde__m128_to_private(src),
- mask_ = simde__m128_to_private(mask),
- r_ = simde__m128_to_private(simde_mm_setzero_ps());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i32[i] >> 31) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float32 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f32[i] = dst;
- }
- else {
- r_.f32[i] = src_.f32[i];
- }
- }
- return simde__m128_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i64gather_ps
- #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128d_private
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f64[i] = dst;
- }
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i32gather_pd
- #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
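- /* Sketch of a double gather (`tbl' and `idx' are illustrative names);
-  * `scale' is in bytes, so contiguous doubles use 8:
-  *
-  *   simde_float64 tbl[] = { 1.0, 2.0, 3.0, 4.0 };
-  *   simde__m128i  idx   = simde_mm_set_epi32(0, 0, 3, 0);
-  *   simde__m128d  r     = simde_mm_i32gather_pd(tbl, idx, 8); // { 1.0, 4.0 }
-  */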
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128d_private
- src_ = simde__m128d_to_private(src),
- mask_ = simde__m128d_to_private(mask),
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f64[i] = dst;
- }
- else {
- r_.f64[i] = src_.f64[i];
- }
- }
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i32gather_pd
- #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256d
- simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m256d_private
- r_;
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f64[i] = dst;
- }
- return simde__m256d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i32gather_pd
- #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256d
- simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256d_private
- src_ = simde__m256d_to_private(src),
- mask_ = simde__m256d_to_private(mask),
- r_;
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f64[i] = dst;
- }
- else {
- r_.f64[i] = src_.f64[i];
- }
- }
- return simde__m256d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i32gather_pd
- #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128d_private
- r_ = simde__m128d_to_private(simde_mm_setzero_pd());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f64[i] = dst;
- }
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_i64gather_pd
- #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128d
- simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m128i_private
- vindex_ = simde__m128i_to_private(vindex);
- simde__m128d_private
- src_ = simde__m128d_to_private(src),
- mask_ = simde__m128d_to_private(mask),
- r_ = simde__m128d_to_private(simde_mm_setzero_pd());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale));
- simde_float64 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f64[i] = dst;
- }
- else {
- r_.f64[i] = src_.f64[i];
- }
- }
- return simde__m128d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_mask_i64gather_pd
- #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256d
- simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m256d_private
- r_ = simde__m256d_to_private(simde_mm256_setzero_pd());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t, vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t, scale));
- simde_float64 dst;
- simde_memcpy(&dst, src, sizeof(dst));
- r_.f64[i] = dst;
- }
- return simde__m256d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_i64gather_pd
- #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256d
- simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale)
- SIMDE_REQUIRE_CONSTANT(scale)
- HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") {
- simde__m256i_private
- vindex_ = simde__m256i_to_private(vindex);
- simde__m256d_private
- src_ = simde__m256d_to_private(src),
- mask_ = simde__m256d_to_private(mask),
- r_ = simde__m256d_to_private(simde_mm256_setzero_pd());
- const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) {
- if ((mask_.i64[i] >> 63) & 1) {
- const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t, vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t, scale));
- simde_float64 dst;
- simde_memcpy(&dst, src1, sizeof(dst));
- r_.f64[i] = dst;
- }
- else {
- r_.f64[i] = src_.f64[i];
- }
- }
- return simde__m256d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mask_i64gather_pd
- #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
- simde__m256i_private a_ = simde__m256i_to_private(a);
- simde__m128i_private b_ = simde__m128i_to_private(b);
- a_.m128i_private[imm8 & 1] = b_;
- return simde__m256i_from_private(a_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_inserti128_si256
- #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8)
- #endif
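- /* _mm256_madd_epi16 semantics: multiply corresponding signed 16-bit lanes
-  * into 32-bit products, then add adjacent pairs of products, i.e.
-  * r.i32[k] = a.i16[2k]*b.i16[2k] + a.i16[2k+1]*b.i16[2k+1]. The
-  * shufflevector path below widens to 32 bits first, then sums the even-
-  * and odd-indexed products. */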
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_madd_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector)
- SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64);
- SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64);
- SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64);
- SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32);
- SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32);
- SIMDE_CONVERT_VECTOR_(a32x16, a_.i16);
- SIMDE_CONVERT_VECTOR_(b32x16, b_.i16);
- product = a32x16 * b32x16;
- even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14);
- odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15);
- r_.i32 = even + odd;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) {
- r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_madd_epi16
- #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b)
- #endif
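- /* _mm256_maddubs_epi16 multiplies unsigned bytes from `a' by signed bytes
-  * from `b', adds adjacent byte-pair products, and saturates the 32-bit sum
-  * to the int16_t range, which is what the clamp on `ts' below implements. */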
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_maddubs_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- const int idx = HEDLEY_STATIC_CAST(int, i) << 1;
- int32_t ts =
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) +
- (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1]));
- r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN;
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_maddubs_epi16
- #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b)
- #endif
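- /* The maskload family loads an element only when the most-significant bit
-  * of the corresponding mask element is set; unselected elements are zeroed.
-  * The arithmetic shift (or vshrq_n_s32 on NEON) broadcasts that sign bit
-  * across the lane to form an all-ones/all-zeros selector. */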
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm_maskload_epi32(mem_addr, mask);
- #else
- simde__m128i_private
- r_,
- mask_ = simde__m128i_to_private(mask),
- mask_shr_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- mask_shr_.i32[i] = mask_.i32[i] >> 31;
- }
- #endif
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0);
- }
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_maskload_epi32
- #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_maskload_epi32(mem_addr, mask);
- #else
- simde__m256i_private
- mask_ = simde__m256i_to_private(mask),
- r_;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0);
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_maskload_epi32
- #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask);
- #else
- simde__m128i_private
- r_,
- mask_ = simde__m128i_to_private(mask),
- mask_shr_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) {
- mask_shr_.i64[i] = mask_.i64[i] >> 63;
- }
- #endif
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0);
- }
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_maskload_epi64
- #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask);
- #else
- simde__m256i_private
- mask_ = simde__m256i_to_private(mask),
- r_;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = (mask_.i64[i] >> 63) ? mem_addr[i] : INT64_C(0);
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_maskload_epi64
- #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask)
- #endif
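- /* The maskstore family is the store-side counterpart: an element of `a' is
-  * written to mem_addr[i] only when the most-significant bit of the
-  * corresponding mask element is set; other memory locations are untouched. */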
- SIMDE_FUNCTION_ATTRIBUTES
- void
- simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- _mm_maskstore_epi32(mem_addr, mask, a);
- #else
- simde__m128i_private mask_ = simde__m128i_to_private(mask);
- simde__m128i_private a_ = simde__m128i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
- if (mask_.u32[i] & (UINT32_C(1) << 31))
- mem_addr[i] = a_.i32[i];
- }
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_maskstore_epi32
- #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void
- simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- _mm256_maskstore_epi32(mem_addr, mask, a);
- #else
- simde__m256i_private mask_ = simde__m256i_to_private(mask);
- simde__m256i_private a_ = simde__m256i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
- if (mask_.u32[i] & (UINT32_C(1) << 31))
- mem_addr[i] = a_.i32[i];
- }
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_maskstore_epi32
- #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void
- simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a);
- #else
- simde__m128i_private mask_ = simde__m128i_to_private(mask);
- simde__m128i_private a_ = simde__m128i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
- if (mask_.u64[i] & (UINT64_C(1) << 63))
- mem_addr[i] = a_.i64[i];
- }
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_maskstore_epi64
- #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- void
- simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a);
- #else
- simde__m256i_private mask_ = simde__m256i_to_private(mask);
- simde__m256i_private a_ = simde__m256i_to_private(a);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
- if (mask_.u64[i] & (UINT64_C(1) << 63))
- mem_addr[i] = a_.i64[i];
- }
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_maskstore_epi64
- #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI)
- return _mm256_max_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epi8
- #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_max_epu8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epu8
- #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_max_epu16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epu16
- #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_max_epu32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epu32
- #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_max_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epi16
- #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_max_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_max_epi32
- #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI)
- return _mm256_min_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epi8
- #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_min_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epi16
- #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_min_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epi32
- #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_min_epu8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epu8
- #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_min_epu16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epu16
- #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_min_epu32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_min_epu32
- #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b)
- #endif
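- /* _mm256_movemask_epi8 packs the most-significant bit of each of the 32
-  * bytes into one 32-bit integer (byte i -> bit i). For example, a vector
-  * whose high bit is set only in bytes 0 and 31 yields the bit pattern
-  * 0x80000001. */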
- SIMDE_FUNCTION_ATTRIBUTES
- int32_t
- simde_mm256_movemask_epi8 (simde__m256i a) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_movemask_epi8(a);
- #else
- simde__m256i_private a_ = simde__m256i_to_private(a);
- uint32_t r = 0;
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) {
- r |= HEDLEY_STATIC_CAST(uint32_t, simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i);
- }
- #else
- r = 0;
- SIMDE_VECTORIZE_REDUCTION(|:r)
- for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
- r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i);
- }
- #endif
- return HEDLEY_STATIC_CAST(int32_t, r);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_movemask_epi8
- #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a)
- #endif
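- /* _mm256_mpsadbw_epu8 operates independently on each 128-bit lane: imm8
-  * bits [2:0] control the low lane and bits [5:3] the high lane. Within a
-  * lane, two bits pick a 4-byte block of `b' and one bit picks a 0- or
-  * 4-byte starting offset in `a'; each result word is the sum of absolute
-  * differences of four bytes. That is what the *_offset* constants encode. */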
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- const int a_offset1 = imm8 & 4;
- const int b_offset1 = (imm8 & 3) << 2;
- const int a_offset2 = (imm8 >> 3) & 4;
- const int b_offset2 = ((imm8 >> 3) & 3) << 2;
- #if defined(simde_math_abs)
- const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2;
- for (int i = 0 ; i < halfway_point ; i++) {
- r_.u16[i] =
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3])));
- r_.u16[halfway_point + i] =
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) +
- HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3])));
- }
- #else
- HEDLEY_UNREACHABLE();
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0)
- #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- #define simde_mm256_mpsadbw_epu8(a, b, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \
- simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mpsadbw_epu8
- #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8)
- #endif
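- /* _mm256_mul_epi32 (and the _epu32 variant after it) multiplies only the
-  * even-indexed 32-bit elements, i.e. the low half of each 64-bit lane,
-  * producing full 64-bit products: r.i64[i] = (int64)a.i32[2i] * b.i32[2i]. */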
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mul_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] =
- HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *
- HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mul_epi32
- #define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mul_epu32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mul_epu32
- #define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mulhi_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16));
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mulhi_epi16
- #define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mulhi_epu16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16);
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mulhi_epu16
- #define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b)
- #endif
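- /* _mm256_mulhrs_epi16 is a fixed-point multiply with rounding:
-  * r.i16[i] = (int16)(((int32)a.i16[i] * b.i16[i] + 0x4000) >> 15),
-  * i.e. the Q15 product rounded to nearest. */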
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mulhrs_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15));
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mulhrs_epi16
- #define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mullo_epi16(a, b);
- #else
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]);
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mullo_epi16
- #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_mullo_epi32(a, b);
- #else
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]);
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_mullo_epi32
- #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.u32 = a_.u32 * b_.u32;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = a_.u32[i] * b_.u32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_or_si256 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_or_si256(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f | b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
- r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_or_si256
- #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b)
- #endif
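- /* The pack family narrows with saturation and interleaves per 128-bit
-  * lane: the low lane of the result takes the saturated low-lane elements
-  * of `a' followed by those of `b', and likewise for the high lane, which
-  * is why the fallbacks index in quarter- and half-vector strides. */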
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_packs_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2;
- const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < quarter_point ; i++) {
- r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));
- r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i]));
- r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i]));
- r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i]));
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_packs_epi16
- #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_packs_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- v_[] = {
- simde__m256i_to_private(a),
- simde__m256i_to_private(b)
- };
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]);
- r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)];
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v));
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_packs_epi32
- #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_packus_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2;
- const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < quarter_point ; i++) {
- r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]));
- r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]));
- r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i]));
- r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i]));
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_packus_epi16
- #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_packus_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]);
- #else
- const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;
- const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4;
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < quarter_point ; i++) {
- r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));
- r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));
- r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i]));
- r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i]));
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_packus_epi32
- #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b)
- #endif
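- /* _mm256_permute2x128_si256 selects each 128-bit half of the result via a
-  * 4-bit field of imm8 (bits [3:0] for the low half, [7:4] for the high):
-  * the two low bits of the field pick one of the four source halves (a.lo,
-  * a.hi, b.lo, b.hi) and bit 3 of the field forces that half to zero. */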
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]);
- r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]);
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_permute2x128_si256
- #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1];
- r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1];
- r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1];
- r_.i64[3] = (imm8 & 0x80) ? a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1];
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_permute4x64_epi64
- #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256d
- simde_mm256_permute4x64_pd (simde__m256d a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256d_private
- r_,
- a_ = simde__m256d_to_private(a);
- r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1];
- r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1];
- r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1];
- r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1];
- return simde__m256d_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_permute4x64_pd
- #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8)
- #endif
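- /* _mm256_permutevar8x32_epi32 is a full cross-lane permute: each result
-  * element takes a.i32[idx.i32[i] & 7]. A hypothetical usage sketch that
-  * reverses the eight lanes:
-  *   simde__m256i rev_idx = simde_mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
-  *   simde__m256i r = simde_mm256_permutevar8x32_epi32(a, rev_idx);
-  */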
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_permutevar8x32_epi32(a, idx);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- idx_ = simde__m256i_to_private(idx);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[idx_.i32[i] & 7];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_permutevar8x32_epi32
- #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256
- simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0)
- return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx));
- #else
- return _mm256_permutevar8x32_ps(a, idx);
- #endif
- #else
- simde__m256_private
- r_,
- a_ = simde__m256_to_private(a);
- simde__m256i_private
- idx_ = simde__m256i_to_private(idx);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
- r_.f32[i] = a_.f32[idx_.i32[i] & 7];
- }
- return simde__m256_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_permutevar8x32_ps
- #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx)
- #endif
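- /* _mm256_sad_epu8 produces four 64-bit sums: r.i64[i] is the sum of
-  * absolute differences of the eight byte pairs a.u8[8i..8i+7] and
-  * b.u8[8i..8i+7]. The uint16_t accumulator below is safe since the
-  * maximum sum is 8 * 255 = 2040. */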
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sad_epu8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]);
- #else
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- uint16_t tmp = 0;
- SIMDE_VECTORIZE_REDUCTION(+:tmp)
- for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) {
- const size_t e = j + (i * 8);
- tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]);
- }
- r_.i64[i] = tmp;
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sad_epu8
- #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b)
- #endif
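- /* _mm256_shuffle_epi8 shuffles bytes within each 128-bit lane: control
-  * byte b selects a.u8[b & 0x0f] from the same lane, and a set high bit
-  * (b & 0x80) zeroes the destination byte instead. */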
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_shuffle_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) {
- r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ];
- r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_shuffle_epi8
- #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
- r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3];
- }
- for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) {
- r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4];
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)
- # define simde_mm256_shuffle_epi32(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- # define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \
- const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \
- simde__m256i_from_private((simde__m256i_private) { .i32 = \
- SIMDE_SHUFFLE_VECTOR_(32, 32, \
- (simde_tmp_a_).i32, \
- (simde_tmp_a_).i32, \
- ((imm8) ) & 3, \
- ((imm8) >> 2) & 3, \
- ((imm8) >> 4) & 3, \
- ((imm8) >> 6) & 3, \
- (((imm8) ) & 3) + 4, \
- (((imm8) >> 2) & 3) + 4, \
- (((imm8) >> 4) & 3) + 4, \
- (((imm8) >> 6) & 3) + 4) }); }))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_shuffle_epi32
- #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_shufflehi_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- # define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \
- const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \
- simde__m256i_from_private((simde__m256i_private) { .i16 = \
- SIMDE_SHUFFLE_VECTOR_(16, 32, \
- (simde_tmp_a_).i16, \
- (simde_tmp_a_).i16, \
- 0, 1, 2, 3, \
- (((imm8) ) & 3) + 4, \
- (((imm8) >> 2) & 3) + 4, \
- (((imm8) >> 4) & 3) + 4, \
- (((imm8) >> 6) & 3) + 4, \
- 8, 9, 10, 11, \
- ((((imm8) ) & 3) + 8 + 4), \
- ((((imm8) >> 2) & 3) + 8 + 4), \
- ((((imm8) >> 4) & 3) + 8 + 4), \
- ((((imm8) >> 6) & 3) + 8 + 4) \
- ) }); }))
- #else
- # define simde_mm256_shufflehi_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \
- simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_shufflehi_epi16
- #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8)
- #endif
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_shufflelo_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- # define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \
- const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \
- simde__m256i_from_private((simde__m256i_private) { .i16 = \
- SIMDE_SHUFFLE_VECTOR_(16, 32, \
- (simde_tmp_a_).i16, \
- (simde_tmp_a_).i16, \
- (((imm8) ) & 3), \
- (((imm8) >> 2) & 3), \
- (((imm8) >> 4) & 3), \
- (((imm8) >> 6) & 3), \
- 4, 5, 6, 7, \
- ((((imm8) ) & 3) + 8), \
- ((((imm8) >> 2) & 3) + 8), \
- ((((imm8) >> 4) & 3) + 8), \
- ((((imm8) >> 6) & 3) + 8), \
- 12, 13, 14, 15) }); }))
- #else
- # define simde_mm256_shufflelo_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \
- simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_shufflelo_epi16
- #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8)
- #endif
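- /* The sign family copies, negates, or zeroes each element of `a' depending
-  * on whether the corresponding element of `b' is positive, negative, or
-  * zero. */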
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sign_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = (b_.i8[i] == INT8_C(0)) ? INT8_C(0) : (b_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sign_epi8
- #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sign_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = (b_.i16[i] == INT16_C(0)) ? INT16_C(0) : (b_.i16[i] < INT16_C(0)) ? -a_.i16[i] : a_.i16[i];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sign_epi16
- #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sign_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- SIMDE_VECTORIZE
- for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
- r_.i32[i] = (b_.i32[i] == INT32_C(0)) ? INT32_C(0) : (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];
- }
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sign_epi32
- #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b)
- #endif
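- /* The sll family shifts every lane left by the scalar count held in the
-  * low 64 bits of `count'; counts larger than the lane width minus one
-  * return all zeros, hence the early simde_mm256_setzero_si256() paths.
-  * The slli forms further below take the count as an immediate instead. */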
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sll_epi16(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 15)
- return simde_mm256_setzero_si256();
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift));
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sll_epi16
- #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sll_epi32(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 31)
- return simde_mm256_setzero_si256();
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift));
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sll_epi32
- #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sll_epi64(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 63)
- return simde_mm256_setzero_si256();
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift));
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sll_epi64
- #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi16 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- /* Note: There is no consistency in how compilers handle values outside of
- the expected range, hence the discrepancy between what we allow and what
- Intel specifies. Some compilers will return 0, others seem to just mask
- off everything outside of the range. */
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8));
- for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) {
- r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv);
- }
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff));
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_slli_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_slli_epi16
- #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi32 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8));
- for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) {
- r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv);
- }
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] << (imm8 & 0xff);
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_slli_epi32(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_slli_epi32
- #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_epi64 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i64[i] << (imm8 & 0xff);
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_slli_epi64(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_slli_epi64
- #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8)
- #endif
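- /* simde_mm256_slli_si256 shifts each 128-bit lane of a left by imm8
- *bytes*, independently per lane; a count of 16 or more clears the lane.
- The portable loop below expresses this as a per-byte gather with zero
- fill. */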
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_slli_si256 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
- const int e = HEDLEY_STATIC_CAST(int, i) - imm8;
- r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0;
- }
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)
- # define simde_mm256_slli_si256(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- # define simde_mm256_slli_si256(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_slli_si256
- #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8)
- #endif
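- /* The sllv_* family shifts each element by a per-element count taken
- from b, unlike sll/slli which apply a single count to every element;
- any count of at least the element width produces zero, which the
- comparison masks below implement branch-free. */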
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) {
- simde__m128i_private
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b),
- r_;
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32));
- r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32)));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_sllv_epi32
- #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sllv_epi32
- #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) {
- simde__m128i_private
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b),
- r_;
- #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
- r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64));
- r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64)));
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_sllv_epi64
- #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sllv_epi64
- #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b)
- #endif
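- /* sra_* (and the immediate srai_* forms below) are arithmetic right
- shifts: vacated bits are filled with copies of the sign bit, and counts
- are clamped to the element width minus one, so an oversized count turns
- each lane into 0 or -1 depending on its sign. */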
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sra_epi16(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 15) shift = 15;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.i16[i] >> shift;
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sra_epi16
- #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sra_epi32(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 31) shift = 31;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int32_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] >> shift;
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sra_epi32
- #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srai_epi16 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8);
- if (shift > 15) shift = 15;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.i16[i] >> shift;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_srai_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srai_epi16
- #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srai_epi32 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8);
- if (shift > 31) shift = 31;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int32_t, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] >> shift;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_srai_epi32(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srai_epi32
- #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8)
- #endif
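- /* srav_epi32 is the per-element variant of sra_epi32: each 32-bit lane
- is shifted by its own count from the second argument, clamped to 31. */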
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm_srav_epi32(a, count);
- #else
- simde__m128i_private
- r_,
- a_ = simde__m128i_to_private(a),
- count_ = simde__m128i_to_private(count);
- #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31)));
- r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt));
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]);
- r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift);
- }
- #endif
- return simde__m128i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_srav_epi32
- #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_srav_epi32(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- count_ = simde__m256i_to_private(count);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]);
- r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]);
- if (shift > 31) shift = 31;
- r_.i32[i] = a_.i32[i] >> shift;
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srav_epi32
- #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count)
- #endif
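- /* The srl_* family performs logical (zero-fill) right shifts by the
- count in the low 64 bits of the second argument; counts of at least the
- element width must leave every lane zero, which the clamps below
- preserve. */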
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_srl_epi16(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 16) shift = 16;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.u16[i] = a_.u16[i] >> (shift);
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srl_epi16
- #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_srl_epi32(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 32) shift = 32;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.u32[i] = a_.u32[i] >> (shift);
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srl_epi32
- #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_srl_epi64(a, count);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count);
- r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count);
- #else
- simde__m128i_private
- count_ = simde__m128i_to_private(count);
- uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]);
- if (shift > 64) shift = 64;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.u64[i] = a_.u64[i] >> (shift);
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srl_epi64
- #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count)
- #endif
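- /* srli_* takes the shift count as an immediate rather than a vector; the
- epi16 variant returns zero outright for counts above 15, mirroring
- Intel's documented behavior. */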
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_epi16 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- if (imm8 > 15)
- return simde_mm256_setzero_si256();
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8));
- for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) {
- r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv);
- }
- #else
- /* Counts above 15 are handled by the early return above. */
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = a_.u16[i] >> imm8;
- }
- #endif
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_srli_epi16(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srli_epi16
- #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_epi32 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
- SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8));
- for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) {
- r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv);
- }
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = a_.u32[i] >> imm8;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_srli_epi32(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srli_epi32
- #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_epi64 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, imm8);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = a_.u64[i] >> imm8;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- # define simde_mm256_srli_epi64(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srli_epi64
- #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8)
- #endif
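- /* simde_mm256_srli_si256 is the byte-wise counterpart of
- simde_mm256_slli_si256: each 128-bit lane is shifted right by imm8
- bytes independently, with zeros shifted in from the top of the lane. */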
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srli_si256 (simde__m256i a, const int imm8)
- SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a);
- for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) {
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
- const int e = imm8 + HEDLEY_STATIC_CAST(int, i);
- r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0;
- }
- }
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- # define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8)
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI)
- # define simde_mm256_srli_si256(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
- # define simde_mm256_srli_si256(a, imm8) \
- simde_mm256_set_m128i( \
- simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \
- simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srli_si256
- #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) {
- simde__m128i_private
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b),
- r_;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_srlv_epi32
- #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srlv_epi32
- #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m128i
- simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) {
- simde__m128i_private
- a_ = simde__m128i_to_private(a),
- b_ = simde__m128i_to_private(b),
- r_;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0;
- }
- #endif
- return simde__m128i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm_srlv_epi64
- #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b),
- r_;
- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
- r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
- r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0;
- }
- #endif
- return simde__m256i_from_private(r_);
- }
- #if defined(SIMDE_X86_AVX2_NATIVE)
- #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b)
- #endif
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_srlv_epi64
- #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b)
- #endif
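- /* simde_mm256_stream_load_si256 is a non-temporal load: mem_addr must be
- 32-byte aligned, and the hint is intended to minimize cache pollution
- where the memory type supports it. The portable fallback is simply an
- aligned load through memcpy. */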
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr));
- #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_load) && defined(SIMDE_VECTOR_SUBSCRIPT)
- return __builtin_nontemporal_load(mem_addr);
- #else
- simde__m256i r;
- simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r));
- return r;
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_stream_load_si256
- #define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sub_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = a_.i8 - b_.i8;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = a_.i8[i] - b_.i8[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sub_epi8
- #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sub_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = a_.i16 - b_.i16;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.i16[i] - b_.i16[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sub_epi16
- #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b)
- #endif
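- /* hsub_epi16 subtracts horizontally adjacent pairs: per 128-bit lane the
- result is a0-a1, a2-a3, ..., followed by the same pairs of b. Phrasing
- it as (even elements) - (odd elements) via the deinterleave helpers lets
- a single vector subtraction handle every pair at once; hsub_epi32 and
- hsubs_epi16 below use the same construction. */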
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_hsub_epi16(a, b);
- #else
- return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_hsub_epi16
- #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sub_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32 = a_.i32 - b_.i32;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
- r_.i32[i] = a_.i32[i] - b_.i32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sub_epi32
- #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_hsub_epi32(a, b);
- #else
- return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b));
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_hsub_epi32
- #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_sub_epi64(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i64 = a_.i64 - b_.i64;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i64[i] - b_.i64[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_sub_epi64
- #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) {
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.u32 = a_.u32 - b_.u32;
- #elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
- r_.u32[i] = a_.u32[i] - b_.u32[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- }
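- /* The subs_* family subtracts with saturation instead of wrapping: for
- example, subs_epi8(-100, 100) yields -128 (INT8_MIN), and
- subs_epu8(10, 20) yields 0 because unsigned saturation clamps at
- zero. */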
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_subs_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_subs_epi8
- #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_subs_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_subs_epi16
- #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_hsubs_epi16(a, b);
- #else
- return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_hsubs_epi16
- #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_subs_epu8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_subs_epu8
- #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_subs_epu16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]);
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_subs_epu16
- #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b)
- #endif
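- /* simde_x_mm256_test_all_ones is a SIMDe extension (the x_ prefix marks
- functions with no Intel equivalent); it returns 1 if and only if every
- bit of a is set, computed as a bitwise-AND reduction over the lanes. */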
- SIMDE_FUNCTION_ATTRIBUTES
- int
- simde_x_mm256_test_all_ones (simde__m256i a) {
- simde__m256i_private a_ = simde__m256i_to_private(a);
- int r;
- int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0);
- SIMDE_VECTORIZE_REDUCTION(&:r_)
- for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
- r_ &= a_.i32f[i];
- }
- r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0));
- return r;
- }
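- /* The unpack{lo,hi}_* family interleaves elements from a and b within
- each 128-bit lane rather than across the whole 256-bit register, a
- common surprise when porting SSE code; that lane split is why the
- shuffle index lists below jump from 7/39 to 16/48 mid-sequence. */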
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpacklo_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,
- 0, 32, 1, 33, 2, 34, 3, 35,
- 4, 36, 5, 37, 6, 38, 7, 39,
- 16, 48, 17, 49, 18, 50, 19, 51,
- 20, 52, 21, 53, 22, 54, 23, 55);
- #else
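- /* ~(~i | 7) equals i & ~7 (De Morgan), so the source index is
- i + (i & ~7): iterations 0..7 read lane 0 bytes 0..7, iterations 8..15
- read lane 1 bytes 16..23. */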
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) {
- r_.i8[2 * i] = a_.i8[i + ~(~i | 7)];
- r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpacklo_epi8
- #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpacklo_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,
- 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) {
- r_.i16[2 * i] = a_.i16[i + ~(~i | 3)];
- r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpacklo_epi16
- #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpacklo_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,
- 0, 8, 1, 9, 4, 12, 5, 13);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) {
- r_.i32[2 * i] = a_.i32[i + ~(~i | 1)];
- r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpacklo_epi32
- #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpacklo_epi64(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) {
- r_.i64[2 * i] = a_.i64[2 * i];
- r_.i64[2 * i + 1] = b_.i64[2 * i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpacklo_epi64
- #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpackhi_epi8(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,
- 8, 40, 9, 41, 10, 42, 11, 43,
- 12, 44, 13, 45, 14, 46, 15, 47,
- 24, 56, 25, 57, 26, 58, 27, 59,
- 28, 60, 29, 61, 30, 62, 31, 63);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) {
- r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)];
- r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpackhi_epi8
- #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpackhi_epi16(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,
- 4, 20, 5, 21, 6, 22, 7, 23,
- 12, 28, 13, 29, 14, 30, 15, 31);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) {
- r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)];
- r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpackhi_epi16
- #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpackhi_epi32(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,
- 2, 10, 3, 11, 6, 14, 7, 15);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) {
- r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)];
- r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpackhi_epi32
- #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
- #endif
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_unpackhi_epi64(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_SHUFFLE_VECTOR_)
- r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) {
- r_.i64[2 * i] = a_.i64[2 * i + 1];
- r_.i64[2 * i + 1] = b_.i64[2 * i + 1];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_unpackhi_epi64
- #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
- #endif
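- /* Bitwise XOR of the full 256 bits; the lane width is irrelevant to the
- result, so the fallbacks are free to use the i32f (int_fast32_t) lanes
- or to split into 128-bit halves. */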
- SIMDE_FUNCTION_ATTRIBUTES
- simde__m256i
- simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {
- #if defined(SIMDE_X86_AVX2_NATIVE)
- return _mm256_xor_si256(a, b);
- #else
- simde__m256i_private
- r_,
- a_ = simde__m256i_to_private(a),
- b_ = simde__m256i_to_private(b);
- #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
- r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
- #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i32f = a_.i32f ^ b_.i32f;
- #else
- SIMDE_VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
- r_.i64[i] = a_.i64[i] ^ b_.i64[i];
- }
- #endif
- return simde__m256i_from_private(r_);
- #endif
- }
- #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
- #undef _mm256_xor_si256
- #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b)
- #endif
- SIMDE_END_DECLS_
- HEDLEY_DIAGNOSTIC_POP
- #endif /* !defined(SIMDE_X86_AVX2_H) */