chacha20poly1305_amd64.s 186 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762
  1. // Code generated by command: go run chacha20poly1305_amd64_asm.go -out ../chacha20poly1305_amd64.s -pkg chacha20poly1305. DO NOT EDIT.
  2. //go:build gc && !purego
  3. #include "textflag.h"
  4. // func polyHashADInternal<>()
  //
  // polyHashADInternal absorbs a byte string into a three-limb accumulator h,
  // one 16-byte block at a time: each block is added to h, then h is multiplied
  // by the 128-bit value r stored at (BP) and folded back to about 130 bits.
  //
  // There are no Go arguments (frame size $0); everything is passed in registers:
  //   In:       CX      = pointer to the data
  //             R9      = length of the data in bytes
  //             (BP)    = low 64 bits of r, 8(BP) = high 64 bits of r
  //   Out:      R10 (low), R11, R12 (high) = h; h is zeroed on entry
  //   Clobbers: AX, DX, R8, R13, R14, R15, flags; CX and R9 are consumed
  //             on every path except the 13-byte one.
  //
  // NOTE(review): the caller visible later in this file stores the clamped key
  // at (BP) and loads ad_base/ad_len into CX/R9 right before the CALL.
  5. TEXT polyHashADInternal<>(SB), NOSPLIT, $0
  // The macros below are preprocessor definitions only; they emit no
  // instructions here and are expanded by the functions that follow.
  6. // Hack: Must declare #define macros inside of a function due to Avo constraints
  7. // ROL rotates the uint32s in register R left by N bits, using temporary T.
  8. #define ROL(N, R, T) \
  9. MOVO R, T; \
  10. PSLLL $(N), T; \
  11. PSRLL $(32-(N)), R; \
  12. PXOR T, R
  13. // ROL8 rotates the uint32s in register R left by 8, using temporary T if needed.
  // With GOAMD64_v2 defined the rotate is a single PSHUFB against the rol8
  // table and T is left unused; otherwise it falls back to the generic ROL.
  14. #ifdef GOAMD64_v2
  15. #define ROL8(R, T) PSHUFB ·rol8<>(SB), R
  16. #else
  17. #define ROL8(R, T) ROL(8, R, T)
  18. #endif
  19. // ROL16 rotates the uint32s in register R left by 16, using temporary T if needed.
  // Same selection as ROL8, using the rol16 table.
  20. #ifdef GOAMD64_v2
  21. #define ROL16(R, T) PSHUFB ·rol16<>(SB), R
  22. #else
  23. #define ROL16(R, T) ROL(16, R, T)
  24. #endif
  // Start with h = R12:R11:R10 = 0.
  25. XORQ R10, R10
  26. XORQ R11, R11
  27. XORQ R12, R12
  // A length of exactly 13 (0x0d) bytes takes the straight-line path below;
  // every other length goes through the generic block loop.
  // NOTE(review): 13 is presumably a common header size for some caller
  // (e.g. TLS) - the reason is not stated in this file; confirm upstream.
  28. CMPQ R9, $0x0d
  29. JNE hashADLoop
  // Load the 13 bytes as a single block without reading past the buffer:
  // bytes 0-7 go to R10; the 8-byte load at offset 5 covers bytes 5-12 and
  // the 24-bit right shift drops bytes 5-7, leaving bytes 8-12 in R11 with
  // the top three bytes zero.
  30. MOVQ (CX), R10
  31. MOVQ 5(CX), R11
  32. SHRQ $0x18, R11
  // h is still zero here, so writing 1 to R12 sets bit 128 exactly as the
  // ADCQ $0x01 does for the other blocks.
  33. MOVQ $0x00000001, R12
  // Multiply h (R12:R11:R10) by r (8(BP):(BP)); the product is built in
  // R8:R15:R14:R13.
  // r0*h0 -> R14:R13. R15 keeps a copy of r0 for the h2 term.
  34. MOVQ (BP), AX
  35. MOVQ AX, R15
  36. MULQ R10
  37. MOVQ AX, R13
  38. MOVQ DX, R14
  // r0*h1 is added at limb 1; R15 = r0*h2 (low 64 bits) seeds limb 2 and
  // takes the high word of r0*h1 plus carry.
  39. MOVQ (BP), AX
  40. MULQ R11
  41. IMULQ R12, R15
  42. ADDQ AX, R14
  43. ADCQ DX, R15
  // r1*h0 is added at limb 1. Its high word plus carry is parked in R10,
  // which is free because h0 is not read again.
  44. MOVQ 8(BP), AX
  45. MOVQ AX, R8
  46. MULQ R10
  47. ADDQ AX, R14
  48. ADCQ $0x00, DX
  49. MOVQ DX, R10
  // r1*h1 is added at limb 2; R8 = r1*h2 seeds limb 3 and collects the
  // remaining high word and carries.
  50. MOVQ 8(BP), AX
  51. MULQ R11
  52. ADDQ AX, R15
  53. ADCQ $0x00, DX
  54. IMULQ R12, R8
  55. ADDQ R10, R15
  56. ADCQ DX, R8
  // Fold the product back into three limbs. The low 130 bits
  // (R13, R14, R15 & 3) become the new h. Let c be the bits from 2^130 up,
  // i.e. (R8:R15) >> 2. Then 4*c = (R8 : R15 &^ 3) is added first and
  // c itself second, so h = low130 + 5*c (2^130 is congruent to 5 modulo
  // 2^130 - 5).
  57. MOVQ R13, R10
  58. MOVQ R14, R11
  59. MOVQ R15, R12
  60. ANDQ $0x03, R12
  61. MOVQ R15, R13
  62. ANDQ $-4, R13
  63. MOVQ R8, R14
  // Double shift: R15 is shifted right by 2 with the vacated top bits filled
  // from R8, then R8 itself is shifted, giving c in R8:R15.
  64. SHRQ $0x02, R8, R15
  65. SHRQ $0x02, R8
  66. ADDQ R13, R10
  67. ADCQ R14, R11
  68. ADCQ $0x00, R12
  69. ADDQ R15, R10
  70. ADCQ R8, R11
  71. ADCQ $0x00, R12
  72. RET
  73. hashADLoop:
  74. // Hash in 16 byte chunks
  // Fewer than 16 bytes left (unsigned compare): go handle the tail.
  75. CMPQ R9, $0x10
  76. JB hashADTail
  // h += next 16 bytes, plus 1 in the top limb (bit 128); the ADCQ also
  // absorbs the carry out of the low two limbs.
  77. ADDQ (CX), R10
  78. ADCQ 8(CX), R11
  79. ADCQ $0x01, R12
  // Advance the data pointer and shrink the remaining length by one block.
  80. LEAQ 16(CX), CX
  81. SUBQ $0x10, R9
  // Same h*r multiply into R8:R15:R14:R13 as in the 13-byte path.
  82. MOVQ (BP), AX
  83. MOVQ AX, R15
  84. MULQ R10
  85. MOVQ AX, R13
  86. MOVQ DX, R14
  87. MOVQ (BP), AX
  88. MULQ R11
  89. IMULQ R12, R15
  90. ADDQ AX, R14
  91. ADCQ DX, R15
  92. MOVQ 8(BP), AX
  93. MOVQ AX, R8
  94. MULQ R10
  95. ADDQ AX, R14
  96. ADCQ $0x00, DX
  97. MOVQ DX, R10
  98. MOVQ 8(BP), AX
  99. MULQ R11
  100. ADDQ AX, R15
  101. ADCQ $0x00, DX
  102. IMULQ R12, R8
  103. ADDQ R10, R15
  104. ADCQ DX, R8
  // Same fold as above: h = low 130 bits + 4*c + c.
  105. MOVQ R13, R10
  106. MOVQ R14, R11
  107. MOVQ R15, R12
  108. ANDQ $0x03, R12
  109. MOVQ R15, R13
  110. ANDQ $-4, R13
  111. MOVQ R8, R14
  112. SHRQ $0x02, R8, R15
  113. SHRQ $0x02, R8
  114. ADDQ R13, R10
  115. ADCQ R14, R11
  116. ADCQ $0x00, R12
  117. ADDQ R15, R10
  118. ADCQ R8, R11
  119. ADCQ $0x00, R12
  120. JMP hashADLoop
  121. hashADTail:
  // Nothing left over: return with h as it stands.
  122. CMPQ R9, $0x00
  123. JE hashADDone
  124. // Hash last < 16 byte tail
  // The partial block is assembled in R14:R13, starting from zero. R15 is
  // the byte staging register. CX is moved to one past the last byte so the
  // loop can walk backwards.
  125. XORQ R13, R13
  126. XORQ R14, R14
  127. XORQ R15, R15
  128. ADDQ R9, CX
  129. hashADTailLoop:
  // Per byte: shift R14:R13 left by 8 (the double shift moves the top byte
  // of R13 into R14) and drop the byte at -1(CX) into the freed low byte.
  // Reading last-to-first leaves byte 0 in the low bits of R13 and the
  // unused high bytes zero.
  130. SHLQ $0x08, R13, R14
  131. SHLQ $0x08, R13
  // R15 was zeroed above; this relies on MOVB replacing only its low byte.
  132. MOVB -1(CX), R15
  133. XORQ R15, R13
  134. DECQ CX
  // The JNE tests the zero flag from DECQ R9: loop until no bytes remain.
  135. DECQ R9
  136. JNE hashADTailLoop
  // h += zero-padded tail block, with the same +1 in the top limb (bit 128)
  // that full blocks receive.
  137. ADDQ R13, R10
  138. ADCQ R14, R11
  139. ADCQ $0x01, R12
  // Same h*r multiply into R8:R15:R14:R13 as in the 13-byte path.
  140. MOVQ (BP), AX
  141. MOVQ AX, R15
  142. MULQ R10
  143. MOVQ AX, R13
  144. MOVQ DX, R14
  145. MOVQ (BP), AX
  146. MULQ R11
  147. IMULQ R12, R15
  148. ADDQ AX, R14
  149. ADCQ DX, R15
  150. MOVQ 8(BP), AX
  151. MOVQ AX, R8
  152. MULQ R10
  153. ADDQ AX, R14
  154. ADCQ $0x00, DX
  155. MOVQ DX, R10
  156. MOVQ 8(BP), AX
  157. MULQ R11
  158. ADDQ AX, R15
  159. ADCQ $0x00, DX
  160. IMULQ R12, R8
  161. ADDQ R10, R15
  162. ADCQ DX, R8
  // Same fold as above: h = low 130 bits + 4*c + c.
  163. MOVQ R13, R10
  164. MOVQ R14, R11
  165. MOVQ R15, R12
  166. ANDQ $0x03, R12
  167. MOVQ R15, R13
  168. ANDQ $-4, R13
  169. MOVQ R8, R14
  170. SHRQ $0x02, R8, R15
  171. SHRQ $0x02, R8
  172. ADDQ R13, R10
  173. ADCQ R14, R11
  174. ADCQ $0x00, R12
  175. ADDQ R15, R10
  176. ADCQ R8, R11
  177. ADCQ $0x00, R12
  178. hashADDone:
  // Result is left in R12:R11:R10 for the caller.
  179. RET
  180. // func chacha20Poly1305Open(dst []byte, key []uint32, src []byte, ad []byte) bool
  181. // Requires: AVX, AVX2, BMI2, CMOV, SSE2
  182. TEXT ·chacha20Poly1305Open(SB), $288-97
  183. // For aligned stack access
  184. MOVQ SP, BP
  185. ADDQ $0x20, BP
  186. ANDQ $-32, BP
  187. MOVQ dst_base+0(FP), DI
  188. MOVQ key_base+24(FP), R8
  189. MOVQ src_base+48(FP), SI
  190. MOVQ src_len+56(FP), BX
  191. MOVQ ad_base+72(FP), CX
  192. // Check for AVX2 support
  193. CMPB ·useAVX2+0(SB), $0x01
  194. JE chacha20Poly1305Open_AVX2
  195. // Special optimization, for very short buffers
  196. CMPQ BX, $0x80
  197. JBE openSSE128
  198. // For long buffers, prepare the poly key first
  199. MOVOU ·chacha20Constants<>+0(SB), X0
  200. MOVOU 16(R8), X3
  201. MOVOU 32(R8), X6
  202. MOVOU 48(R8), X9
  203. MOVO X9, X13
  204. // Store state on stack for future use
  205. MOVO X3, 32(BP)
  206. MOVO X6, 48(BP)
  207. MOVO X9, 128(BP)
  208. MOVQ $0x0000000a, R9
  209. openSSEPreparePolyKey:
  210. PADDD X3, X0
  211. PXOR X0, X9
  212. ROL16(X9, X12)
  213. PADDD X9, X6
  214. PXOR X6, X3
  215. MOVO X3, X12
  216. PSLLL $0x0c, X12
  217. PSRLL $0x14, X3
  218. PXOR X12, X3
  219. PADDD X3, X0
  220. PXOR X0, X9
  221. ROL8(X9, X12)
  222. PADDD X9, X6
  223. PXOR X6, X3
  224. MOVO X3, X12
  225. PSLLL $0x07, X12
  226. PSRLL $0x19, X3
  227. PXOR X12, X3
  228. BYTE $0x66
  229. BYTE $0x0f
  230. BYTE $0x3a
  231. BYTE $0x0f
  232. BYTE $0xdb
  233. BYTE $0x04
  234. BYTE $0x66
  235. BYTE $0x0f
  236. BYTE $0x3a
  237. BYTE $0x0f
  238. BYTE $0xf6
  239. BYTE $0x08
  240. BYTE $0x66
  241. BYTE $0x45
  242. BYTE $0x0f
  243. BYTE $0x3a
  244. BYTE $0x0f
  245. BYTE $0xc9
  246. BYTE $0x0c
  247. PADDD X3, X0
  248. PXOR X0, X9
  249. ROL16(X9, X12)
  250. PADDD X9, X6
  251. PXOR X6, X3
  252. MOVO X3, X12
  253. PSLLL $0x0c, X12
  254. PSRLL $0x14, X3
  255. PXOR X12, X3
  256. PADDD X3, X0
  257. PXOR X0, X9
  258. ROL8(X9, X12)
  259. PADDD X9, X6
  260. PXOR X6, X3
  261. MOVO X3, X12
  262. PSLLL $0x07, X12
  263. PSRLL $0x19, X3
  264. PXOR X12, X3
  265. BYTE $0x66
  266. BYTE $0x0f
  267. BYTE $0x3a
  268. BYTE $0x0f
  269. BYTE $0xdb
  270. BYTE $0x0c
  271. BYTE $0x66
  272. BYTE $0x0f
  273. BYTE $0x3a
  274. BYTE $0x0f
  275. BYTE $0xf6
  276. BYTE $0x08
  277. BYTE $0x66
  278. BYTE $0x45
  279. BYTE $0x0f
  280. BYTE $0x3a
  281. BYTE $0x0f
  282. BYTE $0xc9
  283. BYTE $0x04
  284. DECQ R9
  285. JNE openSSEPreparePolyKey
  286. // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded
  287. PADDL ·chacha20Constants<>+0(SB), X0
  288. PADDL 32(BP), X3
  289. // Clamp and store the key
  290. PAND ·polyClampMask<>+0(SB), X0
  291. MOVO X0, (BP)
  292. MOVO X3, 16(BP)
  293. // Hash AAD
  294. MOVQ ad_len+80(FP), R9
  295. CALL polyHashADInternal<>(SB)
  296. openSSEMainLoop:
  297. CMPQ BX, $0x00000100
  298. JB openSSEMainLoopDone
  299. // Load state, increment counter blocks
  300. MOVO ·chacha20Constants<>+0(SB), X0
  301. MOVO 32(BP), X3
  302. MOVO 48(BP), X6
  303. MOVO 128(BP), X9
  304. PADDL ·sseIncMask<>+0(SB), X9
  305. MOVO X0, X1
  306. MOVO X3, X4
  307. MOVO X6, X7
  308. MOVO X9, X10
  309. PADDL ·sseIncMask<>+0(SB), X10
  310. MOVO X1, X2
  311. MOVO X4, X5
  312. MOVO X7, X8
  313. MOVO X10, X11
  314. PADDL ·sseIncMask<>+0(SB), X11
  315. MOVO X2, X12
  316. MOVO X5, X13
  317. MOVO X8, X14
  318. MOVO X11, X15
  319. PADDL ·sseIncMask<>+0(SB), X15
  320. // Store counters
  321. MOVO X9, 80(BP)
  322. MOVO X10, 96(BP)
  323. MOVO X11, 112(BP)
  324. MOVO X15, 128(BP)
  325. // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash
  326. // 2 blocks, and for the remaining 4 only 1 block - for a total of 16
  327. MOVQ $0x00000004, CX
  328. MOVQ SI, R9
  329. openSSEInternalLoop:
  330. MOVO X14, 64(BP)
  331. PADDD X3, X0
  332. PXOR X0, X9
  333. ROL16(X9, X14)
  334. PADDD X9, X6
  335. PXOR X6, X3
  336. MOVO X3, X14
  337. PSLLL $0x0c, X14
  338. PSRLL $0x14, X3
  339. PXOR X14, X3
  340. PADDD X3, X0
  341. PXOR X0, X9
  342. ROL8(X9, X14)
  343. PADDD X9, X6
  344. PXOR X6, X3
  345. MOVO X3, X14
  346. PSLLL $0x07, X14
  347. PSRLL $0x19, X3
  348. PXOR X14, X3
  349. PADDD X4, X1
  350. PXOR X1, X10
  351. ROL16(X10, X14)
  352. PADDD X10, X7
  353. PXOR X7, X4
  354. MOVO X4, X14
  355. PSLLL $0x0c, X14
  356. PSRLL $0x14, X4
  357. PXOR X14, X4
  358. PADDD X4, X1
  359. PXOR X1, X10
  360. ROL8(X10, X14)
  361. PADDD X10, X7
  362. PXOR X7, X4
  363. MOVO X4, X14
  364. PSLLL $0x07, X14
  365. PSRLL $0x19, X4
  366. PXOR X14, X4
  367. PADDD X5, X2
  368. PXOR X2, X11
  369. ROL16(X11, X14)
  370. PADDD X11, X8
  371. PXOR X8, X5
  372. MOVO X5, X14
  373. PSLLL $0x0c, X14
  374. PSRLL $0x14, X5
  375. PXOR X14, X5
  376. PADDD X5, X2
  377. PXOR X2, X11
  378. ROL8(X11, X14)
  379. PADDD X11, X8
  380. PXOR X8, X5
  381. MOVO X5, X14
  382. PSLLL $0x07, X14
  383. PSRLL $0x19, X5
  384. PXOR X14, X5
  385. MOVO 64(BP), X14
  386. MOVO X7, 64(BP)
  387. PADDD X13, X12
  388. PXOR X12, X15
  389. ROL16(X15, X7)
  390. PADDD X15, X14
  391. PXOR X14, X13
  392. MOVO X13, X7
  393. PSLLL $0x0c, X7
  394. PSRLL $0x14, X13
  395. PXOR X7, X13
  396. PADDD X13, X12
  397. PXOR X12, X15
  398. ROL8(X15, X7)
  399. PADDD X15, X14
  400. PXOR X14, X13
  401. MOVO X13, X7
  402. PSLLL $0x07, X7
  403. PSRLL $0x19, X13
  404. PXOR X7, X13
  405. MOVO 64(BP), X7
  406. ADDQ (R9), R10
  407. ADCQ 8(R9), R11
  408. ADCQ $0x01, R12
  409. BYTE $0x66
  410. BYTE $0x0f
  411. BYTE $0x3a
  412. BYTE $0x0f
  413. BYTE $0xdb
  414. BYTE $0x04
  415. BYTE $0x66
  416. BYTE $0x0f
  417. BYTE $0x3a
  418. BYTE $0x0f
  419. BYTE $0xe4
  420. BYTE $0x04
  421. BYTE $0x66
  422. BYTE $0x0f
  423. BYTE $0x3a
  424. BYTE $0x0f
  425. BYTE $0xed
  426. BYTE $0x04
  427. BYTE $0x66
  428. BYTE $0x45
  429. BYTE $0x0f
  430. BYTE $0x3a
  431. BYTE $0x0f
  432. BYTE $0xed
  433. BYTE $0x04
  434. BYTE $0x66
  435. BYTE $0x0f
  436. BYTE $0x3a
  437. BYTE $0x0f
  438. BYTE $0xf6
  439. BYTE $0x08
  440. BYTE $0x66
  441. BYTE $0x0f
  442. BYTE $0x3a
  443. BYTE $0x0f
  444. BYTE $0xff
  445. BYTE $0x08
  446. BYTE $0x66
  447. BYTE $0x45
  448. BYTE $0x0f
  449. BYTE $0x3a
  450. BYTE $0x0f
  451. BYTE $0xc0
  452. BYTE $0x08
  453. BYTE $0x66
  454. BYTE $0x45
  455. BYTE $0x0f
  456. BYTE $0x3a
  457. BYTE $0x0f
  458. BYTE $0xf6
  459. BYTE $0x08
  460. BYTE $0x66
  461. BYTE $0x45
  462. BYTE $0x0f
  463. BYTE $0x3a
  464. BYTE $0x0f
  465. BYTE $0xc9
  466. BYTE $0x0c
  467. BYTE $0x66
  468. BYTE $0x45
  469. BYTE $0x0f
  470. BYTE $0x3a
  471. BYTE $0x0f
  472. BYTE $0xd2
  473. BYTE $0x0c
  474. BYTE $0x66
  475. BYTE $0x45
  476. BYTE $0x0f
  477. BYTE $0x3a
  478. BYTE $0x0f
  479. BYTE $0xdb
  480. BYTE $0x0c
  481. BYTE $0x66
  482. BYTE $0x45
  483. BYTE $0x0f
  484. BYTE $0x3a
  485. BYTE $0x0f
  486. BYTE $0xff
  487. BYTE $0x0c
  488. MOVQ (BP), AX
  489. MOVQ AX, R15
  490. MULQ R10
  491. MOVQ AX, R13
  492. MOVQ DX, R14
  493. MOVQ (BP), AX
  494. MULQ R11
  495. IMULQ R12, R15
  496. ADDQ AX, R14
  497. ADCQ DX, R15
  498. MOVQ 8(BP), AX
  499. MOVQ AX, R8
  500. MULQ R10
  501. ADDQ AX, R14
  502. ADCQ $0x00, DX
  503. MOVQ DX, R10
  504. MOVQ 8(BP), AX
  505. MULQ R11
  506. ADDQ AX, R15
  507. ADCQ $0x00, DX
  508. LEAQ 16(R9), R9
  509. MOVO X14, 64(BP)
  510. PADDD X3, X0
  511. PXOR X0, X9
  512. ROL16(X9, X14)
  513. PADDD X9, X6
  514. PXOR X6, X3
  515. MOVO X3, X14
  516. PSLLL $0x0c, X14
  517. PSRLL $0x14, X3
  518. PXOR X14, X3
  519. PADDD X3, X0
  520. PXOR X0, X9
  521. ROL8(X9, X14)
  522. PADDD X9, X6
  523. PXOR X6, X3
  524. MOVO X3, X14
  525. PSLLL $0x07, X14
  526. PSRLL $0x19, X3
  527. PXOR X14, X3
  528. PADDD X4, X1
  529. PXOR X1, X10
  530. ROL16(X10, X14)
  531. PADDD X10, X7
  532. PXOR X7, X4
  533. MOVO X4, X14
  534. PSLLL $0x0c, X14
  535. PSRLL $0x14, X4
  536. PXOR X14, X4
  537. PADDD X4, X1
  538. PXOR X1, X10
  539. ROL8(X10, X14)
  540. PADDD X10, X7
  541. PXOR X7, X4
  542. MOVO X4, X14
  543. PSLLL $0x07, X14
  544. PSRLL $0x19, X4
  545. PXOR X14, X4
  546. PADDD X5, X2
  547. PXOR X2, X11
  548. ROL16(X11, X14)
  549. PADDD X11, X8
  550. PXOR X8, X5
  551. MOVO X5, X14
  552. PSLLL $0x0c, X14
  553. PSRLL $0x14, X5
  554. PXOR X14, X5
  555. PADDD X5, X2
  556. PXOR X2, X11
  557. ROL8(X11, X14)
  558. PADDD X11, X8
  559. PXOR X8, X5
  560. MOVO X5, X14
  561. PSLLL $0x07, X14
  562. PSRLL $0x19, X5
  563. PXOR X14, X5
  564. MOVO 64(BP), X14
  565. MOVO X7, 64(BP)
  566. IMULQ R12, R8
  567. ADDQ R10, R15
  568. ADCQ DX, R8
  569. PADDD X13, X12
  570. PXOR X12, X15
  571. ROL16(X15, X7)
  572. PADDD X15, X14
  573. PXOR X14, X13
  574. MOVO X13, X7
  575. PSLLL $0x0c, X7
  576. PSRLL $0x14, X13
  577. PXOR X7, X13
  578. PADDD X13, X12
  579. PXOR X12, X15
  580. ROL8(X15, X7)
  581. PADDD X15, X14
  582. PXOR X14, X13
  583. MOVO X13, X7
  584. PSLLL $0x07, X7
  585. PSRLL $0x19, X13
  586. PXOR X7, X13
  587. MOVO 64(BP), X7
  588. MOVQ R13, R10
  589. MOVQ R14, R11
  590. MOVQ R15, R12
  591. ANDQ $0x03, R12
  592. MOVQ R15, R13
  593. ANDQ $-4, R13
  594. MOVQ R8, R14
  595. SHRQ $0x02, R8, R15
  596. SHRQ $0x02, R8
  597. ADDQ R13, R10
  598. ADCQ R14, R11
  599. ADCQ $0x00, R12
  600. ADDQ R15, R10
  601. ADCQ R8, R11
  602. ADCQ $0x00, R12
  603. BYTE $0x66
  604. BYTE $0x0f
  605. BYTE $0x3a
  606. BYTE $0x0f
  607. BYTE $0xdb
  608. BYTE $0x0c
  609. BYTE $0x66
  610. BYTE $0x0f
  611. BYTE $0x3a
  612. BYTE $0x0f
  613. BYTE $0xe4
  614. BYTE $0x0c
  615. BYTE $0x66
  616. BYTE $0x0f
  617. BYTE $0x3a
  618. BYTE $0x0f
  619. BYTE $0xed
  620. BYTE $0x0c
  621. BYTE $0x66
  622. BYTE $0x45
  623. BYTE $0x0f
  624. BYTE $0x3a
  625. BYTE $0x0f
  626. BYTE $0xed
  627. BYTE $0x0c
  628. BYTE $0x66
  629. BYTE $0x0f
  630. BYTE $0x3a
  631. BYTE $0x0f
  632. BYTE $0xf6
  633. BYTE $0x08
  634. BYTE $0x66
  635. BYTE $0x0f
  636. BYTE $0x3a
  637. BYTE $0x0f
  638. BYTE $0xff
  639. BYTE $0x08
  640. BYTE $0x66
  641. BYTE $0x45
  642. BYTE $0x0f
  643. BYTE $0x3a
  644. BYTE $0x0f
  645. BYTE $0xc0
  646. BYTE $0x08
  647. BYTE $0x66
  648. BYTE $0x45
  649. BYTE $0x0f
  650. BYTE $0x3a
  651. BYTE $0x0f
  652. BYTE $0xf6
  653. BYTE $0x08
  654. BYTE $0x66
  655. BYTE $0x45
  656. BYTE $0x0f
  657. BYTE $0x3a
  658. BYTE $0x0f
  659. BYTE $0xc9
  660. BYTE $0x04
  661. BYTE $0x66
  662. BYTE $0x45
  663. BYTE $0x0f
  664. BYTE $0x3a
  665. BYTE $0x0f
  666. BYTE $0xd2
  667. BYTE $0x04
  668. BYTE $0x66
  669. BYTE $0x45
  670. BYTE $0x0f
  671. BYTE $0x3a
  672. BYTE $0x0f
  673. BYTE $0xdb
  674. BYTE $0x04
  675. BYTE $0x66
  676. BYTE $0x45
  677. BYTE $0x0f
  678. BYTE $0x3a
  679. BYTE $0x0f
  680. BYTE $0xff
  681. BYTE $0x04
  682. DECQ CX
  683. JGE openSSEInternalLoop
  684. ADDQ (R9), R10
  685. ADCQ 8(R9), R11
  686. ADCQ $0x01, R12
  687. MOVQ (BP), AX
  688. MOVQ AX, R15
  689. MULQ R10
  690. MOVQ AX, R13
  691. MOVQ DX, R14
  692. MOVQ (BP), AX
  693. MULQ R11
  694. IMULQ R12, R15
  695. ADDQ AX, R14
  696. ADCQ DX, R15
  697. MOVQ 8(BP), AX
  698. MOVQ AX, R8
  699. MULQ R10
  700. ADDQ AX, R14
  701. ADCQ $0x00, DX
  702. MOVQ DX, R10
  703. MOVQ 8(BP), AX
  704. MULQ R11
  705. ADDQ AX, R15
  706. ADCQ $0x00, DX
  707. IMULQ R12, R8
  708. ADDQ R10, R15
  709. ADCQ DX, R8
  710. MOVQ R13, R10
  711. MOVQ R14, R11
  712. MOVQ R15, R12
  713. ANDQ $0x03, R12
  714. MOVQ R15, R13
  715. ANDQ $-4, R13
  716. MOVQ R8, R14
  717. SHRQ $0x02, R8, R15
  718. SHRQ $0x02, R8
  719. ADDQ R13, R10
  720. ADCQ R14, R11
  721. ADCQ $0x00, R12
  722. ADDQ R15, R10
  723. ADCQ R8, R11
  724. ADCQ $0x00, R12
  725. LEAQ 16(R9), R9
  726. CMPQ CX, $-6
  727. JG openSSEInternalLoop
  728. // Add in the state
  729. PADDD ·chacha20Constants<>+0(SB), X0
  730. PADDD ·chacha20Constants<>+0(SB), X1
  731. PADDD ·chacha20Constants<>+0(SB), X2
  732. PADDD ·chacha20Constants<>+0(SB), X12
  733. PADDD 32(BP), X3
  734. PADDD 32(BP), X4
  735. PADDD 32(BP), X5
  736. PADDD 32(BP), X13
  737. PADDD 48(BP), X6
  738. PADDD 48(BP), X7
  739. PADDD 48(BP), X8
  740. PADDD 48(BP), X14
  741. PADDD 80(BP), X9
  742. PADDD 96(BP), X10
  743. PADDD 112(BP), X11
  744. PADDD 128(BP), X15
  745. // Load - xor - store
  746. MOVO X15, 64(BP)
  747. MOVOU (SI), X15
  748. PXOR X15, X0
  749. MOVOU X0, (DI)
  750. MOVOU 16(SI), X15
  751. PXOR X15, X3
  752. MOVOU X3, 16(DI)
  753. MOVOU 32(SI), X15
  754. PXOR X15, X6
  755. MOVOU X6, 32(DI)
  756. MOVOU 48(SI), X15
  757. PXOR X15, X9
  758. MOVOU X9, 48(DI)
  759. MOVOU 64(SI), X9
  760. PXOR X9, X1
  761. MOVOU X1, 64(DI)
  762. MOVOU 80(SI), X9
  763. PXOR X9, X4
  764. MOVOU X4, 80(DI)
  765. MOVOU 96(SI), X9
  766. PXOR X9, X7
  767. MOVOU X7, 96(DI)
  768. MOVOU 112(SI), X9
  769. PXOR X9, X10
  770. MOVOU X10, 112(DI)
  771. MOVOU 128(SI), X9
  772. PXOR X9, X2
  773. MOVOU X2, 128(DI)
  774. MOVOU 144(SI), X9
  775. PXOR X9, X5
  776. MOVOU X5, 144(DI)
  777. MOVOU 160(SI), X9
  778. PXOR X9, X8
  779. MOVOU X8, 160(DI)
  780. MOVOU 176(SI), X9
  781. PXOR X9, X11
  782. MOVOU X11, 176(DI)
  783. MOVOU 192(SI), X9
  784. PXOR X9, X12
  785. MOVOU X12, 192(DI)
  786. MOVOU 208(SI), X9
  787. PXOR X9, X13
  788. MOVOU X13, 208(DI)
  789. MOVOU 224(SI), X9
  790. PXOR X9, X14
  791. MOVOU X14, 224(DI)
  792. MOVOU 240(SI), X9
  793. PXOR 64(BP), X9
  794. MOVOU X9, 240(DI)
  795. LEAQ 256(SI), SI
  796. LEAQ 256(DI), DI
  797. SUBQ $0x00000100, BX
  798. JMP openSSEMainLoop
  799. openSSEMainLoopDone:
  800. // Handle the various tail sizes efficiently
  801. TESTQ BX, BX
  802. JE openSSEFinalize
  803. CMPQ BX, $0x40
  804. JBE openSSETail64
  805. CMPQ BX, $0x80
  806. JBE openSSETail128
  807. CMPQ BX, $0xc0
  808. JBE openSSETail192
  809. JMP openSSETail256
  810. openSSEFinalize:
  811. // Hash in the PT, AAD lengths
  812. ADDQ ad_len+80(FP), R10
  813. ADCQ src_len+56(FP), R11
  814. ADCQ $0x01, R12
  815. MOVQ (BP), AX
  816. MOVQ AX, R15
  817. MULQ R10
  818. MOVQ AX, R13
  819. MOVQ DX, R14
  820. MOVQ (BP), AX
  821. MULQ R11
  822. IMULQ R12, R15
  823. ADDQ AX, R14
  824. ADCQ DX, R15
  825. MOVQ 8(BP), AX
  826. MOVQ AX, R8
  827. MULQ R10
  828. ADDQ AX, R14
  829. ADCQ $0x00, DX
  830. MOVQ DX, R10
  831. MOVQ 8(BP), AX
  832. MULQ R11
  833. ADDQ AX, R15
  834. ADCQ $0x00, DX
  835. IMULQ R12, R8
  836. ADDQ R10, R15
  837. ADCQ DX, R8
  838. MOVQ R13, R10
  839. MOVQ R14, R11
  840. MOVQ R15, R12
  841. ANDQ $0x03, R12
  842. MOVQ R15, R13
  843. ANDQ $-4, R13
  844. MOVQ R8, R14
  845. SHRQ $0x02, R8, R15
  846. SHRQ $0x02, R8
  847. ADDQ R13, R10
  848. ADCQ R14, R11
  849. ADCQ $0x00, R12
  850. ADDQ R15, R10
  851. ADCQ R8, R11
  852. ADCQ $0x00, R12
  853. // Final reduce
  854. MOVQ R10, R13
  855. MOVQ R11, R14
  856. MOVQ R12, R15
  857. SUBQ $-5, R10
  858. SBBQ $-1, R11
  859. SBBQ $0x03, R12
  860. CMOVQCS R13, R10
  861. CMOVQCS R14, R11
  862. CMOVQCS R15, R12
  863. // Add in the "s" part of the key
  864. ADDQ 16(BP), R10
  865. ADCQ 24(BP), R11
  866. // Finally, constant time compare to the tag at the end of the message
  867. XORQ AX, AX
  868. MOVQ $0x00000001, DX
  869. XORQ (SI), R10
  870. XORQ 8(SI), R11
  871. ORQ R11, R10
  872. CMOVQEQ DX, AX
  873. // Return true iff tags are equal
  874. MOVB AX, ret+96(FP)
  875. RET
  876. openSSE128:
  877. MOVOU ·chacha20Constants<>+0(SB), X0
  878. MOVOU 16(R8), X3
  879. MOVOU 32(R8), X6
  880. MOVOU 48(R8), X9
  881. MOVO X0, X1
  882. MOVO X3, X4
  883. MOVO X6, X7
  884. MOVO X9, X10
  885. PADDL ·sseIncMask<>+0(SB), X10
  886. MOVO X1, X2
  887. MOVO X4, X5
  888. MOVO X7, X8
  889. MOVO X10, X11
  890. PADDL ·sseIncMask<>+0(SB), X11
  891. MOVO X3, X13
  892. MOVO X6, X14
  893. MOVO X10, X15
  894. MOVQ $0x0000000a, R9
  895. openSSE128InnerCipherLoop:
  896. PADDD X3, X0
  897. PXOR X0, X9
  898. ROL16(X9, X12)
  899. PADDD X9, X6
  900. PXOR X6, X3
  901. MOVO X3, X12
  902. PSLLL $0x0c, X12
  903. PSRLL $0x14, X3
  904. PXOR X12, X3
  905. PADDD X3, X0
  906. PXOR X0, X9
  907. ROL8(X9, X12)
  908. PADDD X9, X6
  909. PXOR X6, X3
  910. MOVO X3, X12
  911. PSLLL $0x07, X12
  912. PSRLL $0x19, X3
  913. PXOR X12, X3
  914. PADDD X4, X1
  915. PXOR X1, X10
  916. ROL16(X10, X12)
  917. PADDD X10, X7
  918. PXOR X7, X4
  919. MOVO X4, X12
  920. PSLLL $0x0c, X12
  921. PSRLL $0x14, X4
  922. PXOR X12, X4
  923. PADDD X4, X1
  924. PXOR X1, X10
  925. ROL8(X10, X12)
  926. PADDD X10, X7
  927. PXOR X7, X4
  928. MOVO X4, X12
  929. PSLLL $0x07, X12
  930. PSRLL $0x19, X4
  931. PXOR X12, X4
  932. PADDD X5, X2
  933. PXOR X2, X11
  934. ROL16(X11, X12)
  935. PADDD X11, X8
  936. PXOR X8, X5
  937. MOVO X5, X12
  938. PSLLL $0x0c, X12
  939. PSRLL $0x14, X5
  940. PXOR X12, X5
  941. PADDD X5, X2
  942. PXOR X2, X11
  943. ROL8(X11, X12)
  944. PADDD X11, X8
  945. PXOR X8, X5
  946. MOVO X5, X12
  947. PSLLL $0x07, X12
  948. PSRLL $0x19, X5
  949. PXOR X12, X5
  950. BYTE $0x66
  951. BYTE $0x0f
  952. BYTE $0x3a
  953. BYTE $0x0f
  954. BYTE $0xdb
  955. BYTE $0x04
  956. BYTE $0x66
  957. BYTE $0x0f
  958. BYTE $0x3a
  959. BYTE $0x0f
  960. BYTE $0xe4
  961. BYTE $0x04
  962. BYTE $0x66
  963. BYTE $0x0f
  964. BYTE $0x3a
  965. BYTE $0x0f
  966. BYTE $0xed
  967. BYTE $0x04
  968. BYTE $0x66
  969. BYTE $0x0f
  970. BYTE $0x3a
  971. BYTE $0x0f
  972. BYTE $0xf6
  973. BYTE $0x08
  974. BYTE $0x66
  975. BYTE $0x0f
  976. BYTE $0x3a
  977. BYTE $0x0f
  978. BYTE $0xff
  979. BYTE $0x08
  980. BYTE $0x66
  981. BYTE $0x45
  982. BYTE $0x0f
  983. BYTE $0x3a
  984. BYTE $0x0f
  985. BYTE $0xc0
  986. BYTE $0x08
  987. BYTE $0x66
  988. BYTE $0x45
  989. BYTE $0x0f
  990. BYTE $0x3a
  991. BYTE $0x0f
  992. BYTE $0xc9
  993. BYTE $0x0c
  994. BYTE $0x66
  995. BYTE $0x45
  996. BYTE $0x0f
  997. BYTE $0x3a
  998. BYTE $0x0f
  999. BYTE $0xd2
  1000. BYTE $0x0c
  1001. BYTE $0x66
  1002. BYTE $0x45
  1003. BYTE $0x0f
  1004. BYTE $0x3a
  1005. BYTE $0x0f
  1006. BYTE $0xdb
  1007. BYTE $0x0c
  1008. PADDD X3, X0
  1009. PXOR X0, X9
  1010. ROL16(X9, X12)
  1011. PADDD X9, X6
  1012. PXOR X6, X3
  1013. MOVO X3, X12
  1014. PSLLL $0x0c, X12
  1015. PSRLL $0x14, X3
  1016. PXOR X12, X3
  1017. PADDD X3, X0
  1018. PXOR X0, X9
  1019. ROL8(X9, X12)
  1020. PADDD X9, X6
  1021. PXOR X6, X3
  1022. MOVO X3, X12
  1023. PSLLL $0x07, X12
  1024. PSRLL $0x19, X3
  1025. PXOR X12, X3
  1026. PADDD X4, X1
  1027. PXOR X1, X10
  1028. ROL16(X10, X12)
  1029. PADDD X10, X7
  1030. PXOR X7, X4
  1031. MOVO X4, X12
  1032. PSLLL $0x0c, X12
  1033. PSRLL $0x14, X4
  1034. PXOR X12, X4
  1035. PADDD X4, X1
  1036. PXOR X1, X10
  1037. ROL8(X10, X12)
  1038. PADDD X10, X7
  1039. PXOR X7, X4
  1040. MOVO X4, X12
  1041. PSLLL $0x07, X12
  1042. PSRLL $0x19, X4
  1043. PXOR X12, X4
  1044. PADDD X5, X2
  1045. PXOR X2, X11
  1046. ROL16(X11, X12)
  1047. PADDD X11, X8
  1048. PXOR X8, X5
  1049. MOVO X5, X12
  1050. PSLLL $0x0c, X12
  1051. PSRLL $0x14, X5
  1052. PXOR X12, X5
  1053. PADDD X5, X2
  1054. PXOR X2, X11
  1055. ROL8(X11, X12)
  1056. PADDD X11, X8
  1057. PXOR X8, X5
  1058. MOVO X5, X12
  1059. PSLLL $0x07, X12
  1060. PSRLL $0x19, X5
  1061. PXOR X12, X5
  1062. BYTE $0x66
  1063. BYTE $0x0f
  1064. BYTE $0x3a
  1065. BYTE $0x0f
  1066. BYTE $0xdb
  1067. BYTE $0x0c
  1068. BYTE $0x66
  1069. BYTE $0x0f
  1070. BYTE $0x3a
  1071. BYTE $0x0f
  1072. BYTE $0xe4
  1073. BYTE $0x0c
  1074. BYTE $0x66
  1075. BYTE $0x0f
  1076. BYTE $0x3a
  1077. BYTE $0x0f
  1078. BYTE $0xed
  1079. BYTE $0x0c
  1080. BYTE $0x66
  1081. BYTE $0x0f
  1082. BYTE $0x3a
  1083. BYTE $0x0f
  1084. BYTE $0xf6
  1085. BYTE $0x08
  1086. BYTE $0x66
  1087. BYTE $0x0f
  1088. BYTE $0x3a
  1089. BYTE $0x0f
  1090. BYTE $0xff
  1091. BYTE $0x08
  1092. BYTE $0x66
  1093. BYTE $0x45
  1094. BYTE $0x0f
  1095. BYTE $0x3a
  1096. BYTE $0x0f
  1097. BYTE $0xc0
  1098. BYTE $0x08
  1099. BYTE $0x66
  1100. BYTE $0x45
  1101. BYTE $0x0f
  1102. BYTE $0x3a
  1103. BYTE $0x0f
  1104. BYTE $0xc9
  1105. BYTE $0x04
  1106. BYTE $0x66
  1107. BYTE $0x45
  1108. BYTE $0x0f
  1109. BYTE $0x3a
  1110. BYTE $0x0f
  1111. BYTE $0xd2
  1112. BYTE $0x04
  1113. BYTE $0x66
  1114. BYTE $0x45
  1115. BYTE $0x0f
  1116. BYTE $0x3a
  1117. BYTE $0x0f
  1118. BYTE $0xdb
  1119. BYTE $0x04
  1120. DECQ R9
  1121. JNE openSSE128InnerCipherLoop
  1122. // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded
  1123. PADDL ·chacha20Constants<>+0(SB), X0
  1124. PADDL ·chacha20Constants<>+0(SB), X1
  1125. PADDL ·chacha20Constants<>+0(SB), X2
  1126. PADDL X13, X3
  1127. PADDL X13, X4
  1128. PADDL X13, X5
  1129. PADDL X14, X7
  1130. PADDL X14, X8
  1131. PADDL X15, X10
  1132. PADDL ·sseIncMask<>+0(SB), X15
  1133. PADDL X15, X11
  1134. // Clamp and store the key
  1135. PAND ·polyClampMask<>+0(SB), X0
  1136. MOVOU X0, (BP)
  1137. MOVOU X3, 16(BP)
  1138. // Hash
  1139. MOVQ ad_len+80(FP), R9
  1140. CALL polyHashADInternal<>(SB)
  // openSSE128Open: decrypt-and-hash loop of the SSE128 path. While at least
  // 16 bytes remain (BX), each pass adds the 16 ciphertext bytes at (SI) into
  // the Poly1305 accumulator R10:R11:R12, XORs them with the keystream chunk in
  // X1 and writes the result to (DI). A shorter remainder goes to openSSETail16.
  1141. openSSE128Open:
  1142. CMPQ BX, $0x10
  1143. JB openSSETail16
  1144. SUBQ $0x10, BX
  1145. // Load for hashing
  1146. ADDQ (SI), R10
  1147. ADCQ 8(SI), R11
  // Adding 1 to the third limb adds 2^128 on top of the 16 input bytes.
  1148. ADCQ $0x01, R12
  1149. // Load for decryption
  1150. MOVOU (SI), X12
  1151. PXOR X12, X1
  1152. MOVOU X1, (DI)
  1153. LEAQ 16(SI), SI
  1154. LEAQ 16(DI), DI
  // Multiply the accumulator by the two qwords at (BP) and 8(BP) (the clamped
  // key half stored at (BP) above); the 4-limb product ends up in
  // R13:R14:R15:R8, low to high.
  1155. MOVQ (BP), AX
  1156. MOVQ AX, R15
  1157. MULQ R10
  1158. MOVQ AX, R13
  1159. MOVQ DX, R14
  1160. MOVQ (BP), AX
  1161. MULQ R11
  1162. IMULQ R12, R15
  1163. ADDQ AX, R14
  1164. ADCQ DX, R15
  1165. MOVQ 8(BP), AX
  1166. MOVQ AX, R8
  1167. MULQ R10
  1168. ADDQ AX, R14
  1169. ADCQ $0x00, DX
  1170. MOVQ DX, R10
  1171. MOVQ 8(BP), AX
  1172. MULQ R11
  1173. ADDQ AX, R15
  1174. ADCQ $0x00, DX
  1175. IMULQ R12, R8
  1176. ADDQ R10, R15
  1177. ADCQ DX, R8
  // Reduce: keep the low 130 bits in R10:R11:R12, then add (hi & ~3) and
  // (hi >> 2), i.e. 5 * (product >> 130), as needed modulo 2^130 - 5.
  1178. MOVQ R13, R10
  1179. MOVQ R14, R11
  1180. MOVQ R15, R12
  1181. ANDQ $0x03, R12
  1182. MOVQ R15, R13
  1183. ANDQ $-4, R13
  1184. MOVQ R8, R14
  1185. SHRQ $0x02, R8, R15
  1186. SHRQ $0x02, R8
  1187. ADDQ R13, R10
  1188. ADCQ R14, R11
  1189. ADCQ $0x00, R12
  1190. ADDQ R15, R10
  1191. ADCQ R8, R11
  1192. ADCQ $0x00, R12
  1193. // Shift the stream "left"
  // The next chunk moves into X1; the queue order is X1, X4, X7, X10, X2, X5, X8, X11.
  1194. MOVO X4, X1
  1195. MOVO X7, X4
  1196. MOVO X10, X7
  1197. MOVO X2, X10
  1198. MOVO X5, X2
  1199. MOVO X8, X5
  1200. MOVO X11, X8
  1201. JMP openSSE128Open
  // openSSETail16: decrypt and hash the final BX bytes (fewer than 16) using
  // the keystream chunk in X1. BX == 0 goes straight to openSSEFinalize.
  1202. openSSETail16:
  1203. TESTQ BX, BX
  1204. JE openSSEFinalize
  1205. // We can safely load the CT from the end, because it is padded with the MAC
  // R9 = BX*16 indexes ·andMask in 16-byte entries; with the -16 displacement
  // this selects entry BX-1 (presumably a mask keeping only the low BX bytes;
  // the table is defined elsewhere).
  1206. MOVQ BX, R9
  1207. SHLQ $0x04, R9
  1208. LEAQ ·andMask<>+0(SB), R13
  1209. MOVOU (SI), X12
  1210. ADDQ BX, SI
  1211. PAND -16(R13)(R9*1), X12
  // Spill the masked ciphertext to 64(BP) so its high qword can be read back
  // from 72(BP); R13:R14 then hold the block to hash.
  1212. MOVO X12, 64(BP)
  1213. MOVQ X12, R13
  1214. MOVQ 72(BP), R14
  1215. PXOR X1, X12
  1216. // We can only store one byte at a time, since plaintext can be shorter than 16 bytes
  // Each pass writes the low byte of X12 to (DI), shifts X12 right by one byte
  // and decrements BX until it reaches zero.
  1217. openSSETail16Store:
  1218. MOVQ X12, R8
  1219. MOVB R8, (DI)
  1220. PSRLDQ $0x01, X12
  1221. INCQ DI
  1222. DECQ BX
  1223. JNE openSSETail16Store
  // Add the masked ciphertext block (plus 2^128) into the accumulator
  // R10:R11:R12, then multiply by the 128-bit value at (BP) and reduce.
  1224. ADDQ R13, R10
  1225. ADCQ R14, R11
  1226. ADCQ $0x01, R12
  1227. MOVQ (BP), AX
  1228. MOVQ AX, R15
  1229. MULQ R10
  1230. MOVQ AX, R13
  1231. MOVQ DX, R14
  1232. MOVQ (BP), AX
  1233. MULQ R11
  1234. IMULQ R12, R15
  1235. ADDQ AX, R14
  1236. ADCQ DX, R15
  1237. MOVQ 8(BP), AX
  1238. MOVQ AX, R8
  1239. MULQ R10
  1240. ADDQ AX, R14
  1241. ADCQ $0x00, DX
  1242. MOVQ DX, R10
  1243. MOVQ 8(BP), AX
  1244. MULQ R11
  1245. ADDQ AX, R15
  1246. ADCQ $0x00, DX
  1247. IMULQ R12, R8
  1248. ADDQ R10, R15
  1249. ADCQ DX, R8
  // Reduce modulo 2^130 - 5: low 130 bits + 5 * (product >> 130).
  1250. MOVQ R13, R10
  1251. MOVQ R14, R11
  1252. MOVQ R15, R12
  1253. ANDQ $0x03, R12
  1254. MOVQ R15, R13
  1255. ANDQ $-4, R13
  1256. MOVQ R8, R14
  1257. SHRQ $0x02, R8, R15
  1258. SHRQ $0x02, R8
  1259. ADDQ R13, R10
  1260. ADCQ R14, R11
  1261. ADCQ $0x00, R12
  1262. ADDQ R15, R10
  1263. ADCQ R8, R11
  1264. ADCQ $0x00, R12
  1265. JMP openSSEFinalize
  // openSSETail64: set up a single ChaCha20 state in X0/X3/X6/X9 from the
  // constants, the rows saved at 32(BP) and 48(BP), and the counter row at
  // 128(BP) advanced by ·sseIncMask. The advanced counter row is kept at
  // 80(BP) so it can be added back after the rounds. R9 (round/offset counter)
  // starts at 0 and CX holds the number of bytes still to hash; with fewer
  // than 16 bytes the first hash step is skipped.
  1266. openSSETail64:
  1267. MOVO ·chacha20Constants<>+0(SB), X0
  1268. MOVO 32(BP), X3
  1269. MOVO 48(BP), X6
  1270. MOVO 128(BP), X9
  1271. PADDL ·sseIncMask<>+0(SB), X9
  1272. MOVO X9, 80(BP)
  1273. XORQ R9, R9
  1274. MOVQ BX, CX
  1275. CMPQ CX, $0x10
  1276. JB openSSETail64LoopB
  // openSSETail64LoopA: add the 16-byte ciphertext block at offset R9 from SI
  // (plus 2^128) into the Poly1305 accumulator R10:R11:R12, multiply by the
  // 128-bit value at (BP) and reduce modulo 2^130 - 5. CX, the count of bytes
  // still to hash, drops by 16 and execution falls through to
  // openSSETail64LoopB.
  1277. openSSETail64LoopA:
  1278. ADDQ (SI)(R9*1), R10
  1279. ADCQ 8(SI)(R9*1), R11
  1280. ADCQ $0x01, R12
  // 4-limb product accumulates in R13:R14:R15:R8, low to high.
  1281. MOVQ (BP), AX
  1282. MOVQ AX, R15
  1283. MULQ R10
  1284. MOVQ AX, R13
  1285. MOVQ DX, R14
  1286. MOVQ (BP), AX
  1287. MULQ R11
  1288. IMULQ R12, R15
  1289. ADDQ AX, R14
  1290. ADCQ DX, R15
  1291. MOVQ 8(BP), AX
  1292. MOVQ AX, R8
  1293. MULQ R10
  1294. ADDQ AX, R14
  1295. ADCQ $0x00, DX
  1296. MOVQ DX, R10
  1297. MOVQ 8(BP), AX
  1298. MULQ R11
  1299. ADDQ AX, R15
  1300. ADCQ $0x00, DX
  1301. IMULQ R12, R8
  1302. ADDQ R10, R15
  1303. ADCQ DX, R8
  // Keep the low 130 bits and add back 5 * (product >> 130).
  1304. MOVQ R13, R10
  1305. MOVQ R14, R11
  1306. MOVQ R15, R12
  1307. ANDQ $0x03, R12
  1308. MOVQ R15, R13
  1309. ANDQ $-4, R13
  1310. MOVQ R8, R14
  1311. SHRQ $0x02, R8, R15
  1312. SHRQ $0x02, R8
  1313. ADDQ R13, R10
  1314. ADCQ R14, R11
  1315. ADCQ $0x00, R12
  1316. ADDQ R15, R10
  1317. ADCQ R8, R11
  1318. ADCQ $0x00, R12
  1319. SUBQ $0x10, CX
  // openSSETail64LoopB: one ChaCha20 double round on the state X0/X3/X6/X9,
  // with X12 as scratch. R9 advances by 16 per pass and the loop ends when it
  // reaches 0xa0, i.e. after 10 passes. While CX >= 16 the next pass is
  // entered through openSSETail64LoopA, which hashes one more block first.
  1320. openSSETail64LoopB:
  1321. ADDQ $0x10, R9
  // First quarter-round set (ROL16/ROL8 are macros defined elsewhere; the
  // shift pairs below rotate left by 12 and by 7).
  1322. PADDD X3, X0
  1323. PXOR X0, X9
  1324. ROL16(X9, X12)
  1325. PADDD X9, X6
  1326. PXOR X6, X3
  1327. MOVO X3, X12
  1328. PSLLL $0x0c, X12
  1329. PSRLL $0x14, X3
  1330. PXOR X12, X3
  1331. PADDD X3, X0
  1332. PXOR X0, X9
  1333. ROL8(X9, X12)
  1334. PADDD X9, X6
  1335. PXOR X6, X3
  1336. MOVO X3, X12
  1337. PSLLL $0x07, X12
  1338. PSRLL $0x19, X3
  1339. PXOR X12, X3
  // Hand-encoded PALIGNR $4, X3, X3; PALIGNR $8, X6, X6; PALIGNR $12, X9, X9:
  // rotates rows 1-3 so the next quarter-round set works on the diagonals.
  1340. BYTE $0x66
  1341. BYTE $0x0f
  1342. BYTE $0x3a
  1343. BYTE $0x0f
  1344. BYTE $0xdb
  1345. BYTE $0x04
  1346. BYTE $0x66
  1347. BYTE $0x0f
  1348. BYTE $0x3a
  1349. BYTE $0x0f
  1350. BYTE $0xf6
  1351. BYTE $0x08
  1352. BYTE $0x66
  1353. BYTE $0x45
  1354. BYTE $0x0f
  1355. BYTE $0x3a
  1356. BYTE $0x0f
  1357. BYTE $0xc9
  1358. BYTE $0x0c
  // Second quarter-round set, on the rotated rows.
  1359. PADDD X3, X0
  1360. PXOR X0, X9
  1361. ROL16(X9, X12)
  1362. PADDD X9, X6
  1363. PXOR X6, X3
  1364. MOVO X3, X12
  1365. PSLLL $0x0c, X12
  1366. PSRLL $0x14, X3
  1367. PXOR X12, X3
  1368. PADDD X3, X0
  1369. PXOR X0, X9
  1370. ROL8(X9, X12)
  1371. PADDD X9, X6
  1372. PXOR X6, X3
  1373. MOVO X3, X12
  1374. PSLLL $0x07, X12
  1375. PSRLL $0x19, X3
  1376. PXOR X12, X3
  // Hand-encoded PALIGNR $12, X3, X3; PALIGNR $8, X6, X6; PALIGNR $4, X9, X9:
  // rotates the rows back.
  1377. BYTE $0x66
  1378. BYTE $0x0f
  1379. BYTE $0x3a
  1380. BYTE $0x0f
  1381. BYTE $0xdb
  1382. BYTE $0x0c
  1383. BYTE $0x66
  1384. BYTE $0x0f
  1385. BYTE $0x3a
  1386. BYTE $0x0f
  1387. BYTE $0xf6
  1388. BYTE $0x08
  1389. BYTE $0x66
  1390. BYTE $0x45
  1391. BYTE $0x0f
  1392. BYTE $0x3a
  1393. BYTE $0x0f
  1394. BYTE $0xc9
  1395. BYTE $0x04
  // Hash another block first while at least 16 unhashed bytes remain.
  1396. CMPQ CX, $0x10
  1397. JAE openSSETail64LoopA
  1398. CMPQ R9, $0xa0
  1399. JNE openSSETail64LoopB
  // Add the initial state back in (counter row from 80(BP)); X0, X3, X6, X9
  // now hold 64 bytes of keystream for openSSETail64DecLoop below.
  1400. PADDL ·chacha20Constants<>+0(SB), X0
  1401. PADDL 32(BP), X3
  1402. PADDL 48(BP), X6
  1403. PADDL 80(BP), X9
  // openSSETail64DecLoop: decrypt 16 bytes per pass while BX >= 16. The
  // current keystream chunk is in X0; after each store the queue shifts
  // (X3 -> X0, X6 -> X3, X9 -> X6). No hashing happens here.
  1404. openSSETail64DecLoop:
  1405. CMPQ BX, $0x10
  1406. JB openSSETail64DecLoopDone
  1407. SUBQ $0x10, BX
  1408. MOVOU (SI), X12
  1409. PXOR X12, X0
  1410. MOVOU X0, (DI)
  1411. LEAQ 16(SI), SI
  1412. LEAQ 16(DI), DI
  1413. MOVO X3, X0
  1414. MOVO X6, X3
  1415. MOVO X9, X6
  1416. JMP openSSETail64DecLoop
  // Fewer than 16 bytes left: openSSETail16 reads its keystream from X1, so
  // hand the current chunk over in that register.
  1417. openSSETail64DecLoopDone:
  1418. MOVO X0, X1
  1419. JMP openSSETail16
  // openSSETail128: set up two ChaCha20 states. X1/X4/X7/X10 uses the counter
  // row from 128(BP) advanced once by ·sseIncMask (saved at 80(BP));
  // X0/X3/X6/X9 is a copy whose counter row is advanced once more (saved at
  // 96(BP)). R9 starts at 0 and CX is BX rounded down to a multiple of 16,
  // the byte count hashed inside the round loop.
  1420. openSSETail128:
  1421. MOVO ·chacha20Constants<>+0(SB), X1
  1422. MOVO 32(BP), X4
  1423. MOVO 48(BP), X7
  1424. MOVO 128(BP), X10
  1425. PADDL ·sseIncMask<>+0(SB), X10
  1426. MOVO X10, 80(BP)
  1427. MOVO X1, X0
  1428. MOVO X4, X3
  1429. MOVO X7, X6
  1430. MOVO X10, X9
  1431. PADDL ·sseIncMask<>+0(SB), X9
  1432. MOVO X9, 96(BP)
  1433. XORQ R9, R9
  1434. MOVQ BX, CX
  1435. ANDQ $-16, CX
  // openSSETail128LoopA: hash the 16-byte ciphertext block at offset R9 from
  // SI into the Poly1305 accumulator R10:R11:R12 (add block plus 2^128,
  // multiply by the 128-bit value at (BP), reduce modulo 2^130 - 5), then fall
  // through to openSSETail128LoopB. The first pass is reached by fall-through
  // from the setup code, before R9 is compared with CX, so one block is always
  // hashed.
  1436. openSSETail128LoopA:
  1437. ADDQ (SI)(R9*1), R10
  1438. ADCQ 8(SI)(R9*1), R11
  1439. ADCQ $0x01, R12
  // 4-limb product accumulates in R13:R14:R15:R8, low to high.
  1440. MOVQ (BP), AX
  1441. MOVQ AX, R15
  1442. MULQ R10
  1443. MOVQ AX, R13
  1444. MOVQ DX, R14
  1445. MOVQ (BP), AX
  1446. MULQ R11
  1447. IMULQ R12, R15
  1448. ADDQ AX, R14
  1449. ADCQ DX, R15
  1450. MOVQ 8(BP), AX
  1451. MOVQ AX, R8
  1452. MULQ R10
  1453. ADDQ AX, R14
  1454. ADCQ $0x00, DX
  1455. MOVQ DX, R10
  1456. MOVQ 8(BP), AX
  1457. MULQ R11
  1458. ADDQ AX, R15
  1459. ADCQ $0x00, DX
  1460. IMULQ R12, R8
  1461. ADDQ R10, R15
  1462. ADCQ DX, R8
  // Keep the low 130 bits and add back 5 * (product >> 130).
  1463. MOVQ R13, R10
  1464. MOVQ R14, R11
  1465. MOVQ R15, R12
  1466. ANDQ $0x03, R12
  1467. MOVQ R15, R13
  1468. ANDQ $-4, R13
  1469. MOVQ R8, R14
  1470. SHRQ $0x02, R8, R15
  1471. SHRQ $0x02, R8
  1472. ADDQ R13, R10
  1473. ADCQ R14, R11
  1474. ADCQ $0x00, R12
  1475. ADDQ R15, R10
  1476. ADCQ R8, R11
  1477. ADCQ $0x00, R12
  // openSSETail128LoopB: one ChaCha20 double round on both states
  // (X0/X3/X6/X9 and X1/X4/X7/X10, X12 as scratch). R9 advances by 16 per
  // pass; while R9 < CX the next pass goes through openSSETail128LoopA to hash
  // another block, and the loop ends once R9 reaches 0xa0 (10 passes). The
  // code after the loop finishes both blocks and decrypts the first 64 bytes.
  1478. openSSETail128LoopB:
  1479. ADDQ $0x10, R9
  // First quarter-round set, state X0/X3/X6/X9.
  1480. PADDD X3, X0
  1481. PXOR X0, X9
  1482. ROL16(X9, X12)
  1483. PADDD X9, X6
  1484. PXOR X6, X3
  1485. MOVO X3, X12
  1486. PSLLL $0x0c, X12
  1487. PSRLL $0x14, X3
  1488. PXOR X12, X3
  1489. PADDD X3, X0
  1490. PXOR X0, X9
  1491. ROL8(X9, X12)
  1492. PADDD X9, X6
  1493. PXOR X6, X3
  1494. MOVO X3, X12
  1495. PSLLL $0x07, X12
  1496. PSRLL $0x19, X3
  1497. PXOR X12, X3
  // First quarter-round set, state X1/X4/X7/X10.
  1498. PADDD X4, X1
  1499. PXOR X1, X10
  1500. ROL16(X10, X12)
  1501. PADDD X10, X7
  1502. PXOR X7, X4
  1503. MOVO X4, X12
  1504. PSLLL $0x0c, X12
  1505. PSRLL $0x14, X4
  1506. PXOR X12, X4
  1507. PADDD X4, X1
  1508. PXOR X1, X10
  1509. ROL8(X10, X12)
  1510. PADDD X10, X7
  1511. PXOR X7, X4
  1512. MOVO X4, X12
  1513. PSLLL $0x07, X12
  1514. PSRLL $0x19, X4
  1515. PXOR X12, X4
  // Hand-encoded PALIGNR of each register with itself: $4 on X3, $8 on X6,
  // $12 on X9, then $4 on X4, $8 on X7, $12 on X10 (rotate rows 1-3 so the
  // next set works on the diagonals).
  1516. BYTE $0x66
  1517. BYTE $0x0f
  1518. BYTE $0x3a
  1519. BYTE $0x0f
  1520. BYTE $0xdb
  1521. BYTE $0x04
  1522. BYTE $0x66
  1523. BYTE $0x0f
  1524. BYTE $0x3a
  1525. BYTE $0x0f
  1526. BYTE $0xf6
  1527. BYTE $0x08
  1528. BYTE $0x66
  1529. BYTE $0x45
  1530. BYTE $0x0f
  1531. BYTE $0x3a
  1532. BYTE $0x0f
  1533. BYTE $0xc9
  1534. BYTE $0x0c
  1535. BYTE $0x66
  1536. BYTE $0x0f
  1537. BYTE $0x3a
  1538. BYTE $0x0f
  1539. BYTE $0xe4
  1540. BYTE $0x04
  1541. BYTE $0x66
  1542. BYTE $0x0f
  1543. BYTE $0x3a
  1544. BYTE $0x0f
  1545. BYTE $0xff
  1546. BYTE $0x08
  1547. BYTE $0x66
  1548. BYTE $0x45
  1549. BYTE $0x0f
  1550. BYTE $0x3a
  1551. BYTE $0x0f
  1552. BYTE $0xd2
  1553. BYTE $0x0c
  // Second quarter-round set, state X0/X3/X6/X9.
  1554. PADDD X3, X0
  1555. PXOR X0, X9
  1556. ROL16(X9, X12)
  1557. PADDD X9, X6
  1558. PXOR X6, X3
  1559. MOVO X3, X12
  1560. PSLLL $0x0c, X12
  1561. PSRLL $0x14, X3
  1562. PXOR X12, X3
  1563. PADDD X3, X0
  1564. PXOR X0, X9
  1565. ROL8(X9, X12)
  1566. PADDD X9, X6
  1567. PXOR X6, X3
  1568. MOVO X3, X12
  1569. PSLLL $0x07, X12
  1570. PSRLL $0x19, X3
  1571. PXOR X12, X3
  // Second quarter-round set, state X1/X4/X7/X10.
  1572. PADDD X4, X1
  1573. PXOR X1, X10
  1574. ROL16(X10, X12)
  1575. PADDD X10, X7
  1576. PXOR X7, X4
  1577. MOVO X4, X12
  1578. PSLLL $0x0c, X12
  1579. PSRLL $0x14, X4
  1580. PXOR X12, X4
  1581. PADDD X4, X1
  1582. PXOR X1, X10
  1583. ROL8(X10, X12)
  1584. PADDD X10, X7
  1585. PXOR X7, X4
  1586. MOVO X4, X12
  1587. PSLLL $0x07, X12
  1588. PSRLL $0x19, X4
  1589. PXOR X12, X4
  // Hand-encoded PALIGNR: $12 on X3, $8 on X6, $4 on X9, then $12 on X4,
  // $8 on X7, $4 on X10 (rotate the rows back).
  1590. BYTE $0x66
  1591. BYTE $0x0f
  1592. BYTE $0x3a
  1593. BYTE $0x0f
  1594. BYTE $0xdb
  1595. BYTE $0x0c
  1596. BYTE $0x66
  1597. BYTE $0x0f
  1598. BYTE $0x3a
  1599. BYTE $0x0f
  1600. BYTE $0xf6
  1601. BYTE $0x08
  1602. BYTE $0x66
  1603. BYTE $0x45
  1604. BYTE $0x0f
  1605. BYTE $0x3a
  1606. BYTE $0x0f
  1607. BYTE $0xc9
  1608. BYTE $0x04
  1609. BYTE $0x66
  1610. BYTE $0x0f
  1611. BYTE $0x3a
  1612. BYTE $0x0f
  1613. BYTE $0xe4
  1614. BYTE $0x0c
  1615. BYTE $0x66
  1616. BYTE $0x0f
  1617. BYTE $0x3a
  1618. BYTE $0x0f
  1619. BYTE $0xff
  1620. BYTE $0x08
  1621. BYTE $0x66
  1622. BYTE $0x45
  1623. BYTE $0x0f
  1624. BYTE $0x3a
  1625. BYTE $0x0f
  1626. BYTE $0xd2
  1627. BYTE $0x04
  1628. CMPQ R9, CX
  1629. JB openSSETail128LoopA
  1630. CMPQ R9, $0xa0
  1631. JNE openSSETail128LoopB
  // Add the initial state back in; counter rows come from 96(BP) for X9 and
  // 80(BP) for X10, matching where the setup code saved them.
  1632. PADDL ·chacha20Constants<>+0(SB), X0
  1633. PADDL ·chacha20Constants<>+0(SB), X1
  1634. PADDL 32(BP), X3
  1635. PADDL 32(BP), X4
  1636. PADDL 48(BP), X6
  1637. PADDL 48(BP), X7
  1638. PADDL 96(BP), X9
  1639. PADDL 80(BP), X10
  // Decrypt the first 64 bytes with the block in X1/X4/X7/X10.
  1640. MOVOU (SI), X12
  1641. MOVOU 16(SI), X13
  1642. MOVOU 32(SI), X14
  1643. MOVOU 48(SI), X15
  1644. PXOR X12, X1
  1645. PXOR X13, X4
  1646. PXOR X14, X7
  1647. PXOR X15, X10
  1648. MOVOU X1, (DI)
  1649. MOVOU X4, 16(DI)
  1650. MOVOU X7, 32(DI)
  1651. MOVOU X10, 48(DI)
  1652. SUBQ $0x40, BX
  1653. LEAQ 64(SI), SI
  1654. LEAQ 64(DI), DI
  // The remaining keystream block is already in X0/X3/X6/X9, the registers
  // openSSETail64DecLoop consumes.
  1655. JMP openSSETail64DecLoop
  // openSSETail192: set up three ChaCha20 states, each with the counter row
  // advanced once more by ·sseIncMask than the previous one:
  //   X2/X5/X8/X11 - counter row saved at 80(BP)
  //   X1/X4/X7/X10 - counter row saved at 96(BP)
  //   X0/X3/X6/X9  - counter row saved at 112(BP)
  // CX becomes min(BX, 0xa0) rounded down to a multiple of 16, the byte count
  // hashed inside the round loop; R9 is borrowed for the 0xa0 constant and
  // then zeroed to serve as the loop counter.
  1656. openSSETail192:
  1657. MOVO ·chacha20Constants<>+0(SB), X2
  1658. MOVO 32(BP), X5
  1659. MOVO 48(BP), X8
  1660. MOVO 128(BP), X11
  1661. PADDL ·sseIncMask<>+0(SB), X11
  1662. MOVO X11, 80(BP)
  1663. MOVO X2, X1
  1664. MOVO X5, X4
  1665. MOVO X8, X7
  1666. MOVO X11, X10
  1667. PADDL ·sseIncMask<>+0(SB), X10
  1668. MOVO X10, 96(BP)
  1669. MOVO X1, X0
  1670. MOVO X4, X3
  1671. MOVO X7, X6
  1672. MOVO X10, X9
  1673. PADDL ·sseIncMask<>+0(SB), X9
  1674. MOVO X9, 112(BP)
  1675. MOVQ BX, CX
  1676. MOVQ $0x000000a0, R9
  1677. CMPQ CX, $0xa0
  // NOTE(review): CMOVQGT is a signed comparison; it matches an unsigned clamp
  // only while BX < 2^63 - presumably guaranteed on this path, confirm.
  1678. CMOVQGT R9, CX
  1679. ANDQ $-16, CX
  1680. XORQ R9, R9
  // openSSLTail192LoopA (label is spelled "SSL", unlike the surrounding
  // openSSE... labels): hash the 16-byte ciphertext block at offset R9 from SI
  // into the Poly1305 accumulator R10:R11:R12 (add block plus 2^128, multiply
  // by the 128-bit value at (BP), reduce modulo 2^130 - 5), then fall through
  // to openSSLTail192LoopB. The first pass is reached by fall-through from the
  // setup code, so one block is always hashed.
  1681. openSSLTail192LoopA:
  1682. ADDQ (SI)(R9*1), R10
  1683. ADCQ 8(SI)(R9*1), R11
  1684. ADCQ $0x01, R12
  // 4-limb product accumulates in R13:R14:R15:R8, low to high.
  1685. MOVQ (BP), AX
  1686. MOVQ AX, R15
  1687. MULQ R10
  1688. MOVQ AX, R13
  1689. MOVQ DX, R14
  1690. MOVQ (BP), AX
  1691. MULQ R11
  1692. IMULQ R12, R15
  1693. ADDQ AX, R14
  1694. ADCQ DX, R15
  1695. MOVQ 8(BP), AX
  1696. MOVQ AX, R8
  1697. MULQ R10
  1698. ADDQ AX, R14
  1699. ADCQ $0x00, DX
  1700. MOVQ DX, R10
  1701. MOVQ 8(BP), AX
  1702. MULQ R11
  1703. ADDQ AX, R15
  1704. ADCQ $0x00, DX
  1705. IMULQ R12, R8
  1706. ADDQ R10, R15
  1707. ADCQ DX, R8
  // Keep the low 130 bits and add back 5 * (product >> 130).
  1708. MOVQ R13, R10
  1709. MOVQ R14, R11
  1710. MOVQ R15, R12
  1711. ANDQ $0x03, R12
  1712. MOVQ R15, R13
  1713. ANDQ $-4, R13
  1714. MOVQ R8, R14
  1715. SHRQ $0x02, R8, R15
  1716. SHRQ $0x02, R8
  1717. ADDQ R13, R10
  1718. ADCQ R14, R11
  1719. ADCQ $0x00, R12
  1720. ADDQ R15, R10
  1721. ADCQ R8, R11
  1722. ADCQ $0x00, R12
  // openSSLTail192LoopB: one ChaCha20 double round on the three states
  // X0/X3/X6/X9, X1/X4/X7/X10 and X2/X5/X8/X11 (X12 as scratch). R9 advances
  // by 16 per pass; while R9 < CX the next pass goes through
  // openSSLTail192LoopA to hash another block, and the loop ends once R9
  // reaches 0xa0 (10 passes). After the loop, up to two more ciphertext blocks
  // (offsets 160 and 176) are hashed if BX covers them.
  1723. openSSLTail192LoopB:
  1724. ADDQ $0x10, R9
  // First quarter-round set, state X0/X3/X6/X9.
  1725. PADDD X3, X0
  1726. PXOR X0, X9
  1727. ROL16(X9, X12)
  1728. PADDD X9, X6
  1729. PXOR X6, X3
  1730. MOVO X3, X12
  1731. PSLLL $0x0c, X12
  1732. PSRLL $0x14, X3
  1733. PXOR X12, X3
  1734. PADDD X3, X0
  1735. PXOR X0, X9
  1736. ROL8(X9, X12)
  1737. PADDD X9, X6
  1738. PXOR X6, X3
  1739. MOVO X3, X12
  1740. PSLLL $0x07, X12
  1741. PSRLL $0x19, X3
  1742. PXOR X12, X3
  // First quarter-round set, state X1/X4/X7/X10.
  1743. PADDD X4, X1
  1744. PXOR X1, X10
  1745. ROL16(X10, X12)
  1746. PADDD X10, X7
  1747. PXOR X7, X4
  1748. MOVO X4, X12
  1749. PSLLL $0x0c, X12
  1750. PSRLL $0x14, X4
  1751. PXOR X12, X4
  1752. PADDD X4, X1
  1753. PXOR X1, X10
  1754. ROL8(X10, X12)
  1755. PADDD X10, X7
  1756. PXOR X7, X4
  1757. MOVO X4, X12
  1758. PSLLL $0x07, X12
  1759. PSRLL $0x19, X4
  1760. PXOR X12, X4
  // First quarter-round set, state X2/X5/X8/X11.
  1761. PADDD X5, X2
  1762. PXOR X2, X11
  1763. ROL16(X11, X12)
  1764. PADDD X11, X8
  1765. PXOR X8, X5
  1766. MOVO X5, X12
  1767. PSLLL $0x0c, X12
  1768. PSRLL $0x14, X5
  1769. PXOR X12, X5
  1770. PADDD X5, X2
  1771. PXOR X2, X11
  1772. ROL8(X11, X12)
  1773. PADDD X11, X8
  1774. PXOR X8, X5
  1775. MOVO X5, X12
  1776. PSLLL $0x07, X12
  1777. PSRLL $0x19, X5
  1778. PXOR X12, X5
  // Hand-encoded PALIGNR of each register with itself, per state:
  // $4 on X3/X4/X5, $8 on X6/X7/X8, $12 on X9/X10/X11 (rotate rows 1-3 so the
  // next set works on the diagonals).
  1779. BYTE $0x66
  1780. BYTE $0x0f
  1781. BYTE $0x3a
  1782. BYTE $0x0f
  1783. BYTE $0xdb
  1784. BYTE $0x04
  1785. BYTE $0x66
  1786. BYTE $0x0f
  1787. BYTE $0x3a
  1788. BYTE $0x0f
  1789. BYTE $0xf6
  1790. BYTE $0x08
  1791. BYTE $0x66
  1792. BYTE $0x45
  1793. BYTE $0x0f
  1794. BYTE $0x3a
  1795. BYTE $0x0f
  1796. BYTE $0xc9
  1797. BYTE $0x0c
  1798. BYTE $0x66
  1799. BYTE $0x0f
  1800. BYTE $0x3a
  1801. BYTE $0x0f
  1802. BYTE $0xe4
  1803. BYTE $0x04
  1804. BYTE $0x66
  1805. BYTE $0x0f
  1806. BYTE $0x3a
  1807. BYTE $0x0f
  1808. BYTE $0xff
  1809. BYTE $0x08
  1810. BYTE $0x66
  1811. BYTE $0x45
  1812. BYTE $0x0f
  1813. BYTE $0x3a
  1814. BYTE $0x0f
  1815. BYTE $0xd2
  1816. BYTE $0x0c
  1817. BYTE $0x66
  1818. BYTE $0x0f
  1819. BYTE $0x3a
  1820. BYTE $0x0f
  1821. BYTE $0xed
  1822. BYTE $0x04
  1823. BYTE $0x66
  1824. BYTE $0x45
  1825. BYTE $0x0f
  1826. BYTE $0x3a
  1827. BYTE $0x0f
  1828. BYTE $0xc0
  1829. BYTE $0x08
  1830. BYTE $0x66
  1831. BYTE $0x45
  1832. BYTE $0x0f
  1833. BYTE $0x3a
  1834. BYTE $0x0f
  1835. BYTE $0xdb
  1836. BYTE $0x0c
  // Second quarter-round set, state X0/X3/X6/X9.
  1837. PADDD X3, X0
  1838. PXOR X0, X9
  1839. ROL16(X9, X12)
  1840. PADDD X9, X6
  1841. PXOR X6, X3
  1842. MOVO X3, X12
  1843. PSLLL $0x0c, X12
  1844. PSRLL $0x14, X3
  1845. PXOR X12, X3
  1846. PADDD X3, X0
  1847. PXOR X0, X9
  1848. ROL8(X9, X12)
  1849. PADDD X9, X6
  1850. PXOR X6, X3
  1851. MOVO X3, X12
  1852. PSLLL $0x07, X12
  1853. PSRLL $0x19, X3
  1854. PXOR X12, X3
  // Second quarter-round set, state X1/X4/X7/X10.
  1855. PADDD X4, X1
  1856. PXOR X1, X10
  1857. ROL16(X10, X12)
  1858. PADDD X10, X7
  1859. PXOR X7, X4
  1860. MOVO X4, X12
  1861. PSLLL $0x0c, X12
  1862. PSRLL $0x14, X4
  1863. PXOR X12, X4
  1864. PADDD X4, X1
  1865. PXOR X1, X10
  1866. ROL8(X10, X12)
  1867. PADDD X10, X7
  1868. PXOR X7, X4
  1869. MOVO X4, X12
  1870. PSLLL $0x07, X12
  1871. PSRLL $0x19, X4
  1872. PXOR X12, X4
  // Second quarter-round set, state X2/X5/X8/X11.
  1873. PADDD X5, X2
  1874. PXOR X2, X11
  1875. ROL16(X11, X12)
  1876. PADDD X11, X8
  1877. PXOR X8, X5
  1878. MOVO X5, X12
  1879. PSLLL $0x0c, X12
  1880. PSRLL $0x14, X5
  1881. PXOR X12, X5
  1882. PADDD X5, X2
  1883. PXOR X2, X11
  1884. ROL8(X11, X12)
  1885. PADDD X11, X8
  1886. PXOR X8, X5
  1887. MOVO X5, X12
  1888. PSLLL $0x07, X12
  1889. PSRLL $0x19, X5
  1890. PXOR X12, X5
  // Hand-encoded PALIGNR, per state: $12 on X3/X4/X5, $8 on X6/X7/X8,
  // $4 on X9/X10/X11 (rotate the rows back).
  1891. BYTE $0x66
  1892. BYTE $0x0f
  1893. BYTE $0x3a
  1894. BYTE $0x0f
  1895. BYTE $0xdb
  1896. BYTE $0x0c
  1897. BYTE $0x66
  1898. BYTE $0x0f
  1899. BYTE $0x3a
  1900. BYTE $0x0f
  1901. BYTE $0xf6
  1902. BYTE $0x08
  1903. BYTE $0x66
  1904. BYTE $0x45
  1905. BYTE $0x0f
  1906. BYTE $0x3a
  1907. BYTE $0x0f
  1908. BYTE $0xc9
  1909. BYTE $0x04
  1910. BYTE $0x66
  1911. BYTE $0x0f
  1912. BYTE $0x3a
  1913. BYTE $0x0f
  1914. BYTE $0xe4
  1915. BYTE $0x0c
  1916. BYTE $0x66
  1917. BYTE $0x0f
  1918. BYTE $0x3a
  1919. BYTE $0x0f
  1920. BYTE $0xff
  1921. BYTE $0x08
  1922. BYTE $0x66
  1923. BYTE $0x45
  1924. BYTE $0x0f
  1925. BYTE $0x3a
  1926. BYTE $0x0f
  1927. BYTE $0xd2
  1928. BYTE $0x04
  1929. BYTE $0x66
  1930. BYTE $0x0f
  1931. BYTE $0x3a
  1932. BYTE $0x0f
  1933. BYTE $0xed
  1934. BYTE $0x0c
  1935. BYTE $0x66
  1936. BYTE $0x45
  1937. BYTE $0x0f
  1938. BYTE $0x3a
  1939. BYTE $0x0f
  1940. BYTE $0xc0
  1941. BYTE $0x08
  1942. BYTE $0x66
  1943. BYTE $0x45
  1944. BYTE $0x0f
  1945. BYTE $0x3a
  1946. BYTE $0x0f
  1947. BYTE $0xdb
  1948. BYTE $0x04
  1949. CMPQ R9, CX
  1950. JB openSSLTail192LoopA
  1951. CMPQ R9, $0xa0
  1952. JNE openSSLTail192LoopB
  // The loop hashes at most 0xa0 bytes. If BX >= 0xb0, the block at offset
  // 160 is complete and is hashed here.
  1953. CMPQ BX, $0xb0
  1954. JB openSSLTail192Store
  1955. ADDQ 160(SI), R10
  1956. ADCQ 168(SI), R11
  1957. ADCQ $0x01, R12
  1958. MOVQ (BP), AX
  1959. MOVQ AX, R15
  1960. MULQ R10
  1961. MOVQ AX, R13
  1962. MOVQ DX, R14
  1963. MOVQ (BP), AX
  1964. MULQ R11
  1965. IMULQ R12, R15
  1966. ADDQ AX, R14
  1967. ADCQ DX, R15
  1968. MOVQ 8(BP), AX
  1969. MOVQ AX, R8
  1970. MULQ R10
  1971. ADDQ AX, R14
  1972. ADCQ $0x00, DX
  1973. MOVQ DX, R10
  1974. MOVQ 8(BP), AX
  1975. MULQ R11
  1976. ADDQ AX, R15
  1977. ADCQ $0x00, DX
  1978. IMULQ R12, R8
  1979. ADDQ R10, R15
  1980. ADCQ DX, R8
  1981. MOVQ R13, R10
  1982. MOVQ R14, R11
  1983. MOVQ R15, R12
  1984. ANDQ $0x03, R12
  1985. MOVQ R15, R13
  1986. ANDQ $-4, R13
  1987. MOVQ R8, R14
  1988. SHRQ $0x02, R8, R15
  1989. SHRQ $0x02, R8
  1990. ADDQ R13, R10
  1991. ADCQ R14, R11
  1992. ADCQ $0x00, R12
  1993. ADDQ R15, R10
  1994. ADCQ R8, R11
  1995. ADCQ $0x00, R12
  // Likewise the block at offset 176 when BX >= 0xc0.
  1996. CMPQ BX, $0xc0
  1997. JB openSSLTail192Store
  1998. ADDQ 176(SI), R10
  1999. ADCQ 184(SI), R11
  2000. ADCQ $0x01, R12
  2001. MOVQ (BP), AX
  2002. MOVQ AX, R15
  2003. MULQ R10
  2004. MOVQ AX, R13
  2005. MOVQ DX, R14
  2006. MOVQ (BP), AX
  2007. MULQ R11
  2008. IMULQ R12, R15
  2009. ADDQ AX, R14
  2010. ADCQ DX, R15
  2011. MOVQ 8(BP), AX
  2012. MOVQ AX, R8
  2013. MULQ R10
  2014. ADDQ AX, R14
  2015. ADCQ $0x00, DX
  2016. MOVQ DX, R10
  2017. MOVQ 8(BP), AX
  2018. MULQ R11
  2019. ADDQ AX, R15
  2020. ADCQ $0x00, DX
  2021. IMULQ R12, R8
  2022. ADDQ R10, R15
  2023. ADCQ DX, R8
  2024. MOVQ R13, R10
  2025. MOVQ R14, R11
  2026. MOVQ R15, R12
  2027. ANDQ $0x03, R12
  2028. MOVQ R15, R13
  2029. ANDQ $-4, R13
  2030. MOVQ R8, R14
  2031. SHRQ $0x02, R8, R15
  2032. SHRQ $0x02, R8
  2033. ADDQ R13, R10
  2034. ADCQ R14, R11
  2035. ADCQ $0x00, R12
  2036. ADDQ R15, R10
  2037. ADCQ R8, R11
  2038. ADCQ $0x00, R12
  // openSSLTail192Store: finish the three ChaCha20 blocks by adding the
  // initial state back in (counter rows from 112(BP), 96(BP) and 80(BP) for
  // X9, X10 and X11), then decrypt 128 bytes: the first 64 with
  // X2/X5/X8/X11 and the next 64 with X1/X4/X7/X10. The third block stays in
  // X0/X3/X6/X9 for openSSETail64DecLoop.
  2039. openSSLTail192Store:
  2040. PADDL ·chacha20Constants<>+0(SB), X0
  2041. PADDL ·chacha20Constants<>+0(SB), X1
  2042. PADDL ·chacha20Constants<>+0(SB), X2
  2043. PADDL 32(BP), X3
  2044. PADDL 32(BP), X4
  2045. PADDL 32(BP), X5
  2046. PADDL 48(BP), X6
  2047. PADDL 48(BP), X7
  2048. PADDL 48(BP), X8
  2049. PADDL 112(BP), X9
  2050. PADDL 96(BP), X10
  2051. PADDL 80(BP), X11
  // Bytes 0..63.
  2052. MOVOU (SI), X12
  2053. MOVOU 16(SI), X13
  2054. MOVOU 32(SI), X14
  2055. MOVOU 48(SI), X15
  2056. PXOR X12, X2
  2057. PXOR X13, X5
  2058. PXOR X14, X8
  2059. PXOR X15, X11
  2060. MOVOU X2, (DI)
  2061. MOVOU X5, 16(DI)
  2062. MOVOU X8, 32(DI)
  2063. MOVOU X11, 48(DI)
  // Bytes 64..127.
  2064. MOVOU 64(SI), X12
  2065. MOVOU 80(SI), X13
  2066. MOVOU 96(SI), X14
  2067. MOVOU 112(SI), X15
  2068. PXOR X12, X1
  2069. PXOR X13, X4
  2070. PXOR X14, X7
  2071. PXOR X15, X10
  2072. MOVOU X1, 64(DI)
  2073. MOVOU X4, 80(DI)
  2074. MOVOU X7, 96(DI)
  2075. MOVOU X10, 112(DI)
  2076. SUBQ $0x80, BX
  2077. LEAQ 128(SI), SI
  2078. LEAQ 128(DI), DI
  2079. JMP openSSETail64DecLoop
  // openSSETail256: set up four ChaCha20 states, each with the counter row
  // advanced once more by ·sseIncMask than the previous one:
  //   X0/X3/X6/X9     - counter row saved at 80(BP)
  //   X1/X4/X7/X10    - counter row saved at 96(BP)
  //   X2/X5/X8/X11    - counter row saved at 112(BP)
  //   X12/X13/X14/X15 - counter row saved at 128(BP), replacing the value read
  //                     from there at the top
  // R9 (offset/round counter) starts at 0.
  2080. openSSETail256:
  2081. MOVO ·chacha20Constants<>+0(SB), X0
  2082. MOVO 32(BP), X3
  2083. MOVO 48(BP), X6
  2084. MOVO 128(BP), X9
  2085. PADDL ·sseIncMask<>+0(SB), X9
  2086. MOVO X0, X1
  2087. MOVO X3, X4
  2088. MOVO X6, X7
  2089. MOVO X9, X10
  2090. PADDL ·sseIncMask<>+0(SB), X10
  2091. MOVO X1, X2
  2092. MOVO X4, X5
  2093. MOVO X7, X8
  2094. MOVO X10, X11
  2095. PADDL ·sseIncMask<>+0(SB), X11
  2096. MOVO X2, X12
  2097. MOVO X5, X13
  2098. MOVO X8, X14
  2099. MOVO X11, X15
  2100. PADDL ·sseIncMask<>+0(SB), X15
  2101. // Store counters
  2102. MOVO X9, 80(BP)
  2103. MOVO X10, 96(BP)
  2104. MOVO X11, 112(BP)
  2105. MOVO X15, 128(BP)
  2106. XORQ R9, R9
  // openSSETail256Loop: each pass hashes the 16-byte ciphertext block at
  // offset R9 from SI and runs one ChaCha20 double round on all four states,
  // with the Poly1305 multiply split around the second quarter-round set.
  // All 16 XMM registers hold state, so a scratch register is freed by
  // spilling to 64(BP): X14 for the first three states, X7 for the fourth.
  // R9 advances by 16 per pass; the loop ends at 0xa0 (10 passes, 160 bytes
  // hashed).
  2107. openSSETail256Loop:
  2108. ADDQ (SI)(R9*1), R10
  2109. ADCQ 8(SI)(R9*1), R11
  2110. ADCQ $0x01, R12
  // First quarter-round set on states 0-2, X14 as scratch.
  2111. MOVO X14, 64(BP)
  2112. PADDD X3, X0
  2113. PXOR X0, X9
  2114. ROL16(X9, X14)
  2115. PADDD X9, X6
  2116. PXOR X6, X3
  2117. MOVO X3, X14
  2118. PSLLL $0x0c, X14
  2119. PSRLL $0x14, X3
  2120. PXOR X14, X3
  2121. PADDD X3, X0
  2122. PXOR X0, X9
  2123. ROL8(X9, X14)
  2124. PADDD X9, X6
  2125. PXOR X6, X3
  2126. MOVO X3, X14
  2127. PSLLL $0x07, X14
  2128. PSRLL $0x19, X3
  2129. PXOR X14, X3
  2130. PADDD X4, X1
  2131. PXOR X1, X10
  2132. ROL16(X10, X14)
  2133. PADDD X10, X7
  2134. PXOR X7, X4
  2135. MOVO X4, X14
  2136. PSLLL $0x0c, X14
  2137. PSRLL $0x14, X4
  2138. PXOR X14, X4
  2139. PADDD X4, X1
  2140. PXOR X1, X10
  2141. ROL8(X10, X14)
  2142. PADDD X10, X7
  2143. PXOR X7, X4
  2144. MOVO X4, X14
  2145. PSLLL $0x07, X14
  2146. PSRLL $0x19, X4
  2147. PXOR X14, X4
  2148. PADDD X5, X2
  2149. PXOR X2, X11
  2150. ROL16(X11, X14)
  2151. PADDD X11, X8
  2152. PXOR X8, X5
  2153. MOVO X5, X14
  2154. PSLLL $0x0c, X14
  2155. PSRLL $0x14, X5
  2156. PXOR X14, X5
  2157. PADDD X5, X2
  2158. PXOR X2, X11
  2159. ROL8(X11, X14)
  2160. PADDD X11, X8
  2161. PXOR X8, X5
  2162. MOVO X5, X14
  2163. PSLLL $0x07, X14
  2164. PSRLL $0x19, X5
  2165. PXOR X14, X5
  // Restore X14, spill X7, and run the same set on state 3 (X12/X13/X14/X15).
  2166. MOVO 64(BP), X14
  2167. MOVO X7, 64(BP)
  2168. PADDD X13, X12
  2169. PXOR X12, X15
  2170. ROL16(X15, X7)
  2171. PADDD X15, X14
  2172. PXOR X14, X13
  2173. MOVO X13, X7
  2174. PSLLL $0x0c, X7
  2175. PSRLL $0x14, X13
  2176. PXOR X7, X13
  2177. PADDD X13, X12
  2178. PXOR X12, X15
  2179. ROL8(X15, X7)
  2180. PADDD X15, X14
  2181. PXOR X14, X13
  2182. MOVO X13, X7
  2183. PSLLL $0x07, X7
  2184. PSRLL $0x19, X13
  2185. PXOR X7, X13
  2186. MOVO 64(BP), X7
  // Hand-encoded PALIGNR of each register with itself: $4 on X3, X4, X5, X13;
  // $8 on X6, X7, X8, X14; $12 on X9, X10, X11, X15 (rotate rows 1-3 so the
  // next set works on the diagonals).
  2187. BYTE $0x66
  2188. BYTE $0x0f
  2189. BYTE $0x3a
  2190. BYTE $0x0f
  2191. BYTE $0xdb
  2192. BYTE $0x04
  2193. BYTE $0x66
  2194. BYTE $0x0f
  2195. BYTE $0x3a
  2196. BYTE $0x0f
  2197. BYTE $0xe4
  2198. BYTE $0x04
  2199. BYTE $0x66
  2200. BYTE $0x0f
  2201. BYTE $0x3a
  2202. BYTE $0x0f
  2203. BYTE $0xed
  2204. BYTE $0x04
  2205. BYTE $0x66
  2206. BYTE $0x45
  2207. BYTE $0x0f
  2208. BYTE $0x3a
  2209. BYTE $0x0f
  2210. BYTE $0xed
  2211. BYTE $0x04
  2212. BYTE $0x66
  2213. BYTE $0x0f
  2214. BYTE $0x3a
  2215. BYTE $0x0f
  2216. BYTE $0xf6
  2217. BYTE $0x08
  2218. BYTE $0x66
  2219. BYTE $0x0f
  2220. BYTE $0x3a
  2221. BYTE $0x0f
  2222. BYTE $0xff
  2223. BYTE $0x08
  2224. BYTE $0x66
  2225. BYTE $0x45
  2226. BYTE $0x0f
  2227. BYTE $0x3a
  2228. BYTE $0x0f
  2229. BYTE $0xc0
  2230. BYTE $0x08
  2231. BYTE $0x66
  2232. BYTE $0x45
  2233. BYTE $0x0f
  2234. BYTE $0x3a
  2235. BYTE $0x0f
  2236. BYTE $0xf6
  2237. BYTE $0x08
  2238. BYTE $0x66
  2239. BYTE $0x45
  2240. BYTE $0x0f
  2241. BYTE $0x3a
  2242. BYTE $0x0f
  2243. BYTE $0xc9
  2244. BYTE $0x0c
  2245. BYTE $0x66
  2246. BYTE $0x45
  2247. BYTE $0x0f
  2248. BYTE $0x3a
  2249. BYTE $0x0f
  2250. BYTE $0xd2
  2251. BYTE $0x0c
  2252. BYTE $0x66
  2253. BYTE $0x45
  2254. BYTE $0x0f
  2255. BYTE $0x3a
  2256. BYTE $0x0f
  2257. BYTE $0xdb
  2258. BYTE $0x0c
  2259. BYTE $0x66
  2260. BYTE $0x45
  2261. BYTE $0x0f
  2262. BYTE $0x3a
  2263. BYTE $0x0f
  2264. BYTE $0xff
  2265. BYTE $0x0c
  // First part of the Poly1305 multiply by the 128-bit value at (BP).
  // Partial sums are left in R13, R14, R15, R10 and DX, with the multiplier's
  // high qword in R8; the vector code below does not touch them.
  2266. MOVQ (BP), AX
  2267. MOVQ AX, R15
  2268. MULQ R10
  2269. MOVQ AX, R13
  2270. MOVQ DX, R14
  2271. MOVQ (BP), AX
  2272. MULQ R11
  2273. IMULQ R12, R15
  2274. ADDQ AX, R14
  2275. ADCQ DX, R15
  2276. MOVQ 8(BP), AX
  2277. MOVQ AX, R8
  2278. MULQ R10
  2279. ADDQ AX, R14
  2280. ADCQ $0x00, DX
  2281. MOVQ DX, R10
  2282. MOVQ 8(BP), AX
  2283. MULQ R11
  2284. ADDQ AX, R15
  2285. ADCQ $0x00, DX
  // Second quarter-round set on states 0-2, X14 as scratch.
  2286. MOVO X14, 64(BP)
  2287. PADDD X3, X0
  2288. PXOR X0, X9
  2289. ROL16(X9, X14)
  2290. PADDD X9, X6
  2291. PXOR X6, X3
  2292. MOVO X3, X14
  2293. PSLLL $0x0c, X14
  2294. PSRLL $0x14, X3
  2295. PXOR X14, X3
  2296. PADDD X3, X0
  2297. PXOR X0, X9
  2298. ROL8(X9, X14)
  2299. PADDD X9, X6
  2300. PXOR X6, X3
  2301. MOVO X3, X14
  2302. PSLLL $0x07, X14
  2303. PSRLL $0x19, X3
  2304. PXOR X14, X3
  2305. PADDD X4, X1
  2306. PXOR X1, X10
  2307. ROL16(X10, X14)
  2308. PADDD X10, X7
  2309. PXOR X7, X4
  2310. MOVO X4, X14
  2311. PSLLL $0x0c, X14
  2312. PSRLL $0x14, X4
  2313. PXOR X14, X4
  2314. PADDD X4, X1
  2315. PXOR X1, X10
  2316. ROL8(X10, X14)
  2317. PADDD X10, X7
  2318. PXOR X7, X4
  2319. MOVO X4, X14
  2320. PSLLL $0x07, X14
  2321. PSRLL $0x19, X4
  2322. PXOR X14, X4
  2323. PADDD X5, X2
  2324. PXOR X2, X11
  2325. ROL16(X11, X14)
  2326. PADDD X11, X8
  2327. PXOR X8, X5
  2328. MOVO X5, X14
  2329. PSLLL $0x0c, X14
  2330. PSRLL $0x14, X5
  2331. PXOR X14, X5
  2332. PADDD X5, X2
  2333. PXOR X2, X11
  2334. ROL8(X11, X14)
  2335. PADDD X11, X8
  2336. PXOR X8, X5
  2337. MOVO X5, X14
  2338. PSLLL $0x07, X14
  2339. PSRLL $0x19, X5
  2340. PXOR X14, X5
  // Same set on state 3, X7 as scratch.
  2341. MOVO 64(BP), X14
  2342. MOVO X7, 64(BP)
  2343. PADDD X13, X12
  2344. PXOR X12, X15
  2345. ROL16(X15, X7)
  2346. PADDD X15, X14
  2347. PXOR X14, X13
  2348. MOVO X13, X7
  2349. PSLLL $0x0c, X7
  2350. PSRLL $0x14, X13
  2351. PXOR X7, X13
  2352. PADDD X13, X12
  2353. PXOR X12, X15
  2354. ROL8(X15, X7)
  2355. PADDD X15, X14
  2356. PXOR X14, X13
  2357. MOVO X13, X7
  2358. PSLLL $0x07, X7
  2359. PSRLL $0x19, X13
  2360. PXOR X7, X13
  2361. MOVO 64(BP), X7
  // Finish the multiply (product in R13:R14:R15:R8) and reduce modulo
  // 2^130 - 5: low 130 bits + 5 * (product >> 130).
  2362. IMULQ R12, R8
  2363. ADDQ R10, R15
  2364. ADCQ DX, R8
  2365. MOVQ R13, R10
  2366. MOVQ R14, R11
  2367. MOVQ R15, R12
  2368. ANDQ $0x03, R12
  2369. MOVQ R15, R13
  2370. ANDQ $-4, R13
  2371. MOVQ R8, R14
  2372. SHRQ $0x02, R8, R15
  2373. SHRQ $0x02, R8
  2374. ADDQ R13, R10
  2375. ADCQ R14, R11
  2376. ADCQ $0x00, R12
  2377. ADDQ R15, R10
  2378. ADCQ R8, R11
  2379. ADCQ $0x00, R12
  // Hand-encoded PALIGNR: $12 on X3, X4, X5, X13; $8 on X6, X7, X8, X14;
  // $4 on X9, X10, X11, X15 (rotate the rows back).
  2380. BYTE $0x66
  2381. BYTE $0x0f
  2382. BYTE $0x3a
  2383. BYTE $0x0f
  2384. BYTE $0xdb
  2385. BYTE $0x0c
  2386. BYTE $0x66
  2387. BYTE $0x0f
  2388. BYTE $0x3a
  2389. BYTE $0x0f
  2390. BYTE $0xe4
  2391. BYTE $0x0c
  2392. BYTE $0x66
  2393. BYTE $0x0f
  2394. BYTE $0x3a
  2395. BYTE $0x0f
  2396. BYTE $0xed
  2397. BYTE $0x0c
  2398. BYTE $0x66
  2399. BYTE $0x45
  2400. BYTE $0x0f
  2401. BYTE $0x3a
  2402. BYTE $0x0f
  2403. BYTE $0xed
  2404. BYTE $0x0c
  2405. BYTE $0x66
  2406. BYTE $0x0f
  2407. BYTE $0x3a
  2408. BYTE $0x0f
  2409. BYTE $0xf6
  2410. BYTE $0x08
  2411. BYTE $0x66
  2412. BYTE $0x0f
  2413. BYTE $0x3a
  2414. BYTE $0x0f
  2415. BYTE $0xff
  2416. BYTE $0x08
  2417. BYTE $0x66
  2418. BYTE $0x45
  2419. BYTE $0x0f
  2420. BYTE $0x3a
  2421. BYTE $0x0f
  2422. BYTE $0xc0
  2423. BYTE $0x08
  2424. BYTE $0x66
  2425. BYTE $0x45
  2426. BYTE $0x0f
  2427. BYTE $0x3a
  2428. BYTE $0x0f
  2429. BYTE $0xf6
  2430. BYTE $0x08
  2431. BYTE $0x66
  2432. BYTE $0x45
  2433. BYTE $0x0f
  2434. BYTE $0x3a
  2435. BYTE $0x0f
  2436. BYTE $0xc9
  2437. BYTE $0x04
  2438. BYTE $0x66
  2439. BYTE $0x45
  2440. BYTE $0x0f
  2441. BYTE $0x3a
  2442. BYTE $0x0f
  2443. BYTE $0xd2
  2444. BYTE $0x04
  2445. BYTE $0x66
  2446. BYTE $0x45
  2447. BYTE $0x0f
  2448. BYTE $0x3a
  2449. BYTE $0x0f
  2450. BYTE $0xdb
  2451. BYTE $0x04
  2452. BYTE $0x66
  2453. BYTE $0x45
  2454. BYTE $0x0f
  2455. BYTE $0x3a
  2456. BYTE $0x0f
  2457. BYTE $0xff
  2458. BYTE $0x04
  2459. ADDQ $0x10, R9
  2460. CMPQ R9, $0xa0
  2461. JB openSSETail256Loop
  // CX = BX rounded down to a multiple of 16: the end offset for the hash
  // loop that follows.
  2462. MOVQ BX, CX
  2463. ANDQ $-16, CX
  // openSSETail256HashLoop: hash the remaining whole 16-byte ciphertext blocks
  // from offset R9 up to CX, then finish the four ChaCha20 blocks, decrypt 192
  // bytes and hand the fourth block to openSSETail64DecLoop.
  // NOTE(review): the loop tests R9 < CX only after hashing, so it always
  // hashes at least one block; this relies on CX > R9 on entry - presumably
  // guaranteed by how this path is selected, confirm against the dispatch.
  2464. openSSETail256HashLoop:
  2465. ADDQ (SI)(R9*1), R10
  2466. ADCQ 8(SI)(R9*1), R11
  2467. ADCQ $0x01, R12
  // Multiply by the 128-bit value at (BP); product in R13:R14:R15:R8.
  2468. MOVQ (BP), AX
  2469. MOVQ AX, R15
  2470. MULQ R10
  2471. MOVQ AX, R13
  2472. MOVQ DX, R14
  2473. MOVQ (BP), AX
  2474. MULQ R11
  2475. IMULQ R12, R15
  2476. ADDQ AX, R14
  2477. ADCQ DX, R15
  2478. MOVQ 8(BP), AX
  2479. MOVQ AX, R8
  2480. MULQ R10
  2481. ADDQ AX, R14
  2482. ADCQ $0x00, DX
  2483. MOVQ DX, R10
  2484. MOVQ 8(BP), AX
  2485. MULQ R11
  2486. ADDQ AX, R15
  2487. ADCQ $0x00, DX
  2488. IMULQ R12, R8
  2489. ADDQ R10, R15
  2490. ADCQ DX, R8
  // Keep the low 130 bits and add back 5 * (product >> 130).
  2491. MOVQ R13, R10
  2492. MOVQ R14, R11
  2493. MOVQ R15, R12
  2494. ANDQ $0x03, R12
  2495. MOVQ R15, R13
  2496. ANDQ $-4, R13
  2497. MOVQ R8, R14
  2498. SHRQ $0x02, R8, R15
  2499. SHRQ $0x02, R8
  2500. ADDQ R13, R10
  2501. ADCQ R14, R11
  2502. ADCQ $0x00, R12
  2503. ADDQ R15, R10
  2504. ADCQ R8, R11
  2505. ADCQ $0x00, R12
  2506. ADDQ $0x10, R9
  2507. CMPQ R9, CX
  2508. JB openSSETail256HashLoop
  2509. // Add in the state
  2510. PADDD ·chacha20Constants<>+0(SB), X0
  2511. PADDD ·chacha20Constants<>+0(SB), X1
  2512. PADDD ·chacha20Constants<>+0(SB), X2
  2513. PADDD ·chacha20Constants<>+0(SB), X12
  2514. PADDD 32(BP), X3
  2515. PADDD 32(BP), X4
  2516. PADDD 32(BP), X5
  2517. PADDD 32(BP), X13
  2518. PADDD 48(BP), X6
  2519. PADDD 48(BP), X7
  2520. PADDD 48(BP), X8
  2521. PADDD 48(BP), X14
  2522. PADDD 80(BP), X9
  2523. PADDD 96(BP), X10
  2524. PADDD 112(BP), X11
  2525. PADDD 128(BP), X15
  // Park the last row of the fourth block at 64(BP) so X15 can serve as the
  // load register below.
  2526. MOVO X15, 64(BP)
  2527. // Load - xor - store
  2528. MOVOU (SI), X15
  2529. PXOR X15, X0
  2530. MOVOU 16(SI), X15
  2531. PXOR X15, X3
  2532. MOVOU 32(SI), X15
  2533. PXOR X15, X6
  2534. MOVOU 48(SI), X15
  2535. PXOR X15, X9
  2536. MOVOU X0, (DI)
  2537. MOVOU X3, 16(DI)
  2538. MOVOU X6, 32(DI)
  2539. MOVOU X9, 48(DI)
  // X0/X3/X6/X9 are free now and are reused to load the next ciphertext.
  2540. MOVOU 64(SI), X0
  2541. MOVOU 80(SI), X3
  2542. MOVOU 96(SI), X6
  2543. MOVOU 112(SI), X9
  2544. PXOR X0, X1
  2545. PXOR X3, X4
  2546. PXOR X6, X7
  2547. PXOR X9, X10
  2548. MOVOU X1, 64(DI)
  2549. MOVOU X4, 80(DI)
  2550. MOVOU X7, 96(DI)
  2551. MOVOU X10, 112(DI)
  2552. MOVOU 128(SI), X0
  2553. MOVOU 144(SI), X3
  2554. MOVOU 160(SI), X6
  2555. MOVOU 176(SI), X9
  2556. PXOR X0, X2
  2557. PXOR X3, X5
  2558. PXOR X6, X8
  2559. PXOR X9, X11
  2560. MOVOU X2, 128(DI)
  2561. MOVOU X5, 144(DI)
  2562. MOVOU X8, 160(DI)
  2563. MOVOU X11, 176(DI)
  2564. LEAQ 192(SI), SI
  2565. LEAQ 192(DI), DI
  2566. SUBQ $0xc0, BX
  // Move the fourth block into X0/X3/X6/X9, where openSSETail64DecLoop
  // expects its keystream.
  2567. MOVO X12, X0
  2568. MOVO X13, X3
  2569. MOVO X14, X6
  2570. MOVO 64(BP), X9
  2571. JMP openSSETail64DecLoop
  // chacha20Poly1305Open_AVX2: entry of the AVX2 open path. Builds the
  // initial ChaCha20 state in Y0/Y14/Y12/Y4, sends inputs of at most 0xc0 and
  // 0x140 bytes (BX) to openAVX2192 and openAVX2320, and otherwise saves the
  // state rows and sets R9 = 10 for the key-generation loop that follows.
  2572. chacha20Poly1305Open_AVX2:
  2573. VZEROUPPER
  2574. VMOVDQU ·chacha20Constants<>+0(SB), Y0
  // Hand-encoded VBROADCASTI128 16(R8), Y14; VBROADCASTI128 32(R8), Y12;
  // VBROADCASTI128 48(R8), Y4. R8 is set outside this excerpt - presumably it
  // points at the ChaCha20 state/key block; confirm against the prologue.
  2575. BYTE $0xc4
  2576. BYTE $0x42
  2577. BYTE $0x7d
  2578. BYTE $0x5a
  2579. BYTE $0x70
  2580. BYTE $0x10
  2581. BYTE $0xc4
  2582. BYTE $0x42
  2583. BYTE $0x7d
  2584. BYTE $0x5a
  2585. BYTE $0x60
  2586. BYTE $0x20
  2587. BYTE $0xc4
  2588. BYTE $0xc2
  2589. BYTE $0x7d
  2590. BYTE $0x5a
  2591. BYTE $0x60
  2592. BYTE $0x30
  2593. VPADDD ·avx2InitMask<>+0(SB), Y4, Y4
  2594. // Special optimization, for very short buffers
  2595. CMPQ BX, $0xc0
  2596. JBE openAVX2192
  2597. CMPQ BX, $0x00000140
  2598. JBE openAVX2320
  2599. // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream
  // Save rows 1-3 of the initial state; they are added back after the rounds.
  2600. VMOVDQA Y14, 32(BP)
  2601. VMOVDQA Y12, 64(BP)
  2602. VMOVDQA Y4, 192(BP)
  2603. MOVQ $0x0000000a, R9
  // openAVX2PreparePolyKey: R9 passes of a ChaCha20 double round on the state
  // Y0/Y14/Y12/Y4 (Y3 as scratch). After the loop the saved initial state is
  // added back, the Poly1305 key is clamped and stored at (BP), the remaining
  // keystream is arranged in Y0/Y14, and the additional data is hashed via
  // polyHashADInternal.
  2604. openAVX2PreparePolyKey:
  // First quarter-round set: 16- and 8-bit rotates via VPSHUFB tables, 12- and
  // 7-bit rotates via shift pairs.
  2605. VPADDD Y14, Y0, Y0
  2606. VPXOR Y0, Y4, Y4
  2607. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  2608. VPADDD Y4, Y12, Y12
  2609. VPXOR Y12, Y14, Y14
  2610. VPSLLD $0x0c, Y14, Y3
  2611. VPSRLD $0x14, Y14, Y14
  2612. VPXOR Y3, Y14, Y14
  2613. VPADDD Y14, Y0, Y0
  2614. VPXOR Y0, Y4, Y4
  2615. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  2616. VPADDD Y4, Y12, Y12
  2617. VPXOR Y12, Y14, Y14
  2618. VPSLLD $0x07, Y14, Y3
  2619. VPSRLD $0x19, Y14, Y14
  2620. VPXOR Y3, Y14, Y14
  // Rotate rows 1-3 so the next set works on the diagonals.
  2621. VPALIGNR $0x04, Y14, Y14, Y14
  2622. VPALIGNR $0x08, Y12, Y12, Y12
  2623. VPALIGNR $0x0c, Y4, Y4, Y4
  // Second quarter-round set.
  2624. VPADDD Y14, Y0, Y0
  2625. VPXOR Y0, Y4, Y4
  2626. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  2627. VPADDD Y4, Y12, Y12
  2628. VPXOR Y12, Y14, Y14
  2629. VPSLLD $0x0c, Y14, Y3
  2630. VPSRLD $0x14, Y14, Y14
  2631. VPXOR Y3, Y14, Y14
  2632. VPADDD Y14, Y0, Y0
  2633. VPXOR Y0, Y4, Y4
  2634. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  2635. VPADDD Y4, Y12, Y12
  2636. VPXOR Y12, Y14, Y14
  2637. VPSLLD $0x07, Y14, Y3
  2638. VPSRLD $0x19, Y14, Y14
  2639. VPXOR Y3, Y14, Y14
  // Rotate the rows back.
  2640. VPALIGNR $0x0c, Y14, Y14, Y14
  2641. VPALIGNR $0x08, Y12, Y12, Y12
  2642. VPALIGNR $0x04, Y4, Y4, Y4
  2643. DECQ R9
  2644. JNE openAVX2PreparePolyKey
  // Add the initial state back in (rows saved at 32(BP), 64(BP), 192(BP)).
  2645. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  2646. VPADDD 32(BP), Y14, Y14
  2647. VPADDD 64(BP), Y12, Y12
  2648. VPADDD 192(BP), Y4, Y4
  2649. VPERM2I128 $0x02, Y0, Y14, Y3
  2650. // Clamp and store poly key
  2651. VPAND ·polyClampMask<>+0(SB), Y3, Y3
  2652. VMOVDQA Y3, (BP)
  2653. // Stream for the first 64 bytes
  2654. VPERM2I128 $0x13, Y0, Y14, Y0
  2655. VPERM2I128 $0x13, Y12, Y4, Y14
  2656. // Hash AD + first 64 bytes
  // R9 carries the additional-data length into polyHashADInternal.
  2657. MOVQ ad_len+80(FP), R9
  2658. CALL polyHashADInternal<>(SB)
  // CX = 0: byte offset for the hashing loop that follows.
  2659. XORQ CX, CX
  // Hashes the first 64 input bytes (four 16-byte blocks at SI+CX) into the
  // accumulator R12:R11:R10, then decrypts those 64 bytes with Y0/Y14.
  2660. openAVX2InitialHash64:
  // acc += 16-byte block; the ADCQ $0x01 also adds the 2^128 bit.
  2661. ADDQ (SI)(CX*1), R10
  2662. ADCQ 8(SI)(CX*1), R11
  2663. ADCQ $0x01, R12
  // Multiply acc by the low key word at (BP): partial product in R15:R14:R13.
  2664. MOVQ (BP), DX
  2665. MOVQ DX, R15
  2666. MULXQ R10, R13, R14
  2667. IMULQ R12, R15
  2668. MULXQ R11, AX, DX
  2669. ADDQ AX, R14
  2670. ADCQ DX, R15
  // Multiply acc by the key word at 8(BP) and accumulate: product in R8:R15:R14:R13.
  2671. MOVQ 8(BP), DX
  2672. MULXQ R10, R10, AX
  2673. ADDQ R10, R14
  2674. MULXQ R11, R11, R8
  2675. ADCQ R11, R15
  2676. ADCQ $0x00, R8
  2677. IMULQ R12, DX
  2678. ADDQ AX, R15
  2679. ADCQ DX, R8
  // Reduce: keep the low 130 bits as the new acc, then add the bits above
  // 2^130 (call them c) back in twice - once as 4*c and once as c.
  2680. MOVQ R13, R10
  2681. MOVQ R14, R11
  2682. MOVQ R15, R12
  2683. ANDQ $0x03, R12
  2684. MOVQ R15, R13
  2685. ANDQ $-4, R13
  2686. MOVQ R8, R14
  2687. SHRQ $0x02, R8, R15
  2688. SHRQ $0x02, R8
  2689. ADDQ R13, R10
  2690. ADCQ R14, R11
  2691. ADCQ $0x00, R12
  2692. ADDQ R15, R10
  2693. ADCQ R8, R11
  2694. ADCQ $0x00, R12
  // Next 16-byte block; stop after 64 bytes.
  2695. ADDQ $0x10, CX
  2696. CMPQ CX, $0x40
  2697. JNE openAVX2InitialHash64
  2698. // Decrypt the first 64 bytes
  2699. VPXOR (SI), Y0, Y0
  2700. VPXOR 32(SI), Y14, Y14
  2701. VMOVDQU Y0, (DI)
  2702. VMOVDQU Y14, 32(DI)
  // Advance input (SI) and output (DI) and reduce the remaining length (BX) by 64.
  2703. LEAQ 64(SI), SI
  2704. LEAQ 64(DI), DI
  2705. SUBQ $0x40, BX
  // Top of the bulk loop: runs while at least 512 bytes remain in BX. Sets up
  // four parallel ChaCha states before entering the interleaved inner loop.
  2706. openAVX2MainLoop:
  2707. CMPQ BX, $0x00000200
  2708. JB openAVX2MainLoopDone
  2709. // Load state, increment counter blocks, store the incremented counters
  // Row 0 (constants) -> Y0, Y5, Y6, Y7.
  2710. VMOVDQU ·chacha20Constants<>+0(SB), Y0
  2711. VMOVDQA Y0, Y5
  2712. VMOVDQA Y0, Y6
  2713. VMOVDQA Y0, Y7
  // Row 1 (saved at 32(BP)) -> Y14, Y9, Y10, Y11.
  2714. VMOVDQA 32(BP), Y14
  2715. VMOVDQA Y14, Y9
  2716. VMOVDQA Y14, Y10
  2717. VMOVDQA Y14, Y11
  // Row 2 (saved at 64(BP)) -> Y12, Y13, Y8, Y15.
  2718. VMOVDQA 64(BP), Y12
  2719. VMOVDQA Y12, Y13
  2720. VMOVDQA Y12, Y8
  2721. VMOVDQA Y12, Y15
  // Row 3 (counter row): start from the last counter used, at 192(BP), and add
  // avx2IncMask once more for each successive state -> Y4, Y1, Y2, Y3.
  2722. VMOVDQA 192(BP), Y4
  2723. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  2724. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  2725. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  2726. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  // Keep the four counter rows for the post-round add; 192(BP) ends up holding
  // the highest counter, which the next pass starts from.
  2727. VMOVDQA Y4, 96(BP)
  2728. VMOVDQA Y1, 128(BP)
  2729. VMOVDQA Y2, 160(BP)
  2730. VMOVDQA Y3, 192(BP)
  // CX = byte offset of the next ciphertext block hashed by the inner loop.
  2731. XORQ CX, CX
  2732. openAVX2InternalLoop:
  2733. ADDQ (SI)(CX*1), R10
  2734. ADCQ 8(SI)(CX*1), R11
  2735. ADCQ $0x01, R12
  2736. VPADDD Y14, Y0, Y0
  2737. VPADDD Y9, Y5, Y5
  2738. VPADDD Y10, Y6, Y6
  2739. VPADDD Y11, Y7, Y7
  2740. MOVQ (BP), DX
  2741. MOVQ DX, R15
  2742. MULXQ R10, R13, R14
  2743. IMULQ R12, R15
  2744. MULXQ R11, AX, DX
  2745. ADDQ AX, R14
  2746. ADCQ DX, R15
  2747. VPXOR Y0, Y4, Y4
  2748. VPXOR Y5, Y1, Y1
  2749. VPXOR Y6, Y2, Y2
  2750. VPXOR Y7, Y3, Y3
  2751. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  2752. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  2753. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  2754. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  2755. MOVQ 8(BP), DX
  2756. MULXQ R10, R10, AX
  2757. ADDQ R10, R14
  2758. MULXQ R11, R11, R8
  2759. ADCQ R11, R15
  2760. ADCQ $0x00, R8
  2761. VPADDD Y4, Y12, Y12
  2762. VPADDD Y1, Y13, Y13
  2763. VPADDD Y2, Y8, Y8
  2764. VPADDD Y3, Y15, Y15
  2765. VPXOR Y12, Y14, Y14
  2766. VPXOR Y13, Y9, Y9
  2767. VPXOR Y8, Y10, Y10
  2768. VPXOR Y15, Y11, Y11
  2769. IMULQ R12, DX
  2770. ADDQ AX, R15
  2771. ADCQ DX, R8
  2772. VMOVDQA Y15, 224(BP)
  2773. VPSLLD $0x0c, Y14, Y15
  2774. VPSRLD $0x14, Y14, Y14
  2775. VPXOR Y15, Y14, Y14
  2776. VPSLLD $0x0c, Y9, Y15
  2777. VPSRLD $0x14, Y9, Y9
  2778. VPXOR Y15, Y9, Y9
  2779. VPSLLD $0x0c, Y10, Y15
  2780. VPSRLD $0x14, Y10, Y10
  2781. VPXOR Y15, Y10, Y10
  2782. VPSLLD $0x0c, Y11, Y15
  2783. VPSRLD $0x14, Y11, Y11
  2784. VPXOR Y15, Y11, Y11
  2785. VMOVDQA 224(BP), Y15
  2786. MOVQ R13, R10
  2787. MOVQ R14, R11
  2788. MOVQ R15, R12
  2789. ANDQ $0x03, R12
  2790. MOVQ R15, R13
  2791. ANDQ $-4, R13
  2792. MOVQ R8, R14
  2793. SHRQ $0x02, R8, R15
  2794. SHRQ $0x02, R8
  2795. ADDQ R13, R10
  2796. ADCQ R14, R11
  2797. ADCQ $0x00, R12
  2798. ADDQ R15, R10
  2799. ADCQ R8, R11
  2800. ADCQ $0x00, R12
  2801. VPADDD Y14, Y0, Y0
  2802. VPADDD Y9, Y5, Y5
  2803. VPADDD Y10, Y6, Y6
  2804. VPADDD Y11, Y7, Y7
  2805. VPXOR Y0, Y4, Y4
  2806. VPXOR Y5, Y1, Y1
  2807. VPXOR Y6, Y2, Y2
  2808. VPXOR Y7, Y3, Y3
  2809. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  2810. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  2811. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  2812. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  2813. ADDQ 16(SI)(CX*1), R10
  2814. ADCQ 24(SI)(CX*1), R11
  2815. ADCQ $0x01, R12
  2816. VPADDD Y4, Y12, Y12
  2817. VPADDD Y1, Y13, Y13
  2818. VPADDD Y2, Y8, Y8
  2819. VPADDD Y3, Y15, Y15
  2820. MOVQ (BP), DX
  2821. MOVQ DX, R15
  2822. MULXQ R10, R13, R14
  2823. IMULQ R12, R15
  2824. MULXQ R11, AX, DX
  2825. ADDQ AX, R14
  2826. ADCQ DX, R15
  2827. VPXOR Y12, Y14, Y14
  2828. VPXOR Y13, Y9, Y9
  2829. VPXOR Y8, Y10, Y10
  2830. VPXOR Y15, Y11, Y11
  2831. VMOVDQA Y15, 224(BP)
  2832. VPSLLD $0x07, Y14, Y15
  2833. VPSRLD $0x19, Y14, Y14
  2834. VPXOR Y15, Y14, Y14
  2835. VPSLLD $0x07, Y9, Y15
  2836. VPSRLD $0x19, Y9, Y9
  2837. VPXOR Y15, Y9, Y9
  2838. VPSLLD $0x07, Y10, Y15
  2839. VPSRLD $0x19, Y10, Y10
  2840. VPXOR Y15, Y10, Y10
  2841. VPSLLD $0x07, Y11, Y15
  2842. VPSRLD $0x19, Y11, Y11
  2843. VPXOR Y15, Y11, Y11
  2844. VMOVDQA 224(BP), Y15
  2845. MOVQ 8(BP), DX
  2846. MULXQ R10, R10, AX
  2847. ADDQ R10, R14
  2848. MULXQ R11, R11, R8
  2849. ADCQ R11, R15
  2850. ADCQ $0x00, R8
  2851. VPALIGNR $0x04, Y14, Y14, Y14
  2852. VPALIGNR $0x04, Y9, Y9, Y9
  2853. VPALIGNR $0x04, Y10, Y10, Y10
  2854. VPALIGNR $0x04, Y11, Y11, Y11
  2855. VPALIGNR $0x08, Y12, Y12, Y12
  2856. VPALIGNR $0x08, Y13, Y13, Y13
  2857. VPALIGNR $0x08, Y8, Y8, Y8
  2858. VPALIGNR $0x08, Y15, Y15, Y15
  2859. VPALIGNR $0x0c, Y4, Y4, Y4
  2860. VPALIGNR $0x0c, Y1, Y1, Y1
  2861. VPALIGNR $0x0c, Y2, Y2, Y2
  2862. VPALIGNR $0x0c, Y3, Y3, Y3
  2863. VPADDD Y14, Y0, Y0
  2864. VPADDD Y9, Y5, Y5
  2865. VPADDD Y10, Y6, Y6
  2866. VPADDD Y11, Y7, Y7
  2867. IMULQ R12, DX
  2868. ADDQ AX, R15
  2869. ADCQ DX, R8
  2870. VPXOR Y0, Y4, Y4
  2871. VPXOR Y5, Y1, Y1
  2872. VPXOR Y6, Y2, Y2
  2873. VPXOR Y7, Y3, Y3
  2874. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  2875. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  2876. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  2877. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  2878. MOVQ R13, R10
  2879. MOVQ R14, R11
  2880. MOVQ R15, R12
  2881. ANDQ $0x03, R12
  2882. MOVQ R15, R13
  2883. ANDQ $-4, R13
  2884. MOVQ R8, R14
  2885. SHRQ $0x02, R8, R15
  2886. SHRQ $0x02, R8
  2887. ADDQ R13, R10
  2888. ADCQ R14, R11
  2889. ADCQ $0x00, R12
  2890. ADDQ R15, R10
  2891. ADCQ R8, R11
  2892. ADCQ $0x00, R12
  2893. VPADDD Y4, Y12, Y12
  2894. VPADDD Y1, Y13, Y13
  2895. VPADDD Y2, Y8, Y8
  2896. VPADDD Y3, Y15, Y15
  2897. VPXOR Y12, Y14, Y14
  2898. VPXOR Y13, Y9, Y9
  2899. VPXOR Y8, Y10, Y10
  2900. VPXOR Y15, Y11, Y11
  2901. ADDQ 32(SI)(CX*1), R10
  2902. ADCQ 40(SI)(CX*1), R11
  2903. ADCQ $0x01, R12
  2904. LEAQ 48(CX), CX
  2905. VMOVDQA Y15, 224(BP)
  2906. VPSLLD $0x0c, Y14, Y15
  2907. VPSRLD $0x14, Y14, Y14
  2908. VPXOR Y15, Y14, Y14
  2909. VPSLLD $0x0c, Y9, Y15
  2910. VPSRLD $0x14, Y9, Y9
  2911. VPXOR Y15, Y9, Y9
  2912. VPSLLD $0x0c, Y10, Y15
  2913. VPSRLD $0x14, Y10, Y10
  2914. VPXOR Y15, Y10, Y10
  2915. VPSLLD $0x0c, Y11, Y15
  2916. VPSRLD $0x14, Y11, Y11
  2917. VPXOR Y15, Y11, Y11
  2918. VMOVDQA 224(BP), Y15
  2919. MOVQ (BP), DX
  2920. MOVQ DX, R15
  2921. MULXQ R10, R13, R14
  2922. IMULQ R12, R15
  2923. MULXQ R11, AX, DX
  2924. ADDQ AX, R14
  2925. ADCQ DX, R15
  2926. VPADDD Y14, Y0, Y0
  2927. VPADDD Y9, Y5, Y5
  2928. VPADDD Y10, Y6, Y6
  2929. VPADDD Y11, Y7, Y7
  2930. VPXOR Y0, Y4, Y4
  2931. VPXOR Y5, Y1, Y1
  2932. VPXOR Y6, Y2, Y2
  2933. VPXOR Y7, Y3, Y3
  2934. MOVQ 8(BP), DX
  2935. MULXQ R10, R10, AX
  2936. ADDQ R10, R14
  2937. MULXQ R11, R11, R8
  2938. ADCQ R11, R15
  2939. ADCQ $0x00, R8
  2940. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  2941. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  2942. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  2943. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  2944. VPADDD Y4, Y12, Y12
  2945. VPADDD Y1, Y13, Y13
  2946. VPADDD Y2, Y8, Y8
  2947. VPADDD Y3, Y15, Y15
  2948. IMULQ R12, DX
  2949. ADDQ AX, R15
  2950. ADCQ DX, R8
  2951. VPXOR Y12, Y14, Y14
  2952. VPXOR Y13, Y9, Y9
  2953. VPXOR Y8, Y10, Y10
  2954. VPXOR Y15, Y11, Y11
  2955. VMOVDQA Y15, 224(BP)
  2956. VPSLLD $0x07, Y14, Y15
  2957. VPSRLD $0x19, Y14, Y14
  2958. VPXOR Y15, Y14, Y14
  2959. VPSLLD $0x07, Y9, Y15
  2960. VPSRLD $0x19, Y9, Y9
  2961. VPXOR Y15, Y9, Y9
  2962. VPSLLD $0x07, Y10, Y15
  2963. VPSRLD $0x19, Y10, Y10
  2964. VPXOR Y15, Y10, Y10
  2965. VPSLLD $0x07, Y11, Y15
  2966. VPSRLD $0x19, Y11, Y11
  2967. VPXOR Y15, Y11, Y11
  2968. VMOVDQA 224(BP), Y15
  2969. MOVQ R13, R10
  2970. MOVQ R14, R11
  2971. MOVQ R15, R12
  2972. ANDQ $0x03, R12
  2973. MOVQ R15, R13
  2974. ANDQ $-4, R13
  2975. MOVQ R8, R14
  2976. SHRQ $0x02, R8, R15
  2977. SHRQ $0x02, R8
  2978. ADDQ R13, R10
  2979. ADCQ R14, R11
  2980. ADCQ $0x00, R12
  2981. ADDQ R15, R10
  2982. ADCQ R8, R11
  2983. ADCQ $0x00, R12
  2984. VPALIGNR $0x0c, Y14, Y14, Y14
  2985. VPALIGNR $0x0c, Y9, Y9, Y9
  2986. VPALIGNR $0x0c, Y10, Y10, Y10
  2987. VPALIGNR $0x0c, Y11, Y11, Y11
  2988. VPALIGNR $0x08, Y12, Y12, Y12
  2989. VPALIGNR $0x08, Y13, Y13, Y13
  2990. VPALIGNR $0x08, Y8, Y8, Y8
  2991. VPALIGNR $0x08, Y15, Y15, Y15
  2992. VPALIGNR $0x04, Y4, Y4, Y4
  2993. VPALIGNR $0x04, Y1, Y1, Y1
  2994. VPALIGNR $0x04, Y2, Y2, Y2
  2995. VPALIGNR $0x04, Y3, Y3, Y3
  2996. CMPQ CX, $0x000001e0
  2997. JNE openAVX2InternalLoop
  2998. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  2999. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  3000. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  3001. VPADDD ·chacha20Constants<>+0(SB), Y7, Y7
  3002. VPADDD 32(BP), Y14, Y14
  3003. VPADDD 32(BP), Y9, Y9
  3004. VPADDD 32(BP), Y10, Y10
  3005. VPADDD 32(BP), Y11, Y11
  3006. VPADDD 64(BP), Y12, Y12
  3007. VPADDD 64(BP), Y13, Y13
  3008. VPADDD 64(BP), Y8, Y8
  3009. VPADDD 64(BP), Y15, Y15
  3010. VPADDD 96(BP), Y4, Y4
  3011. VPADDD 128(BP), Y1, Y1
  3012. VPADDD 160(BP), Y2, Y2
  3013. VPADDD 192(BP), Y3, Y3
  3014. VMOVDQA Y15, 224(BP)
  3015. // We only hashed 480 of the 512 bytes available - hash the remaining 32 here
  3016. ADDQ 480(SI), R10
  3017. ADCQ 488(SI), R11
  3018. ADCQ $0x01, R12
  3019. MOVQ (BP), DX
  3020. MOVQ DX, R15
  3021. MULXQ R10, R13, R14
  3022. IMULQ R12, R15
  3023. MULXQ R11, AX, DX
  3024. ADDQ AX, R14
  3025. ADCQ DX, R15
  3026. MOVQ 8(BP), DX
  3027. MULXQ R10, R10, AX
  3028. ADDQ R10, R14
  3029. MULXQ R11, R11, R8
  3030. ADCQ R11, R15
  3031. ADCQ $0x00, R8
  3032. IMULQ R12, DX
  3033. ADDQ AX, R15
  3034. ADCQ DX, R8
  3035. MOVQ R13, R10
  3036. MOVQ R14, R11
  3037. MOVQ R15, R12
  3038. ANDQ $0x03, R12
  3039. MOVQ R15, R13
  3040. ANDQ $-4, R13
  3041. MOVQ R8, R14
  3042. SHRQ $0x02, R8, R15
  3043. SHRQ $0x02, R8
  3044. ADDQ R13, R10
  3045. ADCQ R14, R11
  3046. ADCQ $0x00, R12
  3047. ADDQ R15, R10
  3048. ADCQ R8, R11
  3049. ADCQ $0x00, R12
  3050. VPERM2I128 $0x02, Y0, Y14, Y15
  3051. VPERM2I128 $0x13, Y0, Y14, Y14
  3052. VPERM2I128 $0x02, Y12, Y4, Y0
  3053. VPERM2I128 $0x13, Y12, Y4, Y12
  3054. VPXOR (SI), Y15, Y15
  3055. VPXOR 32(SI), Y0, Y0
  3056. VPXOR 64(SI), Y14, Y14
  3057. VPXOR 96(SI), Y12, Y12
  3058. VMOVDQU Y15, (DI)
  3059. VMOVDQU Y0, 32(DI)
  3060. VMOVDQU Y14, 64(DI)
  3061. VMOVDQU Y12, 96(DI)
  3062. VPERM2I128 $0x02, Y5, Y9, Y0
  3063. VPERM2I128 $0x02, Y13, Y1, Y14
  3064. VPERM2I128 $0x13, Y5, Y9, Y12
  3065. VPERM2I128 $0x13, Y13, Y1, Y4
  3066. VPXOR 128(SI), Y0, Y0
  3067. VPXOR 160(SI), Y14, Y14
  3068. VPXOR 192(SI), Y12, Y12
  3069. VPXOR 224(SI), Y4, Y4
  3070. VMOVDQU Y0, 128(DI)
  3071. VMOVDQU Y14, 160(DI)
  3072. VMOVDQU Y12, 192(DI)
  3073. VMOVDQU Y4, 224(DI)
  3074. // and here
  3075. ADDQ 496(SI), R10
  3076. ADCQ 504(SI), R11
  3077. ADCQ $0x01, R12
  3078. MOVQ (BP), DX
  3079. MOVQ DX, R15
  3080. MULXQ R10, R13, R14
  3081. IMULQ R12, R15
  3082. MULXQ R11, AX, DX
  3083. ADDQ AX, R14
  3084. ADCQ DX, R15
  3085. MOVQ 8(BP), DX
  3086. MULXQ R10, R10, AX
  3087. ADDQ R10, R14
  3088. MULXQ R11, R11, R8
  3089. ADCQ R11, R15
  3090. ADCQ $0x00, R8
  3091. IMULQ R12, DX
  3092. ADDQ AX, R15
  3093. ADCQ DX, R8
  3094. MOVQ R13, R10
  3095. MOVQ R14, R11
  3096. MOVQ R15, R12
  3097. ANDQ $0x03, R12
  3098. MOVQ R15, R13
  3099. ANDQ $-4, R13
  3100. MOVQ R8, R14
  3101. SHRQ $0x02, R8, R15
  3102. SHRQ $0x02, R8
  3103. ADDQ R13, R10
  3104. ADCQ R14, R11
  3105. ADCQ $0x00, R12
  3106. ADDQ R15, R10
  3107. ADCQ R8, R11
  3108. ADCQ $0x00, R12
  3109. VPERM2I128 $0x02, Y6, Y10, Y0
  3110. VPERM2I128 $0x02, Y8, Y2, Y14
  3111. VPERM2I128 $0x13, Y6, Y10, Y12
  3112. VPERM2I128 $0x13, Y8, Y2, Y4
  3113. VPXOR 256(SI), Y0, Y0
  3114. VPXOR 288(SI), Y14, Y14
  3115. VPXOR 320(SI), Y12, Y12
  3116. VPXOR 352(SI), Y4, Y4
  3117. VMOVDQU Y0, 256(DI)
  3118. VMOVDQU Y14, 288(DI)
  3119. VMOVDQU Y12, 320(DI)
  3120. VMOVDQU Y4, 352(DI)
  3121. VPERM2I128 $0x02, Y7, Y11, Y0
  3122. VPERM2I128 $0x02, 224(BP), Y3, Y14
  3123. VPERM2I128 $0x13, Y7, Y11, Y12
  3124. VPERM2I128 $0x13, 224(BP), Y3, Y4
  3125. VPXOR 384(SI), Y0, Y0
  3126. VPXOR 416(SI), Y14, Y14
  3127. VPXOR 448(SI), Y12, Y12
  3128. VPXOR 480(SI), Y4, Y4
  3129. VMOVDQU Y0, 384(DI)
  3130. VMOVDQU Y14, 416(DI)
  3131. VMOVDQU Y12, 448(DI)
  3132. VMOVDQU Y4, 480(DI)
  3133. LEAQ 512(SI), SI
  3134. LEAQ 512(DI), DI
  3135. SUBQ $0x00000200, BX
  3136. JMP openAVX2MainLoop
  // Reached with fewer than 512 bytes left in BX: choose a tail routine sized to
  // the remainder (all compares are unsigned).
  3137. openAVX2MainLoopDone:
  3138. // Handle the various tail sizes efficiently
  // Nothing left: go straight to finalization.
  3139. TESTQ BX, BX
  3140. JE openSSEFinalize
  // 1..128 bytes
  3141. CMPQ BX, $0x80
  3142. JBE openAVX2Tail128
  // 129..256 bytes
  3143. CMPQ BX, $0x00000100
  3144. JBE openAVX2Tail256
  // 257..384 bytes
  3145. CMPQ BX, $0x00000180
  3146. JBE openAVX2Tail384
  // 385..511 bytes
  3147. JMP openAVX2Tail512
  // Short path for inputs of at most 192 bytes: sets up two ChaCha states
  // (Y0/Y14/Y12/Y4 and Y5/Y9/Y13/Y1) and keeps copies of the input rows for the
  // add after the rounds.
  3148. openAVX2192:
  // Second state = copy of the first with its counter row advanced by avx2IncMask.
  3149. VMOVDQA Y0, Y5
  3150. VMOVDQA Y14, Y9
  3151. VMOVDQA Y12, Y13
  3152. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  // Saved inputs: rows 0..2 in Y6, Y10, Y8; the two counter rows in Y2 and Y15.
  3153. VMOVDQA Y0, Y6
  3154. VMOVDQA Y14, Y10
  3155. VMOVDQA Y12, Y8
  3156. VMOVDQA Y4, Y2
  3157. VMOVDQA Y1, Y15
  // R9 counts the 10 iterations of the round loop that follows.
  3158. MOVQ $0x0000000a, R9
  3159. openAVX2192InnerCipherLoop:
  3160. VPADDD Y14, Y0, Y0
  3161. VPXOR Y0, Y4, Y4
  3162. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3163. VPADDD Y4, Y12, Y12
  3164. VPXOR Y12, Y14, Y14
  3165. VPSLLD $0x0c, Y14, Y3
  3166. VPSRLD $0x14, Y14, Y14
  3167. VPXOR Y3, Y14, Y14
  3168. VPADDD Y14, Y0, Y0
  3169. VPXOR Y0, Y4, Y4
  3170. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3171. VPADDD Y4, Y12, Y12
  3172. VPXOR Y12, Y14, Y14
  3173. VPSLLD $0x07, Y14, Y3
  3174. VPSRLD $0x19, Y14, Y14
  3175. VPXOR Y3, Y14, Y14
  3176. VPADDD Y9, Y5, Y5
  3177. VPXOR Y5, Y1, Y1
  3178. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3179. VPADDD Y1, Y13, Y13
  3180. VPXOR Y13, Y9, Y9
  3181. VPSLLD $0x0c, Y9, Y3
  3182. VPSRLD $0x14, Y9, Y9
  3183. VPXOR Y3, Y9, Y9
  3184. VPADDD Y9, Y5, Y5
  3185. VPXOR Y5, Y1, Y1
  3186. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3187. VPADDD Y1, Y13, Y13
  3188. VPXOR Y13, Y9, Y9
  3189. VPSLLD $0x07, Y9, Y3
  3190. VPSRLD $0x19, Y9, Y9
  3191. VPXOR Y3, Y9, Y9
  3192. VPALIGNR $0x04, Y14, Y14, Y14
  3193. VPALIGNR $0x04, Y9, Y9, Y9
  3194. VPALIGNR $0x08, Y12, Y12, Y12
  3195. VPALIGNR $0x08, Y13, Y13, Y13
  3196. VPALIGNR $0x0c, Y4, Y4, Y4
  3197. VPALIGNR $0x0c, Y1, Y1, Y1
  3198. VPADDD Y14, Y0, Y0
  3199. VPXOR Y0, Y4, Y4
  3200. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3201. VPADDD Y4, Y12, Y12
  3202. VPXOR Y12, Y14, Y14
  3203. VPSLLD $0x0c, Y14, Y3
  3204. VPSRLD $0x14, Y14, Y14
  3205. VPXOR Y3, Y14, Y14
  3206. VPADDD Y14, Y0, Y0
  3207. VPXOR Y0, Y4, Y4
  3208. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3209. VPADDD Y4, Y12, Y12
  3210. VPXOR Y12, Y14, Y14
  3211. VPSLLD $0x07, Y14, Y3
  3212. VPSRLD $0x19, Y14, Y14
  3213. VPXOR Y3, Y14, Y14
  3214. VPADDD Y9, Y5, Y5
  3215. VPXOR Y5, Y1, Y1
  3216. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3217. VPADDD Y1, Y13, Y13
  3218. VPXOR Y13, Y9, Y9
  3219. VPSLLD $0x0c, Y9, Y3
  3220. VPSRLD $0x14, Y9, Y9
  3221. VPXOR Y3, Y9, Y9
  3222. VPADDD Y9, Y5, Y5
  3223. VPXOR Y5, Y1, Y1
  3224. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3225. VPADDD Y1, Y13, Y13
  3226. VPXOR Y13, Y9, Y9
  3227. VPSLLD $0x07, Y9, Y3
  3228. VPSRLD $0x19, Y9, Y9
  3229. VPXOR Y3, Y9, Y9
  3230. VPALIGNR $0x0c, Y14, Y14, Y14
  3231. VPALIGNR $0x0c, Y9, Y9, Y9
  3232. VPALIGNR $0x08, Y12, Y12, Y12
  3233. VPALIGNR $0x08, Y13, Y13, Y13
  3234. VPALIGNR $0x04, Y4, Y4, Y4
  3235. VPALIGNR $0x04, Y1, Y1, Y1
  3236. DECQ R9
  3237. JNE openAVX2192InnerCipherLoop
  3238. VPADDD Y6, Y0, Y0
  3239. VPADDD Y6, Y5, Y5
  3240. VPADDD Y10, Y14, Y14
  3241. VPADDD Y10, Y9, Y9
  3242. VPADDD Y8, Y12, Y12
  3243. VPADDD Y8, Y13, Y13
  3244. VPADDD Y2, Y4, Y4
  3245. VPADDD Y15, Y1, Y1
  3246. VPERM2I128 $0x02, Y0, Y14, Y3
  3247. // Clamp and store poly key
  3248. VPAND ·polyClampMask<>+0(SB), Y3, Y3
  3249. VMOVDQA Y3, (BP)
  3250. // Stream for up to 192 bytes
  3251. VPERM2I128 $0x13, Y0, Y14, Y0
  3252. VPERM2I128 $0x13, Y12, Y4, Y14
  3253. VPERM2I128 $0x02, Y5, Y9, Y12
  3254. VPERM2I128 $0x02, Y13, Y1, Y4
  3255. VPERM2I128 $0x13, Y5, Y9, Y5
  3256. VPERM2I128 $0x13, Y13, Y1, Y9
  // Shared tail of the short (<= 192 / <= 320 byte) paths. On entry the
  // keystream is queued, 32 bytes per register, in the order
  // Y0, Y14, Y12, Y4, Y5, Y9, Y13, Y1, Y6, Y10 and the clamped key is at (BP).
  3257. openAVX2ShortOpen:
  3258. // Hash
  // R9 carries ad_len into polyHashADInternal (defined elsewhere); the loop
  // below continues from the accumulator in R10:R11:R12 - presumably left
  // there by that call, confirm.
  3259. MOVQ ad_len+80(FP), R9
  3260. CALL polyHashADInternal<>(SB)
  // Each pass hashes and then decrypts 32 bytes; exits when fewer than 32 remain.
  3261. openAVX2ShortOpenLoop:
  3262. CMPQ BX, $0x20
  3263. JB openAVX2ShortTail32
  3264. SUBQ $0x20, BX
  3265. // Load for hashing
  // First 16-byte block: acc += block (ADCQ $0x01 also adds the 2^128 bit).
  3266. ADDQ (SI), R10
  3267. ADCQ 8(SI), R11
  3268. ADCQ $0x01, R12
  // acc * key words at (BP) and 8(BP) -> R8:R15:R14:R13.
  3269. MOVQ (BP), DX
  3270. MOVQ DX, R15
  3271. MULXQ R10, R13, R14
  3272. IMULQ R12, R15
  3273. MULXQ R11, AX, DX
  3274. ADDQ AX, R14
  3275. ADCQ DX, R15
  3276. MOVQ 8(BP), DX
  3277. MULXQ R10, R10, AX
  3278. ADDQ R10, R14
  3279. MULXQ R11, R11, R8
  3280. ADCQ R11, R15
  3281. ADCQ $0x00, R8
  3282. IMULQ R12, DX
  3283. ADDQ AX, R15
  3284. ADCQ DX, R8
  // Reduce: keep the low 130 bits, add the bits above 2^130 back as 4*c + c.
  3285. MOVQ R13, R10
  3286. MOVQ R14, R11
  3287. MOVQ R15, R12
  3288. ANDQ $0x03, R12
  3289. MOVQ R15, R13
  3290. ANDQ $-4, R13
  3291. MOVQ R8, R14
  3292. SHRQ $0x02, R8, R15
  3293. SHRQ $0x02, R8
  3294. ADDQ R13, R10
  3295. ADCQ R14, R11
  3296. ADCQ $0x00, R12
  3297. ADDQ R15, R10
  3298. ADCQ R8, R11
  3299. ADCQ $0x00, R12
  // Second 16-byte block, same add / multiply / reduce sequence.
  3300. ADDQ 16(SI), R10
  3301. ADCQ 24(SI), R11
  3302. ADCQ $0x01, R12
  3303. MOVQ (BP), DX
  3304. MOVQ DX, R15
  3305. MULXQ R10, R13, R14
  3306. IMULQ R12, R15
  3307. MULXQ R11, AX, DX
  3308. ADDQ AX, R14
  3309. ADCQ DX, R15
  3310. MOVQ 8(BP), DX
  3311. MULXQ R10, R10, AX
  3312. ADDQ R10, R14
  3313. MULXQ R11, R11, R8
  3314. ADCQ R11, R15
  3315. ADCQ $0x00, R8
  3316. IMULQ R12, DX
  3317. ADDQ AX, R15
  3318. ADCQ DX, R8
  3319. MOVQ R13, R10
  3320. MOVQ R14, R11
  3321. MOVQ R15, R12
  3322. ANDQ $0x03, R12
  3323. MOVQ R15, R13
  3324. ANDQ $-4, R13
  3325. MOVQ R8, R14
  3326. SHRQ $0x02, R8, R15
  3327. SHRQ $0x02, R8
  3328. ADDQ R13, R10
  3329. ADCQ R14, R11
  3330. ADCQ $0x00, R12
  3331. ADDQ R15, R10
  3332. ADCQ R8, R11
  3333. ADCQ $0x00, R12
  3334. // Load for decryption
  // The input is hashed before it is XORed and written, so the hash covers
  // the ciphertext even if DI and SI refer to the same buffer.
  3335. VPXOR (SI), Y0, Y0
  3336. VMOVDQU Y0, (DI)
  3337. LEAQ 32(SI), SI
  3338. LEAQ 32(DI), DI
  3339. // Shift stream left
  // Y0 was consumed; move every queued keystream register up one slot.
  3340. VMOVDQA Y14, Y0
  3341. VMOVDQA Y12, Y14
  3342. VMOVDQA Y4, Y12
  3343. VMOVDQA Y5, Y4
  3344. VMOVDQA Y9, Y5
  3345. VMOVDQA Y13, Y9
  3346. VMOVDQA Y1, Y13
  3347. VMOVDQA Y6, Y1
  3348. VMOVDQA Y10, Y6
  3349. JMP openAVX2ShortOpenLoop
  // Fewer than 32 bytes remain on the short path. Handles one more full
  // 16-byte block if present, then hands the last partial block to the SSE tail
  // with the next 16 keystream bytes in X1.
  3350. openAVX2ShortTail32:
  3351. CMPQ BX, $0x10
  // VMOVDQA leaves the flags alone, so the JB below still tests the CMPQ above.
  // X1 = low 16 bytes of the current keystream register.
  3352. VMOVDQA X0, X1
  3353. JB openAVX2ShortDone
  3354. SUBQ $0x10, BX
  3355. // Load for hashing
  // acc += 16-byte block (ADCQ $0x01 also adds the 2^128 bit).
  3356. ADDQ (SI), R10
  3357. ADCQ 8(SI), R11
  3358. ADCQ $0x01, R12
  // acc * key words at (BP) and 8(BP) -> R8:R15:R14:R13.
  3359. MOVQ (BP), DX
  3360. MOVQ DX, R15
  3361. MULXQ R10, R13, R14
  3362. IMULQ R12, R15
  3363. MULXQ R11, AX, DX
  3364. ADDQ AX, R14
  3365. ADCQ DX, R15
  3366. MOVQ 8(BP), DX
  3367. MULXQ R10, R10, AX
  3368. ADDQ R10, R14
  3369. MULXQ R11, R11, R8
  3370. ADCQ R11, R15
  3371. ADCQ $0x00, R8
  3372. IMULQ R12, DX
  3373. ADDQ AX, R15
  3374. ADCQ DX, R8
  // Reduce: keep the low 130 bits, add the bits above 2^130 back as 4*c + c.
  3375. MOVQ R13, R10
  3376. MOVQ R14, R11
  3377. MOVQ R15, R12
  3378. ANDQ $0x03, R12
  3379. MOVQ R15, R13
  3380. ANDQ $-4, R13
  3381. MOVQ R8, R14
  3382. SHRQ $0x02, R8, R15
  3383. SHRQ $0x02, R8
  3384. ADDQ R13, R10
  3385. ADCQ R14, R11
  3386. ADCQ $0x00, R12
  3387. ADDQ R15, R10
  3388. ADCQ R8, R11
  3389. ADCQ $0x00, R12
  3390. // Load for decryption
  // Decrypt 16 bytes with the low lane of Y0 (X12 is scratch).
  3391. VPXOR (SI), X0, X12
  3392. VMOVDQU X12, (DI)
  3393. LEAQ 16(SI), SI
  3394. LEAQ 16(DI), DI
  // Copy the high lane of Y0 into both lanes so X0/X1 hold the next 16
  // keystream bytes.
  3395. VPERM2I128 $0x11, Y0, Y0, Y0
  3396. VMOVDQA X0, X1
  3397. openAVX2ShortDone:
  // Clear the upper YMM halves before continuing in the SSE code, which
  // presumably finishes the remaining (< 16) bytes using X1 - confirm at
  // openSSETail16.
  3398. VZEROUPPER
  3399. JMP openSSETail16
  // Short path for inputs of at most 320 bytes: sets up three ChaCha states
  // (Y0/Y14/Y12/Y4, Y5/Y9/Y13/Y1, Y6/Y10/Y8/Y2) whose counter rows differ by
  // successive additions of avx2IncMask.
  3400. openAVX2320:
  3401. VMOVDQA Y0, Y5
  3402. VMOVDQA Y14, Y9
  3403. VMOVDQA Y12, Y13
  3404. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  3405. VMOVDQA Y0, Y6
  3406. VMOVDQA Y14, Y10
  3407. VMOVDQA Y12, Y8
  3408. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  // Saved inputs for the add after the rounds: row 1 in Y7, row 2 in Y11 and the
  // first counter row in Y15. Row 0 is not saved; it is reloaded from
  // chacha20Constants after the round loop.
  3409. VMOVDQA Y14, Y7
  3410. VMOVDQA Y12, Y11
  3411. VMOVDQA Y4, Y15
  // R9 counts the 10 iterations of the round loop that follows.
  3412. MOVQ $0x0000000a, R9
  3413. openAVX2320InnerCipherLoop:
  3414. VPADDD Y14, Y0, Y0
  3415. VPXOR Y0, Y4, Y4
  3416. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3417. VPADDD Y4, Y12, Y12
  3418. VPXOR Y12, Y14, Y14
  3419. VPSLLD $0x0c, Y14, Y3
  3420. VPSRLD $0x14, Y14, Y14
  3421. VPXOR Y3, Y14, Y14
  3422. VPADDD Y14, Y0, Y0
  3423. VPXOR Y0, Y4, Y4
  3424. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3425. VPADDD Y4, Y12, Y12
  3426. VPXOR Y12, Y14, Y14
  3427. VPSLLD $0x07, Y14, Y3
  3428. VPSRLD $0x19, Y14, Y14
  3429. VPXOR Y3, Y14, Y14
  3430. VPADDD Y9, Y5, Y5
  3431. VPXOR Y5, Y1, Y1
  3432. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3433. VPADDD Y1, Y13, Y13
  3434. VPXOR Y13, Y9, Y9
  3435. VPSLLD $0x0c, Y9, Y3
  3436. VPSRLD $0x14, Y9, Y9
  3437. VPXOR Y3, Y9, Y9
  3438. VPADDD Y9, Y5, Y5
  3439. VPXOR Y5, Y1, Y1
  3440. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3441. VPADDD Y1, Y13, Y13
  3442. VPXOR Y13, Y9, Y9
  3443. VPSLLD $0x07, Y9, Y3
  3444. VPSRLD $0x19, Y9, Y9
  3445. VPXOR Y3, Y9, Y9
  3446. VPADDD Y10, Y6, Y6
  3447. VPXOR Y6, Y2, Y2
  3448. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  3449. VPADDD Y2, Y8, Y8
  3450. VPXOR Y8, Y10, Y10
  3451. VPSLLD $0x0c, Y10, Y3
  3452. VPSRLD $0x14, Y10, Y10
  3453. VPXOR Y3, Y10, Y10
  3454. VPADDD Y10, Y6, Y6
  3455. VPXOR Y6, Y2, Y2
  3456. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  3457. VPADDD Y2, Y8, Y8
  3458. VPXOR Y8, Y10, Y10
  3459. VPSLLD $0x07, Y10, Y3
  3460. VPSRLD $0x19, Y10, Y10
  3461. VPXOR Y3, Y10, Y10
  3462. VPALIGNR $0x04, Y14, Y14, Y14
  3463. VPALIGNR $0x04, Y9, Y9, Y9
  3464. VPALIGNR $0x04, Y10, Y10, Y10
  3465. VPALIGNR $0x08, Y12, Y12, Y12
  3466. VPALIGNR $0x08, Y13, Y13, Y13
  3467. VPALIGNR $0x08, Y8, Y8, Y8
  3468. VPALIGNR $0x0c, Y4, Y4, Y4
  3469. VPALIGNR $0x0c, Y1, Y1, Y1
  3470. VPALIGNR $0x0c, Y2, Y2, Y2
  3471. VPADDD Y14, Y0, Y0
  3472. VPXOR Y0, Y4, Y4
  3473. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3474. VPADDD Y4, Y12, Y12
  3475. VPXOR Y12, Y14, Y14
  3476. VPSLLD $0x0c, Y14, Y3
  3477. VPSRLD $0x14, Y14, Y14
  3478. VPXOR Y3, Y14, Y14
  3479. VPADDD Y14, Y0, Y0
  3480. VPXOR Y0, Y4, Y4
  3481. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3482. VPADDD Y4, Y12, Y12
  3483. VPXOR Y12, Y14, Y14
  3484. VPSLLD $0x07, Y14, Y3
  3485. VPSRLD $0x19, Y14, Y14
  3486. VPXOR Y3, Y14, Y14
  3487. VPADDD Y9, Y5, Y5
  3488. VPXOR Y5, Y1, Y1
  3489. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3490. VPADDD Y1, Y13, Y13
  3491. VPXOR Y13, Y9, Y9
  3492. VPSLLD $0x0c, Y9, Y3
  3493. VPSRLD $0x14, Y9, Y9
  3494. VPXOR Y3, Y9, Y9
  3495. VPADDD Y9, Y5, Y5
  3496. VPXOR Y5, Y1, Y1
  3497. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3498. VPADDD Y1, Y13, Y13
  3499. VPXOR Y13, Y9, Y9
  3500. VPSLLD $0x07, Y9, Y3
  3501. VPSRLD $0x19, Y9, Y9
  3502. VPXOR Y3, Y9, Y9
  3503. VPADDD Y10, Y6, Y6
  3504. VPXOR Y6, Y2, Y2
  3505. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  3506. VPADDD Y2, Y8, Y8
  3507. VPXOR Y8, Y10, Y10
  3508. VPSLLD $0x0c, Y10, Y3
  3509. VPSRLD $0x14, Y10, Y10
  3510. VPXOR Y3, Y10, Y10
  3511. VPADDD Y10, Y6, Y6
  3512. VPXOR Y6, Y2, Y2
  3513. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  3514. VPADDD Y2, Y8, Y8
  3515. VPXOR Y8, Y10, Y10
  3516. VPSLLD $0x07, Y10, Y3
  3517. VPSRLD $0x19, Y10, Y10
  3518. VPXOR Y3, Y10, Y10
  3519. VPALIGNR $0x0c, Y14, Y14, Y14
  3520. VPALIGNR $0x0c, Y9, Y9, Y9
  3521. VPALIGNR $0x0c, Y10, Y10, Y10
  3522. VPALIGNR $0x08, Y12, Y12, Y12
  3523. VPALIGNR $0x08, Y13, Y13, Y13
  3524. VPALIGNR $0x08, Y8, Y8, Y8
  3525. VPALIGNR $0x04, Y4, Y4, Y4
  3526. VPALIGNR $0x04, Y1, Y1, Y1
  3527. VPALIGNR $0x04, Y2, Y2, Y2
  3528. DECQ R9
  3529. JNE openAVX2320InnerCipherLoop
  3530. VMOVDQA ·chacha20Constants<>+0(SB), Y3
  3531. VPADDD Y3, Y0, Y0
  3532. VPADDD Y3, Y5, Y5
  3533. VPADDD Y3, Y6, Y6
  3534. VPADDD Y7, Y14, Y14
  3535. VPADDD Y7, Y9, Y9
  3536. VPADDD Y7, Y10, Y10
  3537. VPADDD Y11, Y12, Y12
  3538. VPADDD Y11, Y13, Y13
  3539. VPADDD Y11, Y8, Y8
  3540. VMOVDQA ·avx2IncMask<>+0(SB), Y3
  3541. VPADDD Y15, Y4, Y4
  3542. VPADDD Y3, Y15, Y15
  3543. VPADDD Y15, Y1, Y1
  3544. VPADDD Y3, Y15, Y15
  3545. VPADDD Y15, Y2, Y2
  3546. // Clamp and store poly key
  3547. VPERM2I128 $0x02, Y0, Y14, Y3
  3548. VPAND ·polyClampMask<>+0(SB), Y3, Y3
  3549. VMOVDQA Y3, (BP)
  3550. // Stream for up to 320 bytes
  3551. VPERM2I128 $0x13, Y0, Y14, Y0
  3552. VPERM2I128 $0x13, Y12, Y4, Y14
  3553. VPERM2I128 $0x02, Y5, Y9, Y12
  3554. VPERM2I128 $0x02, Y13, Y1, Y4
  3555. VPERM2I128 $0x13, Y5, Y9, Y5
  3556. VPERM2I128 $0x13, Y13, Y1, Y9
  3557. VPERM2I128 $0x02, Y6, Y10, Y13
  3558. VPERM2I128 $0x02, Y8, Y2, Y1
  3559. VPERM2I128 $0x13, Y6, Y10, Y6
  3560. VPERM2I128 $0x13, Y8, Y2, Y10
  3561. JMP openAVX2ShortOpen
  // Tail for 1..128 remaining bytes: one ChaCha state (two blocks, one per
  // lane) in Y5/Y9/Y13/Y1, with hashing of the remaining input interleaved
  // into the round loop.
  3562. openAVX2Tail128:
  3563. // Need to decrypt up to 128 bytes - prepare two blocks
  3564. VMOVDQA ·chacha20Constants<>+0(SB), Y5
  3565. VMOVDQA 32(BP), Y9
  3566. VMOVDQA 64(BP), Y13
  // Counter row: last counter used, at 192(BP), advanced by avx2IncMask. Y4
  // keeps a copy for the add after the rounds.
  3567. VMOVDQA 192(BP), Y1
  3568. VPADDD ·avx2IncMask<>+0(SB), Y1, Y1
  3569. VMOVDQA Y1, Y4
  // R9 = bytes hashed so far (also the round-loop progress, 16 per iteration);
  // CX = remaining length rounded down to a multiple of 16 = bytes to hash here.
  3570. XORQ R9, R9
  3571. MOVQ BX, CX
  3572. ANDQ $-16, CX
  // No full 16-byte block to hash: enter the loop at the rounds-only label.
  3573. TESTQ CX, CX
  3574. JE openAVX2Tail128LoopB
  // Hashes one 16-byte block at SI+R9 into the accumulator R12:R11:R10, then
  // falls through into the round step at openAVX2Tail128LoopB.
  3575. openAVX2Tail128LoopA:
  // acc += block (ADCQ $0x01 also adds the 2^128 bit).
  3576. ADDQ (SI)(R9*1), R10
  3577. ADCQ 8(SI)(R9*1), R11
  3578. ADCQ $0x01, R12
  // acc * key words at (BP) and 8(BP) -> R8:R15:R14:R13.
  3579. MOVQ (BP), DX
  3580. MOVQ DX, R15
  3581. MULXQ R10, R13, R14
  3582. IMULQ R12, R15
  3583. MULXQ R11, AX, DX
  3584. ADDQ AX, R14
  3585. ADCQ DX, R15
  3586. MOVQ 8(BP), DX
  3587. MULXQ R10, R10, AX
  3588. ADDQ R10, R14
  3589. MULXQ R11, R11, R8
  3590. ADCQ R11, R15
  3591. ADCQ $0x00, R8
  3592. IMULQ R12, DX
  3593. ADDQ AX, R15
  3594. ADCQ DX, R8
  // Reduce: keep the low 130 bits, add the bits above 2^130 back as 4*c + c.
  3595. MOVQ R13, R10
  3596. MOVQ R14, R11
  3597. MOVQ R15, R12
  3598. ANDQ $0x03, R12
  3599. MOVQ R15, R13
  3600. ANDQ $-4, R13
  3601. MOVQ R8, R14
  3602. SHRQ $0x02, R8, R15
  3603. SHRQ $0x02, R8
  3604. ADDQ R13, R10
  3605. ADCQ R14, R11
  3606. ADCQ $0x00, R12
  3607. ADDQ R15, R10
  3608. ADCQ R8, R11
  3609. ADCQ $0x00, R12
  // One iteration = two quarter-round groups on the state Y5/Y9/Y13/Y1 (Y3 is
  // scratch). R9 advances by 16 per iteration and the loop ends at 160, i.e.
  // after 10 iterations; while R9 < CX each iteration is preceded by one
  // hashed block via openAVX2Tail128LoopA.
  3610. openAVX2Tail128LoopB:
  3611. ADDQ $0x10, R9
  // First quarter-round group (rotations 16, 12, 8, 7).
  3612. VPADDD Y9, Y5, Y5
  3613. VPXOR Y5, Y1, Y1
  3614. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3615. VPADDD Y1, Y13, Y13
  3616. VPXOR Y13, Y9, Y9
  3617. VPSLLD $0x0c, Y9, Y3
  3618. VPSRLD $0x14, Y9, Y9
  3619. VPXOR Y3, Y9, Y9
  3620. VPADDD Y9, Y5, Y5
  3621. VPXOR Y5, Y1, Y1
  3622. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3623. VPADDD Y1, Y13, Y13
  3624. VPXOR Y13, Y9, Y9
  3625. VPSLLD $0x07, Y9, Y3
  3626. VPSRLD $0x19, Y9, Y9
  3627. VPXOR Y3, Y9, Y9
  // Rotate rows 1..3 by 1, 2, 3 words so the next group works on diagonals.
  3628. VPALIGNR $0x04, Y9, Y9, Y9
  3629. VPALIGNR $0x08, Y13, Y13, Y13
  3630. VPALIGNR $0x0c, Y1, Y1, Y1
  // Second quarter-round group.
  3631. VPADDD Y9, Y5, Y5
  3632. VPXOR Y5, Y1, Y1
  3633. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3634. VPADDD Y1, Y13, Y13
  3635. VPXOR Y13, Y9, Y9
  3636. VPSLLD $0x0c, Y9, Y3
  3637. VPSRLD $0x14, Y9, Y9
  3638. VPXOR Y3, Y9, Y9
  3639. VPADDD Y9, Y5, Y5
  3640. VPXOR Y5, Y1, Y1
  3641. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3642. VPADDD Y1, Y13, Y13
  3643. VPXOR Y13, Y9, Y9
  3644. VPSLLD $0x07, Y9, Y3
  3645. VPSRLD $0x19, Y9, Y9
  3646. VPXOR Y3, Y9, Y9
  // Undo the row rotation.
  3647. VPALIGNR $0x0c, Y9, Y9, Y9
  3648. VPALIGNR $0x08, Y13, Y13, Y13
  3649. VPALIGNR $0x04, Y1, Y1, Y1
  // More input to hash: go around through LoopA. Otherwise keep running
  // rounds only until 10 iterations are done.
  3650. CMPQ R9, CX
  3651. JB openAVX2Tail128LoopA
  3652. CMPQ R9, $0xa0
  3653. JNE openAVX2Tail128LoopB
  // Add the input state back in (Y4 holds the saved counter row).
  3654. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  3655. VPADDD 32(BP), Y9, Y9
  3656. VPADDD 64(BP), Y13, Y13
  3657. VPADDD Y4, Y1, Y1
  // Regroup lanes into keystream order: Y0 and Y14 take the low lanes (rows
  // 0,1 and rows 2,3), Y12 and Y4 take the high lanes.
  3658. VPERM2I128 $0x02, Y5, Y9, Y0
  3659. VPERM2I128 $0x02, Y13, Y1, Y14
  3660. VPERM2I128 $0x13, Y5, Y9, Y12
  3661. VPERM2I128 $0x13, Y13, Y1, Y4
  // Decrypt-only tail: consumes keystream queued in Y0, Y14, Y12, Y4 (in that
  // order) 32 bytes at a time. No hashing is done in this loop.
  3662. openAVX2TailLoop:
  3663. CMPQ BX, $0x20
  3664. JB openAVX2Tail
  3665. SUBQ $0x20, BX
  3666. // Load for decryption
  3667. VPXOR (SI), Y0, Y0
  3668. VMOVDQU Y0, (DI)
  3669. LEAQ 32(SI), SI
  3670. LEAQ 32(DI), DI
  // Y0 was consumed; move the queued keystream registers up one slot.
  3671. VMOVDQA Y14, Y0
  3672. VMOVDQA Y12, Y14
  3673. VMOVDQA Y4, Y12
  3674. JMP openAVX2TailLoop
  // Fewer than 32 bytes remain: handle one more full 16-byte block if present.
  3675. openAVX2Tail:
  3676. CMPQ BX, $0x10
  // VMOVDQA leaves the flags alone, so the JB below still tests the CMPQ above.
  // X1 = low 16 bytes of the current keystream register.
  3677. VMOVDQA X0, X1
  3678. JB openAVX2TailDone
  3679. SUBQ $0x10, BX
  3680. // Load for decryption
  // Decrypt 16 bytes with the low lane of Y0 (X12 is scratch).
  3681. VPXOR (SI), X0, X12
  3682. VMOVDQU X12, (DI)
  3683. LEAQ 16(SI), SI
  3684. LEAQ 16(DI), DI
  // Copy the high lane of Y0 into both lanes so X0/X1 hold the next 16
  // keystream bytes.
  3685. VPERM2I128 $0x11, Y0, Y0, Y0
  3686. VMOVDQA X0, X1
  3687. openAVX2TailDone:
  // Clear the upper YMM halves before continuing in the SSE code, which
  // presumably finishes the remaining (< 16) bytes using X1 - confirm at
  // openSSETail16.
  3688. VZEROUPPER
  3689. JMP openSSETail16
  // Tail for 129..256 remaining bytes (per the dispatch at
  // openAVX2MainLoopDone): two ChaCha states, Y0/Y14/Y12/Y4 and Y5/Y9/Y13/Y1.
  3690. openAVX2Tail256:
  3691. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  3692. VMOVDQA Y0, Y5
  3693. VMOVDQA 32(BP), Y14
  3694. VMOVDQA Y14, Y9
  3695. VMOVDQA 64(BP), Y12
  3696. VMOVDQA Y12, Y13
  // Counter rows: last counter used, at 192(BP), advanced by avx2IncMask once
  // for the first state and twice for the second. Y7 and Y11 keep copies for
  // the add after the rounds.
  3697. VMOVDQA 192(BP), Y4
  3698. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  3699. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  3700. VMOVDQA Y4, Y7
  3701. VMOVDQA Y1, Y11
  3702. // Compute the number of iterations that will hash data
  // BX is about to be reused as a pointer, so park the remaining length at 224(BP).
  3703. MOVQ BX, 224(BP)
  // CX = min((length - 128) / 16, 10): the iteration count up to which the
  // round loop keeps going around through the hashing label.
  3704. MOVQ BX, CX
  3705. SUBQ $0x80, CX
  3706. SHRQ $0x04, CX
  3707. MOVQ $0x0000000a, R9
  3708. CMPQ CX, $0x0a
  3709. CMOVQGT R9, CX
  // BX = pointer to the next input block to hash; R9 = round-iteration counter.
  3710. MOVQ SI, BX
  3711. XORQ R9, R9
  3712. openAVX2Tail256LoopA:
  3713. ADDQ (BX), R10
  3714. ADCQ 8(BX), R11
  3715. ADCQ $0x01, R12
  3716. MOVQ (BP), DX
  3717. MOVQ DX, R15
  3718. MULXQ R10, R13, R14
  3719. IMULQ R12, R15
  3720. MULXQ R11, AX, DX
  3721. ADDQ AX, R14
  3722. ADCQ DX, R15
  3723. MOVQ 8(BP), DX
  3724. MULXQ R10, R10, AX
  3725. ADDQ R10, R14
  3726. MULXQ R11, R11, R8
  3727. ADCQ R11, R15
  3728. ADCQ $0x00, R8
  3729. IMULQ R12, DX
  3730. ADDQ AX, R15
  3731. ADCQ DX, R8
  3732. MOVQ R13, R10
  3733. MOVQ R14, R11
  3734. MOVQ R15, R12
  3735. ANDQ $0x03, R12
  3736. MOVQ R15, R13
  3737. ANDQ $-4, R13
  3738. MOVQ R8, R14
  3739. SHRQ $0x02, R8, R15
  3740. SHRQ $0x02, R8
  3741. ADDQ R13, R10
  3742. ADCQ R14, R11
  3743. ADCQ $0x00, R12
  3744. ADDQ R15, R10
  3745. ADCQ R8, R11
  3746. ADCQ $0x00, R12
  3747. LEAQ 16(BX), BX
  3748. openAVX2Tail256LoopB:
  3749. VPADDD Y14, Y0, Y0
  3750. VPXOR Y0, Y4, Y4
  3751. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3752. VPADDD Y4, Y12, Y12
  3753. VPXOR Y12, Y14, Y14
  3754. VPSLLD $0x0c, Y14, Y3
  3755. VPSRLD $0x14, Y14, Y14
  3756. VPXOR Y3, Y14, Y14
  3757. VPADDD Y14, Y0, Y0
  3758. VPXOR Y0, Y4, Y4
  3759. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3760. VPADDD Y4, Y12, Y12
  3761. VPXOR Y12, Y14, Y14
  3762. VPSLLD $0x07, Y14, Y3
  3763. VPSRLD $0x19, Y14, Y14
  3764. VPXOR Y3, Y14, Y14
  3765. VPADDD Y9, Y5, Y5
  3766. VPXOR Y5, Y1, Y1
  3767. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3768. VPADDD Y1, Y13, Y13
  3769. VPXOR Y13, Y9, Y9
  3770. VPSLLD $0x0c, Y9, Y3
  3771. VPSRLD $0x14, Y9, Y9
  3772. VPXOR Y3, Y9, Y9
  3773. VPADDD Y9, Y5, Y5
  3774. VPXOR Y5, Y1, Y1
  3775. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3776. VPADDD Y1, Y13, Y13
  3777. VPXOR Y13, Y9, Y9
  3778. VPSLLD $0x07, Y9, Y3
  3779. VPSRLD $0x19, Y9, Y9
  3780. VPXOR Y3, Y9, Y9
  3781. VPALIGNR $0x04, Y14, Y14, Y14
  3782. VPALIGNR $0x04, Y9, Y9, Y9
  3783. VPALIGNR $0x08, Y12, Y12, Y12
  3784. VPALIGNR $0x08, Y13, Y13, Y13
  3785. VPALIGNR $0x0c, Y4, Y4, Y4
  3786. VPALIGNR $0x0c, Y1, Y1, Y1
  3787. INCQ R9
  3788. VPADDD Y14, Y0, Y0
  3789. VPXOR Y0, Y4, Y4
  3790. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3791. VPADDD Y4, Y12, Y12
  3792. VPXOR Y12, Y14, Y14
  3793. VPSLLD $0x0c, Y14, Y3
  3794. VPSRLD $0x14, Y14, Y14
  3795. VPXOR Y3, Y14, Y14
  3796. VPADDD Y14, Y0, Y0
  3797. VPXOR Y0, Y4, Y4
  3798. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3799. VPADDD Y4, Y12, Y12
  3800. VPXOR Y12, Y14, Y14
  3801. VPSLLD $0x07, Y14, Y3
  3802. VPSRLD $0x19, Y14, Y14
  3803. VPXOR Y3, Y14, Y14
  3804. VPADDD Y9, Y5, Y5
  3805. VPXOR Y5, Y1, Y1
  3806. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3807. VPADDD Y1, Y13, Y13
  3808. VPXOR Y13, Y9, Y9
  3809. VPSLLD $0x0c, Y9, Y3
  3810. VPSRLD $0x14, Y9, Y9
  3811. VPXOR Y3, Y9, Y9
  3812. VPADDD Y9, Y5, Y5
  3813. VPXOR Y5, Y1, Y1
  3814. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3815. VPADDD Y1, Y13, Y13
  3816. VPXOR Y13, Y9, Y9
  3817. VPSLLD $0x07, Y9, Y3
  3818. VPSRLD $0x19, Y9, Y9
  3819. VPXOR Y3, Y9, Y9
  3820. VPALIGNR $0x0c, Y14, Y14, Y14
  3821. VPALIGNR $0x0c, Y9, Y9, Y9
  3822. VPALIGNR $0x08, Y12, Y12, Y12
  3823. VPALIGNR $0x08, Y13, Y13, Y13
  3824. VPALIGNR $0x04, Y4, Y4, Y4
  3825. VPALIGNR $0x04, Y1, Y1, Y1
  3826. CMPQ R9, CX
  3827. JB openAVX2Tail256LoopA
  3828. CMPQ R9, $0x0a
  3829. JNE openAVX2Tail256LoopB
  3830. MOVQ BX, R9
  3831. SUBQ SI, BX
  3832. MOVQ BX, CX
  3833. MOVQ 224(BP), BX
  3834. openAVX2Tail256Hash:
  3835. ADDQ $0x10, CX
  3836. CMPQ CX, BX
  3837. JGT openAVX2Tail256HashEnd
  3838. ADDQ (R9), R10
  3839. ADCQ 8(R9), R11
  3840. ADCQ $0x01, R12
  3841. MOVQ (BP), DX
  3842. MOVQ DX, R15
  3843. MULXQ R10, R13, R14
  3844. IMULQ R12, R15
  3845. MULXQ R11, AX, DX
  3846. ADDQ AX, R14
  3847. ADCQ DX, R15
  3848. MOVQ 8(BP), DX
  3849. MULXQ R10, R10, AX
  3850. ADDQ R10, R14
  3851. MULXQ R11, R11, R8
  3852. ADCQ R11, R15
  3853. ADCQ $0x00, R8
  3854. IMULQ R12, DX
  3855. ADDQ AX, R15
  3856. ADCQ DX, R8
  3857. MOVQ R13, R10
  3858. MOVQ R14, R11
  3859. MOVQ R15, R12
  3860. ANDQ $0x03, R12
  3861. MOVQ R15, R13
  3862. ANDQ $-4, R13
  3863. MOVQ R8, R14
  3864. SHRQ $0x02, R8, R15
  3865. SHRQ $0x02, R8
  3866. ADDQ R13, R10
  3867. ADCQ R14, R11
  3868. ADCQ $0x00, R12
  3869. ADDQ R15, R10
  3870. ADCQ R8, R11
  3871. ADCQ $0x00, R12
  3872. LEAQ 16(R9), R9
  3873. JMP openAVX2Tail256Hash
  3874. openAVX2Tail256HashEnd:
  3875. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  3876. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  3877. VPADDD 32(BP), Y14, Y14
  3878. VPADDD 32(BP), Y9, Y9
  3879. VPADDD 64(BP), Y12, Y12
  3880. VPADDD 64(BP), Y13, Y13
  3881. VPADDD Y7, Y4, Y4
  3882. VPADDD Y11, Y1, Y1
  3883. VPERM2I128 $0x02, Y0, Y14, Y6
  3884. VPERM2I128 $0x02, Y12, Y4, Y10
  3885. VPERM2I128 $0x13, Y0, Y14, Y8
  3886. VPERM2I128 $0x13, Y12, Y4, Y2
  3887. VPERM2I128 $0x02, Y5, Y9, Y0
  3888. VPERM2I128 $0x02, Y13, Y1, Y14
  3889. VPERM2I128 $0x13, Y5, Y9, Y12
  3890. VPERM2I128 $0x13, Y13, Y1, Y4
  3891. VPXOR (SI), Y6, Y6
  3892. VPXOR 32(SI), Y10, Y10
  3893. VPXOR 64(SI), Y8, Y8
  3894. VPXOR 96(SI), Y2, Y2
  3895. VMOVDQU Y6, (DI)
  3896. VMOVDQU Y10, 32(DI)
  3897. VMOVDQU Y8, 64(DI)
  3898. VMOVDQU Y2, 96(DI)
  3899. LEAQ 128(SI), SI
  3900. LEAQ 128(DI), DI
  3901. SUBQ $0x80, BX
  3902. JMP openAVX2TailLoop
  // openAVX2Tail384: tail path that builds three register sets of ChaCha20
  // state (each Y register carries two 128-bit lanes, i.e. two blocks),
  // runs ten double rounds on them while hashing input from SI, XORs the
  // first two sets with 256 bytes at SI into DI, and leaves the third set's
  // key stream in Y0, Y14, Y12, Y4 before jumping to openAVX2TailLoop.
  // BX is the remaining length; it is spilled to 224(BP) because BX is
  // reused as the hashing pointer inside the round loop.
  3903. openAVX2Tail384:
  3904. // Need to decrypt up to 384 bytes - prepare six blocks
  // Row layout per set: (Y0,Y14,Y12,Y4), (Y5,Y9,Y13,Y1), (Y6,Y10,Y8,Y2).
  // Rows are loaded from the constants table, 32(BP) and 64(BP); the last
  // row comes from 192(BP) and is advanced with avx2IncMask once per set.
  3905. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  3906. VMOVDQA Y0, Y5
  3907. VMOVDQA Y0, Y6
  3908. VMOVDQA 32(BP), Y14
  3909. VMOVDQA Y14, Y9
  3910. VMOVDQA Y14, Y10
  3911. VMOVDQA 64(BP), Y12
  3912. VMOVDQA Y12, Y13
  3913. VMOVDQA Y12, Y8
  3914. VMOVDQA 192(BP), Y4
  3915. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  3916. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  3917. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  // Keep the initial last rows so they can be added back after the rounds.
  3918. VMOVDQA Y4, 96(BP)
  3919. VMOVDQA Y1, 128(BP)
  3920. VMOVDQA Y2, 160(BP)
  3921. // Compute the number of iterations that will hash two blocks of data
  // CX = ((BX - 256) >> 4) + 6, capped at 10. R9 counts loop passes from 0.
  3922. MOVQ BX, 224(BP)
  3923. MOVQ BX, CX
  3924. SUBQ $0x00000100, CX
  3925. SHRQ $0x04, CX
  3926. ADDQ $0x06, CX
  3927. MOVQ $0x0000000a, R9
  3928. CMPQ CX, $0x0a
  3929. CMOVQGT R9, CX
  3930. MOVQ SI, BX
  3931. XORQ R9, R9
  // LoopB entry: hash one extra 16-byte block before the double round.
  // Hash step: add the 16 bytes at (BX) plus a 2^128 bit into the
  // accumulator R10:R11:R12, multiply by the 128-bit value at 0(BP)/8(BP)
  // (presumably the clamped Poly1305 r; it is stored outside this chunk),
  // then fold everything above bit 130 back in as 4*c + c.
  3932. openAVX2Tail384LoopB:
  3933. ADDQ (BX), R10
  3934. ADCQ 8(BX), R11
  3935. ADCQ $0x01, R12
  3936. MOVQ (BP), DX
  3937. MOVQ DX, R15
  3938. MULXQ R10, R13, R14
  3939. IMULQ R12, R15
  3940. MULXQ R11, AX, DX
  3941. ADDQ AX, R14
  3942. ADCQ DX, R15
  3943. MOVQ 8(BP), DX
  3944. MULXQ R10, R10, AX
  3945. ADDQ R10, R14
  3946. MULXQ R11, R11, R8
  3947. ADCQ R11, R15
  3948. ADCQ $0x00, R8
  3949. IMULQ R12, DX
  3950. ADDQ AX, R15
  3951. ADCQ DX, R8
  3952. MOVQ R13, R10
  3953. MOVQ R14, R11
  3954. MOVQ R15, R12
  3955. ANDQ $0x03, R12
  3956. MOVQ R15, R13
  3957. ANDQ $-4, R13
  3958. MOVQ R8, R14
  3959. SHRQ $0x02, R8, R15
  3960. SHRQ $0x02, R8
  3961. ADDQ R13, R10
  3962. ADCQ R14, R11
  3963. ADCQ $0x00, R12
  3964. ADDQ R15, R10
  3965. ADCQ R8, R11
  3966. ADCQ $0x00, R12
  3967. LEAQ 16(BX), BX
  // LoopA entry: one double round over all three sets. Y3 is the scratch
  // register for the shift-based rotates (12 and 7 bits); the 16- and 8-bit
  // rotates use VPSHUFB with the rol16/rol8 tables.
  3968. openAVX2Tail384LoopA:
  3969. VPADDD Y14, Y0, Y0
  3970. VPXOR Y0, Y4, Y4
  3971. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  3972. VPADDD Y4, Y12, Y12
  3973. VPXOR Y12, Y14, Y14
  3974. VPSLLD $0x0c, Y14, Y3
  3975. VPSRLD $0x14, Y14, Y14
  3976. VPXOR Y3, Y14, Y14
  3977. VPADDD Y14, Y0, Y0
  3978. VPXOR Y0, Y4, Y4
  3979. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  3980. VPADDD Y4, Y12, Y12
  3981. VPXOR Y12, Y14, Y14
  3982. VPSLLD $0x07, Y14, Y3
  3983. VPSRLD $0x19, Y14, Y14
  3984. VPXOR Y3, Y14, Y14
  3985. VPADDD Y9, Y5, Y5
  3986. VPXOR Y5, Y1, Y1
  3987. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  3988. VPADDD Y1, Y13, Y13
  3989. VPXOR Y13, Y9, Y9
  3990. VPSLLD $0x0c, Y9, Y3
  3991. VPSRLD $0x14, Y9, Y9
  3992. VPXOR Y3, Y9, Y9
  3993. VPADDD Y9, Y5, Y5
  3994. VPXOR Y5, Y1, Y1
  3995. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  3996. VPADDD Y1, Y13, Y13
  3997. VPXOR Y13, Y9, Y9
  3998. VPSLLD $0x07, Y9, Y3
  3999. VPSRLD $0x19, Y9, Y9
  4000. VPXOR Y3, Y9, Y9
  4001. VPADDD Y10, Y6, Y6
  4002. VPXOR Y6, Y2, Y2
  4003. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  4004. VPADDD Y2, Y8, Y8
  4005. VPXOR Y8, Y10, Y10
  4006. VPSLLD $0x0c, Y10, Y3
  4007. VPSRLD $0x14, Y10, Y10
  4008. VPXOR Y3, Y10, Y10
  4009. VPADDD Y10, Y6, Y6
  4010. VPXOR Y6, Y2, Y2
  4011. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  4012. VPADDD Y2, Y8, Y8
  4013. VPXOR Y8, Y10, Y10
  4014. VPSLLD $0x07, Y10, Y3
  4015. VPSRLD $0x19, Y10, Y10
  4016. VPXOR Y3, Y10, Y10
  // Rotate the second, third and fourth rows by 4, 8 and 12 bytes within
  // each lane so the next quarter rounds operate on the diagonals.
  4017. VPALIGNR $0x04, Y14, Y14, Y14
  4018. VPALIGNR $0x04, Y9, Y9, Y9
  4019. VPALIGNR $0x04, Y10, Y10, Y10
  4020. VPALIGNR $0x08, Y12, Y12, Y12
  4021. VPALIGNR $0x08, Y13, Y13, Y13
  4022. VPALIGNR $0x08, Y8, Y8, Y8
  4023. VPALIGNR $0x0c, Y4, Y4, Y4
  4024. VPALIGNR $0x0c, Y1, Y1, Y1
  4025. VPALIGNR $0x0c, Y2, Y2, Y2
  // Hash step executed on every pass: absorb the next 16 bytes at (BX).
  4026. ADDQ (BX), R10
  4027. ADCQ 8(BX), R11
  4028. ADCQ $0x01, R12
  4029. MOVQ (BP), DX
  4030. MOVQ DX, R15
  4031. MULXQ R10, R13, R14
  4032. IMULQ R12, R15
  4033. MULXQ R11, AX, DX
  4034. ADDQ AX, R14
  4035. ADCQ DX, R15
  4036. MOVQ 8(BP), DX
  4037. MULXQ R10, R10, AX
  4038. ADDQ R10, R14
  4039. MULXQ R11, R11, R8
  4040. ADCQ R11, R15
  4041. ADCQ $0x00, R8
  4042. IMULQ R12, DX
  4043. ADDQ AX, R15
  4044. ADCQ DX, R8
  4045. MOVQ R13, R10
  4046. MOVQ R14, R11
  4047. MOVQ R15, R12
  4048. ANDQ $0x03, R12
  4049. MOVQ R15, R13
  4050. ANDQ $-4, R13
  4051. MOVQ R8, R14
  4052. SHRQ $0x02, R8, R15
  4053. SHRQ $0x02, R8
  4054. ADDQ R13, R10
  4055. ADCQ R14, R11
  4056. ADCQ $0x00, R12
  4057. ADDQ R15, R10
  4058. ADCQ R8, R11
  4059. ADCQ $0x00, R12
  4060. LEAQ 16(BX), BX
  4061. INCQ R9
  // Second half of the double round (rows are in rotated position).
  4062. VPADDD Y14, Y0, Y0
  4063. VPXOR Y0, Y4, Y4
  4064. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  4065. VPADDD Y4, Y12, Y12
  4066. VPXOR Y12, Y14, Y14
  4067. VPSLLD $0x0c, Y14, Y3
  4068. VPSRLD $0x14, Y14, Y14
  4069. VPXOR Y3, Y14, Y14
  4070. VPADDD Y14, Y0, Y0
  4071. VPXOR Y0, Y4, Y4
  4072. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  4073. VPADDD Y4, Y12, Y12
  4074. VPXOR Y12, Y14, Y14
  4075. VPSLLD $0x07, Y14, Y3
  4076. VPSRLD $0x19, Y14, Y14
  4077. VPXOR Y3, Y14, Y14
  4078. VPADDD Y9, Y5, Y5
  4079. VPXOR Y5, Y1, Y1
  4080. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  4081. VPADDD Y1, Y13, Y13
  4082. VPXOR Y13, Y9, Y9
  4083. VPSLLD $0x0c, Y9, Y3
  4084. VPSRLD $0x14, Y9, Y9
  4085. VPXOR Y3, Y9, Y9
  4086. VPADDD Y9, Y5, Y5
  4087. VPXOR Y5, Y1, Y1
  4088. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  4089. VPADDD Y1, Y13, Y13
  4090. VPXOR Y13, Y9, Y9
  4091. VPSLLD $0x07, Y9, Y3
  4092. VPSRLD $0x19, Y9, Y9
  4093. VPXOR Y3, Y9, Y9
  4094. VPADDD Y10, Y6, Y6
  4095. VPXOR Y6, Y2, Y2
  4096. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  4097. VPADDD Y2, Y8, Y8
  4098. VPXOR Y8, Y10, Y10
  4099. VPSLLD $0x0c, Y10, Y3
  4100. VPSRLD $0x14, Y10, Y10
  4101. VPXOR Y3, Y10, Y10
  4102. VPADDD Y10, Y6, Y6
  4103. VPXOR Y6, Y2, Y2
  4104. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  4105. VPADDD Y2, Y8, Y8
  4106. VPXOR Y8, Y10, Y10
  4107. VPSLLD $0x07, Y10, Y3
  4108. VPSRLD $0x19, Y10, Y10
  4109. VPXOR Y3, Y10, Y10
  // Undo the row rotation (12, 8 and 4 bytes complete the full turn).
  4110. VPALIGNR $0x0c, Y14, Y14, Y14
  4111. VPALIGNR $0x0c, Y9, Y9, Y9
  4112. VPALIGNR $0x0c, Y10, Y10, Y10
  4113. VPALIGNR $0x08, Y12, Y12, Y12
  4114. VPALIGNR $0x08, Y13, Y13, Y13
  4115. VPALIGNR $0x08, Y8, Y8, Y8
  4116. VPALIGNR $0x04, Y4, Y4, Y4
  4117. VPALIGNR $0x04, Y1, Y1, Y1
  4118. VPALIGNR $0x04, Y2, Y2, Y2
  // While R9 < CX re-enter at LoopB (two hash steps per pass); afterwards
  // re-enter at LoopA until ten passes have run.
  4119. CMPQ R9, CX
  4120. JB openAVX2Tail384LoopB
  4121. CMPQ R9, $0x0a
  4122. JNE openAVX2Tail384LoopA
  // R9 = next unhashed input address, CX = bytes hashed so far (BX - SI),
  // BX = remaining length restored from 224(BP).
  4123. MOVQ BX, R9
  4124. SUBQ SI, BX
  4125. MOVQ BX, CX
  4126. MOVQ 224(BP), BX
  // Hash any further whole 16-byte blocks: stop once CX + 16 would exceed BX.
  4127. openAVX2Tail384Hash:
  4128. ADDQ $0x10, CX
  4129. CMPQ CX, BX
  4130. JGT openAVX2Tail384HashEnd
  4131. ADDQ (R9), R10
  4132. ADCQ 8(R9), R11
  4133. ADCQ $0x01, R12
  4134. MOVQ (BP), DX
  4135. MOVQ DX, R15
  4136. MULXQ R10, R13, R14
  4137. IMULQ R12, R15
  4138. MULXQ R11, AX, DX
  4139. ADDQ AX, R14
  4140. ADCQ DX, R15
  4141. MOVQ 8(BP), DX
  4142. MULXQ R10, R10, AX
  4143. ADDQ R10, R14
  4144. MULXQ R11, R11, R8
  4145. ADCQ R11, R15
  4146. ADCQ $0x00, R8
  4147. IMULQ R12, DX
  4148. ADDQ AX, R15
  4149. ADCQ DX, R8
  4150. MOVQ R13, R10
  4151. MOVQ R14, R11
  4152. MOVQ R15, R12
  4153. ANDQ $0x03, R12
  4154. MOVQ R15, R13
  4155. ANDQ $-4, R13
  4156. MOVQ R8, R14
  4157. SHRQ $0x02, R8, R15
  4158. SHRQ $0x02, R8
  4159. ADDQ R13, R10
  4160. ADCQ R14, R11
  4161. ADCQ $0x00, R12
  4162. ADDQ R15, R10
  4163. ADCQ R8, R11
  4164. ADCQ $0x00, R12
  4165. LEAQ 16(R9), R9
  4166. JMP openAVX2Tail384Hash
  // Finalise: add the initial state rows back onto the permuted rows.
  4167. openAVX2Tail384HashEnd:
  4168. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  4169. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  4170. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  4171. VPADDD 32(BP), Y14, Y14
  4172. VPADDD 32(BP), Y9, Y9
  4173. VPADDD 32(BP), Y10, Y10
  4174. VPADDD 64(BP), Y12, Y12
  4175. VPADDD 64(BP), Y13, Y13
  4176. VPADDD 64(BP), Y8, Y8
  4177. VPADDD 96(BP), Y4, Y4
  4178. VPADDD 128(BP), Y1, Y1
  4179. VPADDD 160(BP), Y2, Y2
  // VPERM2I128 $0x02 pairs the low 128-bit lanes of two rows and $0x13 pairs
  // the high lanes, giving 32-byte pieces that are XORed with the input.
  // First set -> bytes 0..127.
  4180. VPERM2I128 $0x02, Y0, Y14, Y3
  4181. VPERM2I128 $0x02, Y12, Y4, Y7
  4182. VPERM2I128 $0x13, Y0, Y14, Y11
  4183. VPERM2I128 $0x13, Y12, Y4, Y15
  4184. VPXOR (SI), Y3, Y3
  4185. VPXOR 32(SI), Y7, Y7
  4186. VPXOR 64(SI), Y11, Y11
  4187. VPXOR 96(SI), Y15, Y15
  4188. VMOVDQU Y3, (DI)
  4189. VMOVDQU Y7, 32(DI)
  4190. VMOVDQU Y11, 64(DI)
  4191. VMOVDQU Y15, 96(DI)
  // Second set -> bytes 128..255.
  4192. VPERM2I128 $0x02, Y5, Y9, Y3
  4193. VPERM2I128 $0x02, Y13, Y1, Y7
  4194. VPERM2I128 $0x13, Y5, Y9, Y11
  4195. VPERM2I128 $0x13, Y13, Y1, Y15
  4196. VPXOR 128(SI), Y3, Y3
  4197. VPXOR 160(SI), Y7, Y7
  4198. VPXOR 192(SI), Y11, Y11
  4199. VPXOR 224(SI), Y15, Y15
  4200. VMOVDQU Y3, 128(DI)
  4201. VMOVDQU Y7, 160(DI)
  4202. VMOVDQU Y11, 192(DI)
  4203. VMOVDQU Y15, 224(DI)
  // Third set is only regrouped into Y0, Y14, Y12, Y4 (no XOR here); it is
  // handed to openAVX2TailLoop together with the advanced pointers and the
  // length reduced by 256.
  4204. VPERM2I128 $0x02, Y6, Y10, Y0
  4205. VPERM2I128 $0x02, Y8, Y2, Y14
  4206. VPERM2I128 $0x13, Y6, Y10, Y12
  4207. VPERM2I128 $0x13, Y8, Y2, Y4
  4208. LEAQ 256(SI), SI
  4209. LEAQ 256(DI), DI
  4210. SUBQ $0x00000100, BX
  4211. JMP openAVX2TailLoop
  // openAVX2Tail512: tail path that builds four register sets of ChaCha20
  // state (two 128-bit lanes per Y register), runs ten double rounds on
  // them while hashing input through R9, XORs the first three sets with
  // 384 bytes at SI into DI, and leaves the fourth set's key stream in
  // Y0, Y14, Y12, Y4 before jumping to openAVX2TailLoop.
  // Row layout per set: (Y0,Y14,Y12,Y4), (Y5,Y9,Y13,Y1), (Y6,Y10,Y8,Y2),
  // (Y7,Y11,Y15,Y3). All sixteen Y registers hold state, so Y15 is spilled
  // to 224(BP) whenever it is borrowed as the shift scratch register.
  4212. openAVX2Tail512:
  4213. VMOVDQU ·chacha20Constants<>+0(SB), Y0
  4214. VMOVDQA Y0, Y5
  4215. VMOVDQA Y0, Y6
  4216. VMOVDQA Y0, Y7
  4217. VMOVDQA 32(BP), Y14
  4218. VMOVDQA Y14, Y9
  4219. VMOVDQA Y14, Y10
  4220. VMOVDQA Y14, Y11
  4221. VMOVDQA 64(BP), Y12
  4222. VMOVDQA Y12, Y13
  4223. VMOVDQA Y12, Y8
  4224. VMOVDQA Y12, Y15
  // Last row: start from 192(BP) and advance by avx2IncMask once per set;
  // the four resulting rows are saved (192(BP) receives the newest one).
  4225. VMOVDQA 192(BP), Y4
  4226. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  4227. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  4228. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  4229. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  4230. VMOVDQA Y4, 96(BP)
  4231. VMOVDQA Y1, 128(BP)
  4232. VMOVDQA Y2, 160(BP)
  4233. VMOVDQA Y3, 192(BP)
  // CX counts loop passes; R9 walks the input being hashed.
  4234. XORQ CX, CX
  4235. MOVQ SI, R9
  // LoopB entry (first four passes): hash one extra 16-byte block.
  // Hash step: add the 16 bytes at (R9) plus a 2^128 bit into the
  // accumulator R10:R11:R12, multiply by the 128-bit value at 0(BP)/8(BP)
  // (presumably the clamped Poly1305 r; it is stored outside this chunk),
  // then fold everything above bit 130 back in as 4*c + c.
  4236. openAVX2Tail512LoopB:
  4237. ADDQ (R9), R10
  4238. ADCQ 8(R9), R11
  4239. ADCQ $0x01, R12
  4240. MOVQ (BP), DX
  4241. MOVQ DX, R15
  4242. MULXQ R10, R13, R14
  4243. IMULQ R12, R15
  4244. MULXQ R11, AX, DX
  4245. ADDQ AX, R14
  4246. ADCQ DX, R15
  4247. MOVQ 8(BP), DX
  4248. MULXQ R10, R10, AX
  4249. ADDQ R10, R14
  4250. MULXQ R11, R11, R8
  4251. ADCQ R11, R15
  4252. ADCQ $0x00, R8
  4253. IMULQ R12, DX
  4254. ADDQ AX, R15
  4255. ADCQ DX, R8
  4256. MOVQ R13, R10
  4257. MOVQ R14, R11
  4258. MOVQ R15, R12
  4259. ANDQ $0x03, R12
  4260. MOVQ R15, R13
  4261. ANDQ $-4, R13
  4262. MOVQ R8, R14
  4263. SHRQ $0x02, R8, R15
  4264. SHRQ $0x02, R8
  4265. ADDQ R13, R10
  4266. ADCQ R14, R11
  4267. ADCQ $0x00, R12
  4268. ADDQ R15, R10
  4269. ADCQ R8, R11
  4270. ADCQ $0x00, R12
  4271. LEAQ 16(R9), R9
  // LoopA entry: one double round over all four sets, with two hash steps
  // (32 bytes) interleaved. Quarter-round part 1: add, xor, rotate by 16.
  4272. openAVX2Tail512LoopA:
  4273. VPADDD Y14, Y0, Y0
  4274. VPADDD Y9, Y5, Y5
  4275. VPADDD Y10, Y6, Y6
  4276. VPADDD Y11, Y7, Y7
  4277. VPXOR Y0, Y4, Y4
  4278. VPXOR Y5, Y1, Y1
  4279. VPXOR Y6, Y2, Y2
  4280. VPXOR Y7, Y3, Y3
  4281. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  4282. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  4283. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  4284. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  4285. VPADDD Y4, Y12, Y12
  4286. VPADDD Y1, Y13, Y13
  4287. VPADDD Y2, Y8, Y8
  4288. VPADDD Y3, Y15, Y15
  4289. VPXOR Y12, Y14, Y14
  4290. VPXOR Y13, Y9, Y9
  4291. VPXOR Y8, Y10, Y10
  4292. VPXOR Y15, Y11, Y11
  // Rotate left by 12 via shift pair; Y15 is saved and used as scratch.
  4293. VMOVDQA Y15, 224(BP)
  4294. VPSLLD $0x0c, Y14, Y15
  4295. VPSRLD $0x14, Y14, Y14
  4296. VPXOR Y15, Y14, Y14
  4297. VPSLLD $0x0c, Y9, Y15
  4298. VPSRLD $0x14, Y9, Y9
  4299. VPXOR Y15, Y9, Y9
  4300. VPSLLD $0x0c, Y10, Y15
  4301. VPSRLD $0x14, Y10, Y10
  4302. VPXOR Y15, Y10, Y10
  4303. VPSLLD $0x0c, Y11, Y15
  4304. VPSRLD $0x14, Y11, Y11
  4305. VPXOR Y15, Y11, Y11
  4306. VMOVDQA 224(BP), Y15
  // Hash step on the 16 bytes at (R9); R9 is not advanced yet, the next
  // step below reads 16(R9) and then moves R9 forward by 32.
  4307. ADDQ (R9), R10
  4308. ADCQ 8(R9), R11
  4309. ADCQ $0x01, R12
  4310. MOVQ (BP), DX
  4311. MOVQ DX, R15
  4312. MULXQ R10, R13, R14
  4313. IMULQ R12, R15
  4314. MULXQ R11, AX, DX
  4315. ADDQ AX, R14
  4316. ADCQ DX, R15
  4317. MOVQ 8(BP), DX
  4318. MULXQ R10, R10, AX
  4319. ADDQ R10, R14
  4320. MULXQ R11, R11, R8
  4321. ADCQ R11, R15
  4322. ADCQ $0x00, R8
  4323. IMULQ R12, DX
  4324. ADDQ AX, R15
  4325. ADCQ DX, R8
  4326. MOVQ R13, R10
  4327. MOVQ R14, R11
  4328. MOVQ R15, R12
  4329. ANDQ $0x03, R12
  4330. MOVQ R15, R13
  4331. ANDQ $-4, R13
  4332. MOVQ R8, R14
  4333. SHRQ $0x02, R8, R15
  4334. SHRQ $0x02, R8
  4335. ADDQ R13, R10
  4336. ADCQ R14, R11
  4337. ADCQ $0x00, R12
  4338. ADDQ R15, R10
  4339. ADCQ R8, R11
  4340. ADCQ $0x00, R12
  // Quarter-round part 2: add, xor, rotate by 8, add, xor, rotate by 7.
  4341. VPADDD Y14, Y0, Y0
  4342. VPADDD Y9, Y5, Y5
  4343. VPADDD Y10, Y6, Y6
  4344. VPADDD Y11, Y7, Y7
  4345. VPXOR Y0, Y4, Y4
  4346. VPXOR Y5, Y1, Y1
  4347. VPXOR Y6, Y2, Y2
  4348. VPXOR Y7, Y3, Y3
  4349. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  4350. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  4351. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  4352. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  4353. VPADDD Y4, Y12, Y12
  4354. VPADDD Y1, Y13, Y13
  4355. VPADDD Y2, Y8, Y8
  4356. VPADDD Y3, Y15, Y15
  4357. VPXOR Y12, Y14, Y14
  4358. VPXOR Y13, Y9, Y9
  4359. VPXOR Y8, Y10, Y10
  4360. VPXOR Y15, Y11, Y11
  4361. VMOVDQA Y15, 224(BP)
  4362. VPSLLD $0x07, Y14, Y15
  4363. VPSRLD $0x19, Y14, Y14
  4364. VPXOR Y15, Y14, Y14
  4365. VPSLLD $0x07, Y9, Y15
  4366. VPSRLD $0x19, Y9, Y9
  4367. VPXOR Y15, Y9, Y9
  4368. VPSLLD $0x07, Y10, Y15
  4369. VPSRLD $0x19, Y10, Y10
  4370. VPXOR Y15, Y10, Y10
  4371. VPSLLD $0x07, Y11, Y15
  4372. VPSRLD $0x19, Y11, Y11
  4373. VPXOR Y15, Y11, Y11
  4374. VMOVDQA 224(BP), Y15
  // Rotate the second, third and fourth rows by 4, 8 and 12 bytes within
  // each lane so the next quarter rounds operate on the diagonals.
  4375. VPALIGNR $0x04, Y14, Y14, Y14
  4376. VPALIGNR $0x04, Y9, Y9, Y9
  4377. VPALIGNR $0x04, Y10, Y10, Y10
  4378. VPALIGNR $0x04, Y11, Y11, Y11
  4379. VPALIGNR $0x08, Y12, Y12, Y12
  4380. VPALIGNR $0x08, Y13, Y13, Y13
  4381. VPALIGNR $0x08, Y8, Y8, Y8
  4382. VPALIGNR $0x08, Y15, Y15, Y15
  4383. VPALIGNR $0x0c, Y4, Y4, Y4
  4384. VPALIGNR $0x0c, Y1, Y1, Y1
  4385. VPALIGNR $0x0c, Y2, Y2, Y2
  4386. VPALIGNR $0x0c, Y3, Y3, Y3
  // Second half of the double round, part 1.
  4387. VPADDD Y14, Y0, Y0
  4388. VPADDD Y9, Y5, Y5
  4389. VPADDD Y10, Y6, Y6
  4390. VPADDD Y11, Y7, Y7
  4391. VPXOR Y0, Y4, Y4
  4392. VPXOR Y5, Y1, Y1
  4393. VPXOR Y6, Y2, Y2
  4394. VPXOR Y7, Y3, Y3
  4395. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  4396. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  4397. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  4398. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  4399. VPADDD Y4, Y12, Y12
  4400. VPADDD Y1, Y13, Y13
  4401. VPADDD Y2, Y8, Y8
  4402. VPADDD Y3, Y15, Y15
  4403. VPXOR Y12, Y14, Y14
  4404. VPXOR Y13, Y9, Y9
  4405. VPXOR Y8, Y10, Y10
  4406. VPXOR Y15, Y11, Y11
  // Second hash step of the pass: bytes 16..31 past R9, then R9 += 32.
  4407. ADDQ 16(R9), R10
  4408. ADCQ 24(R9), R11
  4409. ADCQ $0x01, R12
  4410. MOVQ (BP), DX
  4411. MOVQ DX, R15
  4412. MULXQ R10, R13, R14
  4413. IMULQ R12, R15
  4414. MULXQ R11, AX, DX
  4415. ADDQ AX, R14
  4416. ADCQ DX, R15
  4417. MOVQ 8(BP), DX
  4418. MULXQ R10, R10, AX
  4419. ADDQ R10, R14
  4420. MULXQ R11, R11, R8
  4421. ADCQ R11, R15
  4422. ADCQ $0x00, R8
  4423. IMULQ R12, DX
  4424. ADDQ AX, R15
  4425. ADCQ DX, R8
  4426. MOVQ R13, R10
  4427. MOVQ R14, R11
  4428. MOVQ R15, R12
  4429. ANDQ $0x03, R12
  4430. MOVQ R15, R13
  4431. ANDQ $-4, R13
  4432. MOVQ R8, R14
  4433. SHRQ $0x02, R8, R15
  4434. SHRQ $0x02, R8
  4435. ADDQ R13, R10
  4436. ADCQ R14, R11
  4437. ADCQ $0x00, R12
  4438. ADDQ R15, R10
  4439. ADCQ R8, R11
  4440. ADCQ $0x00, R12
  4441. LEAQ 32(R9), R9
  // Rotate left by 12 (Y15 spilled again), then part 2 of the quarter round.
  4442. VMOVDQA Y15, 224(BP)
  4443. VPSLLD $0x0c, Y14, Y15
  4444. VPSRLD $0x14, Y14, Y14
  4445. VPXOR Y15, Y14, Y14
  4446. VPSLLD $0x0c, Y9, Y15
  4447. VPSRLD $0x14, Y9, Y9
  4448. VPXOR Y15, Y9, Y9
  4449. VPSLLD $0x0c, Y10, Y15
  4450. VPSRLD $0x14, Y10, Y10
  4451. VPXOR Y15, Y10, Y10
  4452. VPSLLD $0x0c, Y11, Y15
  4453. VPSRLD $0x14, Y11, Y11
  4454. VPXOR Y15, Y11, Y11
  4455. VMOVDQA 224(BP), Y15
  4456. VPADDD Y14, Y0, Y0
  4457. VPADDD Y9, Y5, Y5
  4458. VPADDD Y10, Y6, Y6
  4459. VPADDD Y11, Y7, Y7
  4460. VPXOR Y0, Y4, Y4
  4461. VPXOR Y5, Y1, Y1
  4462. VPXOR Y6, Y2, Y2
  4463. VPXOR Y7, Y3, Y3
  4464. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  4465. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  4466. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  4467. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  4468. VPADDD Y4, Y12, Y12
  4469. VPADDD Y1, Y13, Y13
  4470. VPADDD Y2, Y8, Y8
  4471. VPADDD Y3, Y15, Y15
  4472. VPXOR Y12, Y14, Y14
  4473. VPXOR Y13, Y9, Y9
  4474. VPXOR Y8, Y10, Y10
  4475. VPXOR Y15, Y11, Y11
  4476. VMOVDQA Y15, 224(BP)
  4477. VPSLLD $0x07, Y14, Y15
  4478. VPSRLD $0x19, Y14, Y14
  4479. VPXOR Y15, Y14, Y14
  4480. VPSLLD $0x07, Y9, Y15
  4481. VPSRLD $0x19, Y9, Y9
  4482. VPXOR Y15, Y9, Y9
  4483. VPSLLD $0x07, Y10, Y15
  4484. VPSRLD $0x19, Y10, Y10
  4485. VPXOR Y15, Y10, Y10
  4486. VPSLLD $0x07, Y11, Y15
  4487. VPSRLD $0x19, Y11, Y11
  4488. VPXOR Y15, Y11, Y11
  4489. VMOVDQA 224(BP), Y15
  // Undo the row rotation (12, 8 and 4 bytes complete the full turn).
  4490. VPALIGNR $0x0c, Y14, Y14, Y14
  4491. VPALIGNR $0x0c, Y9, Y9, Y9
  4492. VPALIGNR $0x0c, Y10, Y10, Y10
  4493. VPALIGNR $0x0c, Y11, Y11, Y11
  4494. VPALIGNR $0x08, Y12, Y12, Y12
  4495. VPALIGNR $0x08, Y13, Y13, Y13
  4496. VPALIGNR $0x08, Y8, Y8, Y8
  4497. VPALIGNR $0x08, Y15, Y15, Y15
  4498. VPALIGNR $0x04, Y4, Y4, Y4
  4499. VPALIGNR $0x04, Y1, Y1, Y1
  4500. VPALIGNR $0x04, Y2, Y2, Y2
  4501. VPALIGNR $0x04, Y3, Y3, Y3
  // Passes 1-4 re-enter at LoopB (48 bytes hashed per pass), passes 5-10 at
  // LoopA (32 bytes per pass): 4*48 + 6*32 = 384 bytes hashed in the loop.
  4502. INCQ CX
  4503. CMPQ CX, $0x04
  4504. JLT openAVX2Tail512LoopB
  4505. CMPQ CX, $0x0a
  4506. JNE openAVX2Tail512LoopA
  // CX = (BX - 384) rounded down to a multiple of 16: the whole blocks that
  // still have to be hashed beyond the 384 bytes covered above.
  4507. MOVQ BX, CX
  4508. SUBQ $0x00000180, CX
  4509. ANDQ $-16, CX
  4510. openAVX2Tail512HashLoop:
  4511. TESTQ CX, CX
  4512. JE openAVX2Tail512HashEnd
  4513. ADDQ (R9), R10
  4514. ADCQ 8(R9), R11
  4515. ADCQ $0x01, R12
  4516. MOVQ (BP), DX
  4517. MOVQ DX, R15
  4518. MULXQ R10, R13, R14
  4519. IMULQ R12, R15
  4520. MULXQ R11, AX, DX
  4521. ADDQ AX, R14
  4522. ADCQ DX, R15
  4523. MOVQ 8(BP), DX
  4524. MULXQ R10, R10, AX
  4525. ADDQ R10, R14
  4526. MULXQ R11, R11, R8
  4527. ADCQ R11, R15
  4528. ADCQ $0x00, R8
  4529. IMULQ R12, DX
  4530. ADDQ AX, R15
  4531. ADCQ DX, R8
  4532. MOVQ R13, R10
  4533. MOVQ R14, R11
  4534. MOVQ R15, R12
  4535. ANDQ $0x03, R12
  4536. MOVQ R15, R13
  4537. ANDQ $-4, R13
  4538. MOVQ R8, R14
  4539. SHRQ $0x02, R8, R15
  4540. SHRQ $0x02, R8
  4541. ADDQ R13, R10
  4542. ADCQ R14, R11
  4543. ADCQ $0x00, R12
  4544. ADDQ R15, R10
  4545. ADCQ R8, R11
  4546. ADCQ $0x00, R12
  4547. LEAQ 16(R9), R9
  4548. SUBQ $0x10, CX
  4549. JMP openAVX2Tail512HashLoop
  // Finalise: add the initial state rows back onto the permuted rows.
  4550. openAVX2Tail512HashEnd:
  4551. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  4552. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  4553. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  4554. VPADDD ·chacha20Constants<>+0(SB), Y7, Y7
  4555. VPADDD 32(BP), Y14, Y14
  4556. VPADDD 32(BP), Y9, Y9
  4557. VPADDD 32(BP), Y10, Y10
  4558. VPADDD 32(BP), Y11, Y11
  4559. VPADDD 64(BP), Y12, Y12
  4560. VPADDD 64(BP), Y13, Y13
  4561. VPADDD 64(BP), Y8, Y8
  4562. VPADDD 64(BP), Y15, Y15
  4563. VPADDD 96(BP), Y4, Y4
  4564. VPADDD 128(BP), Y1, Y1
  4565. VPADDD 160(BP), Y2, Y2
  4566. VPADDD 192(BP), Y3, Y3
  // Y15 (third row of the fourth set) is parked at 224(BP) so the register
  // can serve as a temporary; it is read back from memory at the end.
  // VPERM2I128 $0x02 pairs the low 128-bit lanes of two rows and $0x13 pairs
  // the high lanes. First set -> bytes 0..127.
  4567. VMOVDQA Y15, 224(BP)
  4568. VPERM2I128 $0x02, Y0, Y14, Y15
  4569. VPERM2I128 $0x13, Y0, Y14, Y14
  4570. VPERM2I128 $0x02, Y12, Y4, Y0
  4571. VPERM2I128 $0x13, Y12, Y4, Y12
  4572. VPXOR (SI), Y15, Y15
  4573. VPXOR 32(SI), Y0, Y0
  4574. VPXOR 64(SI), Y14, Y14
  4575. VPXOR 96(SI), Y12, Y12
  4576. VMOVDQU Y15, (DI)
  4577. VMOVDQU Y0, 32(DI)
  4578. VMOVDQU Y14, 64(DI)
  4579. VMOVDQU Y12, 96(DI)
  // Second set -> bytes 128..255.
  4580. VPERM2I128 $0x02, Y5, Y9, Y0
  4581. VPERM2I128 $0x02, Y13, Y1, Y14
  4582. VPERM2I128 $0x13, Y5, Y9, Y12
  4583. VPERM2I128 $0x13, Y13, Y1, Y4
  4584. VPXOR 128(SI), Y0, Y0
  4585. VPXOR 160(SI), Y14, Y14
  4586. VPXOR 192(SI), Y12, Y12
  4587. VPXOR 224(SI), Y4, Y4
  4588. VMOVDQU Y0, 128(DI)
  4589. VMOVDQU Y14, 160(DI)
  4590. VMOVDQU Y12, 192(DI)
  4591. VMOVDQU Y4, 224(DI)
  // Third set -> bytes 256..383.
  4592. VPERM2I128 $0x02, Y6, Y10, Y0
  4593. VPERM2I128 $0x02, Y8, Y2, Y14
  4594. VPERM2I128 $0x13, Y6, Y10, Y12
  4595. VPERM2I128 $0x13, Y8, Y2, Y4
  4596. VPXOR 256(SI), Y0, Y0
  4597. VPXOR 288(SI), Y14, Y14
  4598. VPXOR 320(SI), Y12, Y12
  4599. VPXOR 352(SI), Y4, Y4
  4600. VMOVDQU Y0, 256(DI)
  4601. VMOVDQU Y14, 288(DI)
  4602. VMOVDQU Y12, 320(DI)
  4603. VMOVDQU Y4, 352(DI)
  // Fourth set is only regrouped into Y0, Y14, Y12, Y4 (no XOR here); it is
  // handed to openAVX2TailLoop together with the advanced pointers and the
  // length reduced by 384.
  4604. VPERM2I128 $0x02, Y7, Y11, Y0
  4605. VPERM2I128 $0x02, 224(BP), Y3, Y14
  4606. VPERM2I128 $0x13, Y7, Y11, Y12
  4607. VPERM2I128 $0x13, 224(BP), Y3, Y4
  4608. LEAQ 384(SI), SI
  4609. LEAQ 384(DI), DI
  4610. SUBQ $0x00000180, BX
  4611. JMP openAVX2TailLoop
  // chacha20Constants: the four 32-bit words of the first ChaCha20 state row.
  // Read as little-endian bytes they spell the ASCII text "expand 32-byte k".
  // The 16-byte row is stored twice (offsets 0-15 and 16-31) so that a single
  // 32-byte load fills both 128-bit lanes of a Y register; 16-byte loads use
  // just the first copy.
  4612. DATA ·chacha20Constants<>+0(SB)/4, $0x61707865
  4613. DATA ·chacha20Constants<>+4(SB)/4, $0x3320646e
  4614. DATA ·chacha20Constants<>+8(SB)/4, $0x79622d32
  4615. DATA ·chacha20Constants<>+12(SB)/4, $0x6b206574
  4616. DATA ·chacha20Constants<>+16(SB)/4, $0x61707865
  4617. DATA ·chacha20Constants<>+20(SB)/4, $0x3320646e
  4618. DATA ·chacha20Constants<>+24(SB)/4, $0x79622d32
  4619. DATA ·chacha20Constants<>+28(SB)/4, $0x6b206574
  4620. GLOBL ·chacha20Constants<>(SB), RODATA|NOPTR, $32
  // polyClampMask: 32-byte AND mask. The low 16 bytes clear the top four bits
  // of every 32-bit word and the bottom two bits of words 1-3, which matches
  // the Poly1305 clamp applied to r; the high 16 bytes are all ones and so
  // pass data through unchanged.
  // NOTE(review): the use site is outside this chunk; presumably it is ANDed
  // with the 32-byte one-time key (r || s) - confirm.
  4621. DATA ·polyClampMask<>+0(SB)/8, $0x0ffffffc0fffffff
  4622. DATA ·polyClampMask<>+8(SB)/8, $0x0ffffffc0ffffffc
  4623. DATA ·polyClampMask<>+16(SB)/8, $0xffffffffffffffff
  4624. DATA ·polyClampMask<>+24(SB)/8, $0xffffffffffffffff
  4625. GLOBL ·polyClampMask<>(SB), RODATA|NOPTR, $32
  // sseIncMask: 16-byte value whose lowest 32-bit word is 1 and all other
  // bytes are 0. Added with PADDL to an X register holding the last state
  // row, it increments that row's first word (the block counter) by one.
  4626. DATA ·sseIncMask<>+0(SB)/8, $0x0000000000000001
  4627. DATA ·sseIncMask<>+8(SB)/8, $0x0000000000000000
  4628. GLOBL ·sseIncMask<>(SB), RODATA|NOPTR, $16
  // andMask: table of fifteen 16-byte masks (240 bytes). Entry k, located at
  // byte offset 16*(k-1) for k = 1..15, has its k lowest bytes set to 0xff
  // and the remaining bytes zero.
  // NOTE(review): the use site is outside this chunk; presumably a partial
  // final block of k bytes is ANDed with entry k to zero the unused tail -
  // confirm.
  4629. DATA ·andMask<>+0(SB)/8, $0x00000000000000ff
  4630. DATA ·andMask<>+8(SB)/8, $0x0000000000000000
  4631. DATA ·andMask<>+16(SB)/8, $0x000000000000ffff
  4632. DATA ·andMask<>+24(SB)/8, $0x0000000000000000
  4633. DATA ·andMask<>+32(SB)/8, $0x0000000000ffffff
  4634. DATA ·andMask<>+40(SB)/8, $0x0000000000000000
  4635. DATA ·andMask<>+48(SB)/8, $0x00000000ffffffff
  4636. DATA ·andMask<>+56(SB)/8, $0x0000000000000000
  4637. DATA ·andMask<>+64(SB)/8, $0x000000ffffffffff
  4638. DATA ·andMask<>+72(SB)/8, $0x0000000000000000
  4639. DATA ·andMask<>+80(SB)/8, $0x0000ffffffffffff
  4640. DATA ·andMask<>+88(SB)/8, $0x0000000000000000
  4641. DATA ·andMask<>+96(SB)/8, $0x00ffffffffffffff
  4642. DATA ·andMask<>+104(SB)/8, $0x0000000000000000
  4643. DATA ·andMask<>+112(SB)/8, $0xffffffffffffffff
  4644. DATA ·andMask<>+120(SB)/8, $0x0000000000000000
  4645. DATA ·andMask<>+128(SB)/8, $0xffffffffffffffff
  4646. DATA ·andMask<>+136(SB)/8, $0x00000000000000ff
  4647. DATA ·andMask<>+144(SB)/8, $0xffffffffffffffff
  4648. DATA ·andMask<>+152(SB)/8, $0x000000000000ffff
  4649. DATA ·andMask<>+160(SB)/8, $0xffffffffffffffff
  4650. DATA ·andMask<>+168(SB)/8, $0x0000000000ffffff
  4651. DATA ·andMask<>+176(SB)/8, $0xffffffffffffffff
  4652. DATA ·andMask<>+184(SB)/8, $0x00000000ffffffff
  4653. DATA ·andMask<>+192(SB)/8, $0xffffffffffffffff
  4654. DATA ·andMask<>+200(SB)/8, $0x000000ffffffffff
  4655. DATA ·andMask<>+208(SB)/8, $0xffffffffffffffff
  4656. DATA ·andMask<>+216(SB)/8, $0x0000ffffffffffff
  4657. DATA ·andMask<>+224(SB)/8, $0xffffffffffffffff
  4658. DATA ·andMask<>+232(SB)/8, $0x00ffffffffffffff
  4659. GLOBL ·andMask<>(SB), RODATA|NOPTR, $240
  // avx2InitMask: 32-byte value that is zero in the low 128-bit lane and has
  // 1 in the lowest word of the high 128-bit lane.
  // NOTE(review): the use site is outside this chunk; presumably it is added
  // to a broadcast counter row so the two lanes of a Y register carry
  // consecutive block counters - confirm.
  4660. DATA ·avx2InitMask<>+0(SB)/8, $0x0000000000000000
  4661. DATA ·avx2InitMask<>+8(SB)/8, $0x0000000000000000
  4662. DATA ·avx2InitMask<>+16(SB)/8, $0x0000000000000001
  4663. DATA ·avx2InitMask<>+24(SB)/8, $0x0000000000000000
  4664. GLOBL ·avx2InitMask<>(SB), RODATA|NOPTR, $32
  // rol16: byte-shuffle control for VPSHUFB. Within every 32-bit word the
  // source byte order 0,1,2,3 becomes 2,3,0,1, i.e. each word is rotated by
  // 16 bits. The 16-byte pattern is repeated for both 128-bit lanes.
  4665. DATA ·rol16<>+0(SB)/8, $0x0504070601000302
  4666. DATA ·rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a
  4667. DATA ·rol16<>+16(SB)/8, $0x0504070601000302
  4668. DATA ·rol16<>+24(SB)/8, $0x0d0c0f0e09080b0a
  4669. GLOBL ·rol16<>(SB), RODATA|NOPTR, $32
  // rol8: byte-shuffle control for VPSHUFB. Within every 32-bit word the
  // destination bytes 0,1,2,3 take source bytes 3,0,1,2, i.e. each word is
  // rotated left by 8 bits. The 16-byte pattern is repeated for both
  // 128-bit lanes.
  4670. DATA ·rol8<>+0(SB)/8, $0x0605040702010003
  4671. DATA ·rol8<>+8(SB)/8, $0x0e0d0c0f0a09080b
  4672. DATA ·rol8<>+16(SB)/8, $0x0605040702010003
  4673. DATA ·rol8<>+24(SB)/8, $0x0e0d0c0f0a09080b
  4674. GLOBL ·rol8<>(SB), RODATA|NOPTR, $32
  // avx2IncMask: 32-byte value with 2 in the lowest word of each 128-bit lane
  // and zeros elsewhere. Added with VPADDD to a Y register holding the last
  // state row of two blocks, it advances the first word (the counter) in
  // both lanes by two.
  4675. DATA ·avx2IncMask<>+0(SB)/8, $0x0000000000000002
  4676. DATA ·avx2IncMask<>+8(SB)/8, $0x0000000000000000
  4677. DATA ·avx2IncMask<>+16(SB)/8, $0x0000000000000002
  4678. DATA ·avx2IncMask<>+24(SB)/8, $0x0000000000000000
  4679. GLOBL ·avx2IncMask<>(SB), RODATA|NOPTR, $32
  4680. // func chacha20Poly1305Seal(dst []byte, key []uint32, src []byte, ad []byte)
  4681. // Requires: AVX, AVX2, BMI2, CMOV, SSE2
  4682. TEXT ·chacha20Poly1305Seal(SB), $288-96
  4683. MOVQ SP, BP
  4684. ADDQ $0x20, BP
  4685. ANDQ $-32, BP
  4686. MOVQ dst_base+0(FP), DI
  4687. MOVQ key_base+24(FP), R8
  4688. MOVQ src_base+48(FP), SI
  4689. MOVQ src_len+56(FP), BX
  4690. MOVQ ad_base+72(FP), CX
  4691. CMPB ·useAVX2+0(SB), $0x01
  4692. JE chacha20Poly1305Seal_AVX2
  4693. // Special optimization, for very short buffers
  4694. CMPQ BX, $0x80
  4695. JBE sealSSE128
  4696. // In the seal case - prepare the poly key + 3 blocks of stream in the first iteration
  4697. MOVOU ·chacha20Constants<>+0(SB), X0
  4698. MOVOU 16(R8), X3
  4699. MOVOU 32(R8), X6
  4700. MOVOU 48(R8), X9
  4701. // Store state on stack for future use
  4702. MOVO X3, 32(BP)
  4703. MOVO X6, 48(BP)
  4704. // Load state, increment counter blocks
  4705. MOVO X0, X1
  4706. MOVO X3, X4
  4707. MOVO X6, X7
  4708. MOVO X9, X10
  4709. PADDL ·sseIncMask<>+0(SB), X10
  4710. MOVO X1, X2
  4711. MOVO X4, X5
  4712. MOVO X7, X8
  4713. MOVO X10, X11
  4714. PADDL ·sseIncMask<>+0(SB), X11
  4715. MOVO X2, X12
  4716. MOVO X5, X13
  4717. MOVO X8, X14
  4718. MOVO X11, X15
  4719. PADDL ·sseIncMask<>+0(SB), X15
  4720. // Store counters
  4721. MOVO X9, 80(BP)
  4722. MOVO X10, 96(BP)
  4723. MOVO X11, 112(BP)
  4724. MOVO X15, 128(BP)
  4725. MOVQ $0x0000000a, R9
  4726. sealSSEIntroLoop:
  4727. MOVO X14, 64(BP)
  4728. PADDD X3, X0
  4729. PXOR X0, X9
  4730. ROL16(X9, X14)
  4731. PADDD X9, X6
  4732. PXOR X6, X3
  4733. MOVO X3, X14
  4734. PSLLL $0x0c, X14
  4735. PSRLL $0x14, X3
  4736. PXOR X14, X3
  4737. PADDD X3, X0
  4738. PXOR X0, X9
  4739. ROL8(X9, X14)
  4740. PADDD X9, X6
  4741. PXOR X6, X3
  4742. MOVO X3, X14
  4743. PSLLL $0x07, X14
  4744. PSRLL $0x19, X3
  4745. PXOR X14, X3
  4746. PADDD X4, X1
  4747. PXOR X1, X10
  4748. ROL16(X10, X14)
  4749. PADDD X10, X7
  4750. PXOR X7, X4
  4751. MOVO X4, X14
  4752. PSLLL $0x0c, X14
  4753. PSRLL $0x14, X4
  4754. PXOR X14, X4
  4755. PADDD X4, X1
  4756. PXOR X1, X10
  4757. ROL8(X10, X14)
  4758. PADDD X10, X7
  4759. PXOR X7, X4
  4760. MOVO X4, X14
  4761. PSLLL $0x07, X14
  4762. PSRLL $0x19, X4
  4763. PXOR X14, X4
  4764. PADDD X5, X2
  4765. PXOR X2, X11
  4766. ROL16(X11, X14)
  4767. PADDD X11, X8
  4768. PXOR X8, X5
  4769. MOVO X5, X14
  4770. PSLLL $0x0c, X14
  4771. PSRLL $0x14, X5
  4772. PXOR X14, X5
  4773. PADDD X5, X2
  4774. PXOR X2, X11
  4775. ROL8(X11, X14)
  4776. PADDD X11, X8
  4777. PXOR X8, X5
  4778. MOVO X5, X14
  4779. PSLLL $0x07, X14
  4780. PSRLL $0x19, X5
  4781. PXOR X14, X5
  4782. MOVO 64(BP), X14
  4783. MOVO X7, 64(BP)
  4784. PADDD X13, X12
  4785. PXOR X12, X15
  4786. ROL16(X15, X7)
  4787. PADDD X15, X14
  4788. PXOR X14, X13
  4789. MOVO X13, X7
  4790. PSLLL $0x0c, X7
  4791. PSRLL $0x14, X13
  4792. PXOR X7, X13
  4793. PADDD X13, X12
  4794. PXOR X12, X15
  4795. ROL8(X15, X7)
  4796. PADDD X15, X14
  4797. PXOR X14, X13
  4798. MOVO X13, X7
  4799. PSLLL $0x07, X7
  4800. PSRLL $0x19, X13
  4801. PXOR X7, X13
  4802. MOVO 64(BP), X7
  4803. BYTE $0x66
  4804. BYTE $0x0f
  4805. BYTE $0x3a
  4806. BYTE $0x0f
  4807. BYTE $0xdb
  4808. BYTE $0x04
  4809. BYTE $0x66
  4810. BYTE $0x0f
  4811. BYTE $0x3a
  4812. BYTE $0x0f
  4813. BYTE $0xe4
  4814. BYTE $0x04
  4815. BYTE $0x66
  4816. BYTE $0x0f
  4817. BYTE $0x3a
  4818. BYTE $0x0f
  4819. BYTE $0xed
  4820. BYTE $0x04
  4821. BYTE $0x66
  4822. BYTE $0x45
  4823. BYTE $0x0f
  4824. BYTE $0x3a
  4825. BYTE $0x0f
  4826. BYTE $0xed
  4827. BYTE $0x04
  4828. BYTE $0x66
  4829. BYTE $0x0f
  4830. BYTE $0x3a
  4831. BYTE $0x0f
  4832. BYTE $0xf6
  4833. BYTE $0x08
  4834. BYTE $0x66
  4835. BYTE $0x0f
  4836. BYTE $0x3a
  4837. BYTE $0x0f
  4838. BYTE $0xff
  4839. BYTE $0x08
  4840. BYTE $0x66
  4841. BYTE $0x45
  4842. BYTE $0x0f
  4843. BYTE $0x3a
  4844. BYTE $0x0f
  4845. BYTE $0xc0
  4846. BYTE $0x08
  4847. BYTE $0x66
  4848. BYTE $0x45
  4849. BYTE $0x0f
  4850. BYTE $0x3a
  4851. BYTE $0x0f
  4852. BYTE $0xf6
  4853. BYTE $0x08
  4854. BYTE $0x66
  4855. BYTE $0x45
  4856. BYTE $0x0f
  4857. BYTE $0x3a
  4858. BYTE $0x0f
  4859. BYTE $0xc9
  4860. BYTE $0x0c
  4861. BYTE $0x66
  4862. BYTE $0x45
  4863. BYTE $0x0f
  4864. BYTE $0x3a
  4865. BYTE $0x0f
  4866. BYTE $0xd2
  4867. BYTE $0x0c
  4868. BYTE $0x66
  4869. BYTE $0x45
  4870. BYTE $0x0f
  4871. BYTE $0x3a
  4872. BYTE $0x0f
  4873. BYTE $0xdb
  4874. BYTE $0x0c
  4875. BYTE $0x66
  4876. BYTE $0x45
  4877. BYTE $0x0f
  4878. BYTE $0x3a
  4879. BYTE $0x0f
  4880. BYTE $0xff
  4881. BYTE $0x0c
  4882. MOVO X14, 64(BP)
  4883. PADDD X3, X0
  4884. PXOR X0, X9
  4885. ROL16(X9, X14)
  4886. PADDD X9, X6
  4887. PXOR X6, X3
  4888. MOVO X3, X14
  4889. PSLLL $0x0c, X14
  4890. PSRLL $0x14, X3
  4891. PXOR X14, X3
  4892. PADDD X3, X0
  4893. PXOR X0, X9
  4894. ROL8(X9, X14)
  4895. PADDD X9, X6
  4896. PXOR X6, X3
  4897. MOVO X3, X14
  4898. PSLLL $0x07, X14
  4899. PSRLL $0x19, X3
  4900. PXOR X14, X3
  4901. PADDD X4, X1
  4902. PXOR X1, X10
  4903. ROL16(X10, X14)
  4904. PADDD X10, X7
  4905. PXOR X7, X4
  4906. MOVO X4, X14
  4907. PSLLL $0x0c, X14
  4908. PSRLL $0x14, X4
  4909. PXOR X14, X4
  4910. PADDD X4, X1
  4911. PXOR X1, X10
  4912. ROL8(X10, X14)
  4913. PADDD X10, X7
  4914. PXOR X7, X4
  4915. MOVO X4, X14
  4916. PSLLL $0x07, X14
  4917. PSRLL $0x19, X4
  4918. PXOR X14, X4
  4919. PADDD X5, X2
  4920. PXOR X2, X11
  4921. ROL16(X11, X14)
  4922. PADDD X11, X8
  4923. PXOR X8, X5
  4924. MOVO X5, X14
  4925. PSLLL $0x0c, X14
  4926. PSRLL $0x14, X5
  4927. PXOR X14, X5
  4928. PADDD X5, X2
  4929. PXOR X2, X11
  4930. ROL8(X11, X14)
  4931. PADDD X11, X8
  4932. PXOR X8, X5
  4933. MOVO X5, X14
  4934. PSLLL $0x07, X14
  4935. PSRLL $0x19, X5
  4936. PXOR X14, X5
  4937. MOVO 64(BP), X14
  4938. MOVO X7, 64(BP)
  4939. PADDD X13, X12
  4940. PXOR X12, X15
  4941. ROL16(X15, X7)
  4942. PADDD X15, X14
  4943. PXOR X14, X13
  4944. MOVO X13, X7
  4945. PSLLL $0x0c, X7
  4946. PSRLL $0x14, X13
  4947. PXOR X7, X13
  4948. PADDD X13, X12
  4949. PXOR X12, X15
  4950. ROL8(X15, X7)
  4951. PADDD X15, X14
  4952. PXOR X14, X13
  4953. MOVO X13, X7
  4954. PSLLL $0x07, X7
  4955. PSRLL $0x19, X13
  4956. PXOR X7, X13
  4957. MOVO 64(BP), X7
  4958. BYTE $0x66
  4959. BYTE $0x0f
  4960. BYTE $0x3a
  4961. BYTE $0x0f
  4962. BYTE $0xdb
  4963. BYTE $0x0c
  4964. BYTE $0x66
  4965. BYTE $0x0f
  4966. BYTE $0x3a
  4967. BYTE $0x0f
  4968. BYTE $0xe4
  4969. BYTE $0x0c
  4970. BYTE $0x66
  4971. BYTE $0x0f
  4972. BYTE $0x3a
  4973. BYTE $0x0f
  4974. BYTE $0xed
  4975. BYTE $0x0c
  4976. BYTE $0x66
  4977. BYTE $0x45
  4978. BYTE $0x0f
  4979. BYTE $0x3a
  4980. BYTE $0x0f
  4981. BYTE $0xed
  4982. BYTE $0x0c
  4983. BYTE $0x66
  4984. BYTE $0x0f
  4985. BYTE $0x3a
  4986. BYTE $0x0f
  4987. BYTE $0xf6
  4988. BYTE $0x08
  4989. BYTE $0x66
  4990. BYTE $0x0f
  4991. BYTE $0x3a
  4992. BYTE $0x0f
  4993. BYTE $0xff
  4994. BYTE $0x08
  4995. BYTE $0x66
  4996. BYTE $0x45
  4997. BYTE $0x0f
  4998. BYTE $0x3a
  4999. BYTE $0x0f
  5000. BYTE $0xc0
  5001. BYTE $0x08
  5002. BYTE $0x66
  5003. BYTE $0x45
  5004. BYTE $0x0f
  5005. BYTE $0x3a
  5006. BYTE $0x0f
  5007. BYTE $0xf6
  5008. BYTE $0x08
  5009. BYTE $0x66
  5010. BYTE $0x45
  5011. BYTE $0x0f
  5012. BYTE $0x3a
  5013. BYTE $0x0f
  5014. BYTE $0xc9
  5015. BYTE $0x04
  5016. BYTE $0x66
  5017. BYTE $0x45
  5018. BYTE $0x0f
  5019. BYTE $0x3a
  5020. BYTE $0x0f
  5021. BYTE $0xd2
  5022. BYTE $0x04
  5023. BYTE $0x66
  5024. BYTE $0x45
  5025. BYTE $0x0f
  5026. BYTE $0x3a
  5027. BYTE $0x0f
  5028. BYTE $0xdb
  5029. BYTE $0x04
  5030. BYTE $0x66
  5031. BYTE $0x45
  5032. BYTE $0x0f
  5033. BYTE $0x3a
  5034. BYTE $0x0f
  5035. BYTE $0xff
  5036. BYTE $0x04
  5037. DECQ R9
  5038. JNE sealSSEIntroLoop
  5039. // Add in the state
  5040. PADDD ·chacha20Constants<>+0(SB), X0
  5041. PADDD ·chacha20Constants<>+0(SB), X1
  5042. PADDD ·chacha20Constants<>+0(SB), X2
  5043. PADDD ·chacha20Constants<>+0(SB), X12
  5044. PADDD 32(BP), X3
  5045. PADDD 32(BP), X4
  5046. PADDD 32(BP), X5
  5047. PADDD 32(BP), X13
  5048. PADDD 48(BP), X7
  5049. PADDD 48(BP), X8
  5050. PADDD 48(BP), X14
  5051. PADDD 96(BP), X10
  5052. PADDD 112(BP), X11
  5053. PADDD 128(BP), X15
  5054. // Clamp and store the key
  5055. PAND ·polyClampMask<>+0(SB), X0
  5056. MOVO X0, (BP)
  5057. MOVO X3, 16(BP)
  5058. // Hash AAD
  5059. MOVQ ad_len+80(FP), R9
  5060. CALL polyHashADInternal<>(SB)
  5061. MOVOU (SI), X0
  5062. MOVOU 16(SI), X3
  5063. MOVOU 32(SI), X6
  5064. MOVOU 48(SI), X9
  5065. PXOR X0, X1
  5066. PXOR X3, X4
  5067. PXOR X6, X7
  5068. PXOR X9, X10
  5069. MOVOU X1, (DI)
  5070. MOVOU X4, 16(DI)
  5071. MOVOU X7, 32(DI)
  5072. MOVOU X10, 48(DI)
  5073. MOVOU 64(SI), X0
  5074. MOVOU 80(SI), X3
  5075. MOVOU 96(SI), X6
  5076. MOVOU 112(SI), X9
  5077. PXOR X0, X2
  5078. PXOR X3, X5
  5079. PXOR X6, X8
  5080. PXOR X9, X11
  5081. MOVOU X2, 64(DI)
  5082. MOVOU X5, 80(DI)
  5083. MOVOU X8, 96(DI)
  5084. MOVOU X11, 112(DI)
  5085. MOVQ $0x00000080, CX
  5086. SUBQ $0x80, BX
  5087. LEAQ 128(SI), SI
  5088. MOVO X12, X1
  5089. MOVO X13, X4
  5090. MOVO X14, X7
  5091. MOVO X15, X10
  5092. CMPQ BX, $0x40
  5093. JBE sealSSE128SealHash
  5094. MOVOU (SI), X0
  5095. MOVOU 16(SI), X3
  5096. MOVOU 32(SI), X6
  5097. MOVOU 48(SI), X9
  5098. PXOR X0, X12
  5099. PXOR X3, X13
  5100. PXOR X6, X14
  5101. PXOR X9, X15
  5102. MOVOU X12, 128(DI)
  5103. MOVOU X13, 144(DI)
  5104. MOVOU X14, 160(DI)
  5105. MOVOU X15, 176(DI)
  5106. ADDQ $0x40, CX
  5107. SUBQ $0x40, BX
  5108. LEAQ 64(SI), SI
  5109. MOVQ $0x00000002, CX
  5110. MOVQ $0x00000008, R9
  5111. CMPQ BX, $0x40
  5112. JBE sealSSETail64
  5113. CMPQ BX, $0x80
  5114. JBE sealSSETail128
  5115. CMPQ BX, $0xc0
  5116. JBE sealSSETail192
  5117. sealSSEMainLoop:
  // Build four ChaCha20 input states side by side, one 128-bit row per register:
  //   block 0 = (X0, X3, X6, X9)      block 1 = (X1, X4, X7, X10)
  //   block 2 = (X2, X5, X8, X11)     block 3 = (X12, X13, X14, X15)
  // Row 0 comes from chacha20Constants, rows 1 and 2 from 32(BP) and 48(BP),
  // and row 3 (the counter row) from 128(BP), bumped by sseIncMask once more
  // for each successive block.
  5118. // Load state, increment counter blocks
  5119. MOVO ·chacha20Constants<>+0(SB), X0
  5120. MOVO 32(BP), X3
  5121. MOVO 48(BP), X6
  5122. MOVO 128(BP), X9
  5123. PADDL ·sseIncMask<>+0(SB), X9
  5124. MOVO X0, X1
  5125. MOVO X3, X4
  5126. MOVO X6, X7
  5127. MOVO X9, X10
  5128. PADDL ·sseIncMask<>+0(SB), X10
  5129. MOVO X1, X2
  5130. MOVO X4, X5
  5131. MOVO X7, X8
  5132. MOVO X10, X11
  5133. PADDL ·sseIncMask<>+0(SB), X11
  5134. MOVO X2, X12
  5135. MOVO X5, X13
  5136. MOVO X8, X14
  5137. MOVO X11, X15
  5138. PADDL ·sseIncMask<>+0(SB), X15
  // Keep each block's counter row in memory: the rounds overwrite the
  // registers, and the rows are added back from these slots afterwards.
  // 128(BP) ends up holding the highest counter, which is where the next
  // batch of blocks starts from.
  5139. // Store counters
  5140. MOVO X9, 80(BP)
  5141. MOVO X10, 96(BP)
  5142. MOVO X11, 112(BP)
  5143. MOVO X15, 128(BP)
  5144. sealSSEInnerLoop:
  // One pass = one ChaCha20 double round (column round, then diagonal round)
  // over four blocks held in (X0,X3,X6,X9), (X1,X4,X7,X10), (X2,X5,X8,X11)
  // and (X12,X13,X14,X15), interleaved with one Poly1305 update that absorbs
  // the 16 bytes at (DI) and advances DI by 16.
  // Loop control: R9 is decremented after every pass and the loop repeats
  // while it stays >= 0; once it has gone negative, every pass is followed by
  // a second Poly1305 update, and CX counts those until it reaches zero.
  // ROL16/ROL8 are macros defined outside this view; presumably they rotate
  // each 32-bit lane left by 16/8 using the second argument as scratch.
  //
  // All 16 XMM registers hold state, so X14 is spilled to 64(BP) and used as
  // the rotate scratch for blocks 0-2.
  5145. MOVO X14, 64(BP)
  // Column quarter round, block 0: (X0, X3, X6, X9).
  5146. PADDD X3, X0
  5147. PXOR X0, X9
  5148. ROL16(X9, X14)
  5149. PADDD X9, X6
  5150. PXOR X6, X3
  5151. MOVO X3, X14
  5152. PSLLL $0x0c, X14
  5153. PSRLL $0x14, X3
  5154. PXOR X14, X3
  5155. PADDD X3, X0
  5156. PXOR X0, X9
  5157. ROL8(X9, X14)
  5158. PADDD X9, X6
  5159. PXOR X6, X3
  5160. MOVO X3, X14
  5161. PSLLL $0x07, X14
  5162. PSRLL $0x19, X3
  5163. PXOR X14, X3
  // Column quarter round, block 1: (X1, X4, X7, X10).
  5164. PADDD X4, X1
  5165. PXOR X1, X10
  5166. ROL16(X10, X14)
  5167. PADDD X10, X7
  5168. PXOR X7, X4
  5169. MOVO X4, X14
  5170. PSLLL $0x0c, X14
  5171. PSRLL $0x14, X4
  5172. PXOR X14, X4
  5173. PADDD X4, X1
  5174. PXOR X1, X10
  5175. ROL8(X10, X14)
  5176. PADDD X10, X7
  5177. PXOR X7, X4
  5178. MOVO X4, X14
  5179. PSLLL $0x07, X14
  5180. PSRLL $0x19, X4
  5181. PXOR X14, X4
  // Column quarter round, block 2: (X2, X5, X8, X11).
  5182. PADDD X5, X2
  5183. PXOR X2, X11
  5184. ROL16(X11, X14)
  5185. PADDD X11, X8
  5186. PXOR X8, X5
  5187. MOVO X5, X14
  5188. PSLLL $0x0c, X14
  5189. PSRLL $0x14, X5
  5190. PXOR X14, X5
  5191. PADDD X5, X2
  5192. PXOR X2, X11
  5193. ROL8(X11, X14)
  5194. PADDD X11, X8
  5195. PXOR X8, X5
  5196. MOVO X5, X14
  5197. PSLLL $0x07, X14
  5198. PSRLL $0x19, X5
  5199. PXOR X14, X5
  // Block 3 needs X14 as state, so restore it and spill X7 to act as scratch.
  5200. MOVO 64(BP), X14
  5201. MOVO X7, 64(BP)
  // Column quarter round, block 3: (X12, X13, X14, X15).
  5202. PADDD X13, X12
  5203. PXOR X12, X15
  5204. ROL16(X15, X7)
  5205. PADDD X15, X14
  5206. PXOR X14, X13
  5207. MOVO X13, X7
  5208. PSLLL $0x0c, X7
  5209. PSRLL $0x14, X13
  5210. PXOR X7, X13
  5211. PADDD X13, X12
  5212. PXOR X12, X15
  5213. ROL8(X15, X7)
  5214. PADDD X15, X14
  5215. PXOR X14, X13
  5216. MOVO X13, X7
  5217. PSLLL $0x07, X7
  5218. PSRLL $0x19, X13
  5219. PXOR X7, X13
  5220. MOVO 64(BP), X7
  // Poly1305: accumulator (R10:R11:R12) += 16 bytes at (DI), plus the 2^128 bit.
  5221. ADDQ (DI), R10
  5222. ADCQ 8(DI), R11
  5223. ADCQ $0x01, R12
  // Hand-encoded PALIGNR (66 [REX] 0F 3A 0F /r ib). Rows 1, 2 and 3 of every
  // block are rotated by 4, 8 and 12 bytes so that the next four quarter
  // rounds operate on the diagonals.
  5224. BYTE $0x66 // PALIGNR $4, X3, X3
  5225. BYTE $0x0f
  5226. BYTE $0x3a
  5227. BYTE $0x0f
  5228. BYTE $0xdb
  5229. BYTE $0x04
  5230. BYTE $0x66 // PALIGNR $4, X4, X4
  5231. BYTE $0x0f
  5232. BYTE $0x3a
  5233. BYTE $0x0f
  5234. BYTE $0xe4
  5235. BYTE $0x04
  5236. BYTE $0x66 // PALIGNR $4, X5, X5
  5237. BYTE $0x0f
  5238. BYTE $0x3a
  5239. BYTE $0x0f
  5240. BYTE $0xed
  5241. BYTE $0x04
  5242. BYTE $0x66 // PALIGNR $4, X13, X13
  5243. BYTE $0x45
  5244. BYTE $0x0f
  5245. BYTE $0x3a
  5246. BYTE $0x0f
  5247. BYTE $0xed
  5248. BYTE $0x04
  5249. BYTE $0x66 // PALIGNR $8, X6, X6
  5250. BYTE $0x0f
  5251. BYTE $0x3a
  5252. BYTE $0x0f
  5253. BYTE $0xf6
  5254. BYTE $0x08
  5255. BYTE $0x66 // PALIGNR $8, X7, X7
  5256. BYTE $0x0f
  5257. BYTE $0x3a
  5258. BYTE $0x0f
  5259. BYTE $0xff
  5260. BYTE $0x08
  5261. BYTE $0x66 // PALIGNR $8, X8, X8
  5262. BYTE $0x45
  5263. BYTE $0x0f
  5264. BYTE $0x3a
  5265. BYTE $0x0f
  5266. BYTE $0xc0
  5267. BYTE $0x08
  5268. BYTE $0x66 // PALIGNR $8, X14, X14
  5269. BYTE $0x45
  5270. BYTE $0x0f
  5271. BYTE $0x3a
  5272. BYTE $0x0f
  5273. BYTE $0xf6
  5274. BYTE $0x08
  5275. BYTE $0x66 // PALIGNR $12, X9, X9
  5276. BYTE $0x45
  5277. BYTE $0x0f
  5278. BYTE $0x3a
  5279. BYTE $0x0f
  5280. BYTE $0xc9
  5281. BYTE $0x0c
  5282. BYTE $0x66 // PALIGNR $12, X10, X10
  5283. BYTE $0x45
  5284. BYTE $0x0f
  5285. BYTE $0x3a
  5286. BYTE $0x0f
  5287. BYTE $0xd2
  5288. BYTE $0x0c
  5289. BYTE $0x66 // PALIGNR $12, X11, X11
  5290. BYTE $0x45
  5291. BYTE $0x0f
  5292. BYTE $0x3a
  5293. BYTE $0x0f
  5294. BYTE $0xdb
  5295. BYTE $0x0c
  5296. BYTE $0x66 // PALIGNR $12, X15, X15
  5297. BYTE $0x45
  5298. BYTE $0x0f
  5299. BYTE $0x3a
  5300. BYTE $0x0f
  5301. BYTE $0xff
  5302. BYTE $0x0c
  // Poly1305 multiply, first part: accumulator times the two 64-bit key words
  // at (BP) and 8(BP). Partial products are gathered in R13:R14:R15, with DX
  // and R10 carrying the high halves that are folded in further down.
  5303. MOVQ (BP), AX
  5304. MOVQ AX, R15
  5305. MULQ R10
  5306. MOVQ AX, R13
  5307. MOVQ DX, R14
  5308. MOVQ (BP), AX
  5309. MULQ R11
  5310. IMULQ R12, R15
  5311. ADDQ AX, R14
  5312. ADCQ DX, R15
  5313. MOVQ 8(BP), AX
  5314. MOVQ AX, R8
  5315. MULQ R10
  5316. ADDQ AX, R14
  5317. ADCQ $0x00, DX
  5318. MOVQ DX, R10
  5319. MOVQ 8(BP), AX
  5320. MULQ R11
  5321. ADDQ AX, R15
  5322. ADCQ $0x00, DX
  // Step past the 16 bytes just absorbed.
  5323. LEAQ 16(DI), DI
  // Second half of the double round: the same four quarter rounds, now acting
  // on the diagonals because of the row rotation above.
  5324. MOVO X14, 64(BP)
  5325. PADDD X3, X0
  5326. PXOR X0, X9
  5327. ROL16(X9, X14)
  5328. PADDD X9, X6
  5329. PXOR X6, X3
  5330. MOVO X3, X14
  5331. PSLLL $0x0c, X14
  5332. PSRLL $0x14, X3
  5333. PXOR X14, X3
  5334. PADDD X3, X0
  5335. PXOR X0, X9
  5336. ROL8(X9, X14)
  5337. PADDD X9, X6
  5338. PXOR X6, X3
  5339. MOVO X3, X14
  5340. PSLLL $0x07, X14
  5341. PSRLL $0x19, X3
  5342. PXOR X14, X3
  5343. PADDD X4, X1
  5344. PXOR X1, X10
  5345. ROL16(X10, X14)
  5346. PADDD X10, X7
  5347. PXOR X7, X4
  5348. MOVO X4, X14
  5349. PSLLL $0x0c, X14
  5350. PSRLL $0x14, X4
  5351. PXOR X14, X4
  5352. PADDD X4, X1
  5353. PXOR X1, X10
  5354. ROL8(X10, X14)
  5355. PADDD X10, X7
  5356. PXOR X7, X4
  5357. MOVO X4, X14
  5358. PSLLL $0x07, X14
  5359. PSRLL $0x19, X4
  5360. PXOR X14, X4
  5361. PADDD X5, X2
  5362. PXOR X2, X11
  5363. ROL16(X11, X14)
  5364. PADDD X11, X8
  5365. PXOR X8, X5
  5366. MOVO X5, X14
  5367. PSLLL $0x0c, X14
  5368. PSRLL $0x14, X5
  5369. PXOR X14, X5
  5370. PADDD X5, X2
  5371. PXOR X2, X11
  5372. ROL8(X11, X14)
  5373. PADDD X11, X8
  5374. PXOR X8, X5
  5375. MOVO X5, X14
  5376. PSLLL $0x07, X14
  5377. PSRLL $0x19, X5
  5378. PXOR X14, X5
  5379. MOVO 64(BP), X14
  5380. MOVO X7, 64(BP)
  // Poly1305 multiply, last part: fold in the remaining partial products so
  // the full product sits in R13:R14:R15:R8.
  5381. IMULQ R12, R8
  5382. ADDQ R10, R15
  5383. ADCQ DX, R8
  5384. PADDD X13, X12
  5385. PXOR X12, X15
  5386. ROL16(X15, X7)
  5387. PADDD X15, X14
  5388. PXOR X14, X13
  5389. MOVO X13, X7
  5390. PSLLL $0x0c, X7
  5391. PSRLL $0x14, X13
  5392. PXOR X7, X13
  5393. PADDD X13, X12
  5394. PXOR X12, X15
  5395. ROL8(X15, X7)
  5396. PADDD X15, X14
  5397. PXOR X14, X13
  5398. MOVO X13, X7
  5399. PSLLL $0x07, X7
  5400. PSRLL $0x19, X13
  5401. PXOR X7, X13
  5402. MOVO 64(BP), X7
  // Poly1305 reduction modulo 2^130 - 5: keep the low 130 bits in R10:R11:R12,
  // then add 4*c and c, where c is the part of the product above bit 130
  // (R15 with its low two bits cleared, together with R8, is 4*c; the
  // double-register shift right by 2 yields c).
  5403. MOVQ R13, R10
  5404. MOVQ R14, R11
  5405. MOVQ R15, R12
  5406. ANDQ $0x03, R12
  5407. MOVQ R15, R13
  5408. ANDQ $-4, R13
  5409. MOVQ R8, R14
  5410. SHRQ $0x02, R8, R15
  5411. SHRQ $0x02, R8
  5412. ADDQ R13, R10
  5413. ADCQ R14, R11
  5414. ADCQ $0x00, R12
  5415. ADDQ R15, R10
  5416. ADCQ R8, R11
  5417. ADCQ $0x00, R12
  // Hand-encoded PALIGNR again, with the complementary byte counts (12, 8, 4)
  // to rotate rows 1-3 back into column order.
  5418. BYTE $0x66 // PALIGNR $12, X3, X3
  5419. BYTE $0x0f
  5420. BYTE $0x3a
  5421. BYTE $0x0f
  5422. BYTE $0xdb
  5423. BYTE $0x0c
  5424. BYTE $0x66 // PALIGNR $12, X4, X4
  5425. BYTE $0x0f
  5426. BYTE $0x3a
  5427. BYTE $0x0f
  5428. BYTE $0xe4
  5429. BYTE $0x0c
  5430. BYTE $0x66 // PALIGNR $12, X5, X5
  5431. BYTE $0x0f
  5432. BYTE $0x3a
  5433. BYTE $0x0f
  5434. BYTE $0xed
  5435. BYTE $0x0c
  5436. BYTE $0x66 // PALIGNR $12, X13, X13
  5437. BYTE $0x45
  5438. BYTE $0x0f
  5439. BYTE $0x3a
  5440. BYTE $0x0f
  5441. BYTE $0xed
  5442. BYTE $0x0c
  5443. BYTE $0x66 // PALIGNR $8, X6, X6
  5444. BYTE $0x0f
  5445. BYTE $0x3a
  5446. BYTE $0x0f
  5447. BYTE $0xf6
  5448. BYTE $0x08
  5449. BYTE $0x66 // PALIGNR $8, X7, X7
  5450. BYTE $0x0f
  5451. BYTE $0x3a
  5452. BYTE $0x0f
  5453. BYTE $0xff
  5454. BYTE $0x08
  5455. BYTE $0x66 // PALIGNR $8, X8, X8
  5456. BYTE $0x45
  5457. BYTE $0x0f
  5458. BYTE $0x3a
  5459. BYTE $0x0f
  5460. BYTE $0xc0
  5461. BYTE $0x08
  5462. BYTE $0x66 // PALIGNR $8, X14, X14
  5463. BYTE $0x45
  5464. BYTE $0x0f
  5465. BYTE $0x3a
  5466. BYTE $0x0f
  5467. BYTE $0xf6
  5468. BYTE $0x08
  5469. BYTE $0x66 // PALIGNR $4, X9, X9
  5470. BYTE $0x45
  5471. BYTE $0x0f
  5472. BYTE $0x3a
  5473. BYTE $0x0f
  5474. BYTE $0xc9
  5475. BYTE $0x04
  5476. BYTE $0x66 // PALIGNR $4, X10, X10
  5477. BYTE $0x45
  5478. BYTE $0x0f
  5479. BYTE $0x3a
  5480. BYTE $0x0f
  5481. BYTE $0xd2
  5482. BYTE $0x04
  5483. BYTE $0x66 // PALIGNR $4, X11, X11
  5484. BYTE $0x45
  5485. BYTE $0x0f
  5486. BYTE $0x3a
  5487. BYTE $0x0f
  5488. BYTE $0xdb
  5489. BYTE $0x04
  5490. BYTE $0x66 // PALIGNR $4, X15, X15
  5491. BYTE $0x45
  5492. BYTE $0x0f
  5493. BYTE $0x3a
  5494. BYTE $0x0f
  5495. BYTE $0xff
  5496. BYTE $0x04
  // Repeat while R9 is still >= 0 after the decrement.
  5497. DECQ R9
  5498. JGE sealSSEInnerLoop
  // R9 has gone negative: absorb one additional 16-byte block at (DI) with a
  // complete, non-interleaved Poly1305 update (add, multiply, reduce).
  5499. ADDQ (DI), R10
  5500. ADCQ 8(DI), R11
  5501. ADCQ $0x01, R12
  5502. MOVQ (BP), AX
  5503. MOVQ AX, R15
  5504. MULQ R10
  5505. MOVQ AX, R13
  5506. MOVQ DX, R14
  5507. MOVQ (BP), AX
  5508. MULQ R11
  5509. IMULQ R12, R15
  5510. ADDQ AX, R14
  5511. ADCQ DX, R15
  5512. MOVQ 8(BP), AX
  5513. MOVQ AX, R8
  5514. MULQ R10
  5515. ADDQ AX, R14
  5516. ADCQ $0x00, DX
  5517. MOVQ DX, R10
  5518. MOVQ 8(BP), AX
  5519. MULQ R11
  5520. ADDQ AX, R15
  5521. ADCQ $0x00, DX
  5522. IMULQ R12, R8
  5523. ADDQ R10, R15
  5524. ADCQ DX, R8
  5525. MOVQ R13, R10
  5526. MOVQ R14, R11
  5527. MOVQ R15, R12
  5528. ANDQ $0x03, R12
  5529. MOVQ R15, R13
  5530. ANDQ $-4, R13
  5531. MOVQ R8, R14
  5532. SHRQ $0x02, R8, R15
  5533. SHRQ $0x02, R8
  5534. ADDQ R13, R10
  5535. ADCQ R14, R11
  5536. ADCQ $0x00, R12
  5537. ADDQ R15, R10
  5538. ADCQ R8, R11
  5539. ADCQ $0x00, R12
  5540. LEAQ 16(DI), DI
  // CX counts these extra updates; go round again while it is still > 0.
  5541. DECQ CX
  5542. JG sealSSEInnerLoop
  // Rounds finished: add the input state back into each block. Row 3 uses the
  // per-block counter rows saved at 80/96/112/128(BP).
  5543. // Add in the state
  5544. PADDD ·chacha20Constants<>+0(SB), X0
  5545. PADDD ·chacha20Constants<>+0(SB), X1
  5546. PADDD ·chacha20Constants<>+0(SB), X2
  5547. PADDD ·chacha20Constants<>+0(SB), X12
  5548. PADDD 32(BP), X3
  5549. PADDD 32(BP), X4
  5550. PADDD 32(BP), X5
  5551. PADDD 32(BP), X13
  5552. PADDD 48(BP), X6
  5553. PADDD 48(BP), X7
  5554. PADDD 48(BP), X8
  5555. PADDD 48(BP), X14
  5556. PADDD 80(BP), X9
  5557. PADDD 96(BP), X10
  5558. PADDD 112(BP), X11
  5559. PADDD 128(BP), X15
  // No register is free, so X15 is parked at 64(BP) and reused as the load
  // temporary while block 0 is XORed with the input.
  5560. MOVO X15, 64(BP)
  5561. // Load - xor - store
  // Block 0: input bytes 0..63 at SI -> output bytes 0..63 at DI.
  5562. MOVOU (SI), X15
  5563. PXOR X15, X0
  5564. MOVOU 16(SI), X15
  5565. PXOR X15, X3
  5566. MOVOU 32(SI), X15
  5567. PXOR X15, X6
  5568. MOVOU 48(SI), X15
  5569. PXOR X15, X9
  5570. MOVOU X0, (DI)
  5571. MOVOU X3, 16(DI)
  5572. MOVOU X6, 32(DI)
  5573. MOVOU X9, 48(DI)
  5574. MOVO 64(BP), X15
  // Block 1: bytes 64..127. The block 0 registers are now free to hold input.
  5575. MOVOU 64(SI), X0
  5576. MOVOU 80(SI), X3
  5577. MOVOU 96(SI), X6
  5578. MOVOU 112(SI), X9
  5579. PXOR X0, X1
  5580. PXOR X3, X4
  5581. PXOR X6, X7
  5582. PXOR X9, X10
  5583. MOVOU X1, 64(DI)
  5584. MOVOU X4, 80(DI)
  5585. MOVOU X7, 96(DI)
  5586. MOVOU X10, 112(DI)
  // Block 2: bytes 128..191.
  5587. MOVOU 128(SI), X0
  5588. MOVOU 144(SI), X3
  5589. MOVOU 160(SI), X6
  5590. MOVOU 176(SI), X9
  5591. PXOR X0, X2
  5592. PXOR X3, X5
  5593. PXOR X6, X8
  5594. PXOR X9, X11
  5595. MOVOU X2, 128(DI)
  5596. MOVOU X5, 144(DI)
  5597. MOVOU X8, 160(DI)
  5598. MOVOU X11, 176(DI)
  // 192 bytes consumed: advance SI, set CX to 192 and subtract 192 from BX.
  5599. ADDQ $0xc0, SI
  5600. MOVQ $0x000000c0, CX
  5601. SUBQ $0xc0, BX
  // Copy block 3's keystream into X1/X4/X7/X10 before branching; presumably
  // sealSSE128SealHash (outside this view) uses it for the final bytes.
  5602. MOVO X12, X1
  5603. MOVO X13, X4
  5604. MOVO X14, X7
  5605. MOVO X15, X10
  // At most 64 bytes left (unsigned compare): leave the main loop here.
  5606. CMPQ BX, $0x40
  5607. JBE sealSSE128SealHash
  // More than 64 bytes left: block 3 covers a full 64 bytes, stored at 192(DI).
  5608. MOVOU (SI), X0
  5609. MOVOU 16(SI), X3
  5610. MOVOU 32(SI), X6
  5611. MOVOU 48(SI), X9
  5612. PXOR X0, X12
  5613. PXOR X3, X13
  5614. PXOR X6, X14
  5615. PXOR X9, X15
  5616. MOVOU X12, 192(DI)
  5617. MOVOU X13, 208(DI)
  5618. MOVOU X14, 224(DI)
  5619. MOVOU X15, 240(DI)
  5620. LEAQ 64(SI), SI
  5621. SUBQ $0x40, BX
  // Loop counters for the next batch: CX = 6, R9 = 4.
  5622. MOVQ $0x00000006, CX
  5623. MOVQ $0x00000004, R9
  // More than 192 bytes remain: run another full four-block iteration.
  5624. CMPQ BX, $0xc0
  5625. JG sealSSEMainLoop
  // Otherwise finish up: nothing left -> sealSSE128SealHash with CX = 0;
  // else pick the tail routine sized for the remaining 1..64, 65..128 or
  // 129..192 bytes, entered with CX = 6 and R9 = 4.
  5626. MOVQ BX, CX
  5627. TESTQ BX, BX
  5628. JE sealSSE128SealHash
  5629. MOVQ $0x00000006, CX
  5630. CMPQ BX, $0x40
  5631. JBE sealSSETail64
  5632. CMPQ BX, $0x80
  5633. JBE sealSSETail128
  5634. JMP sealSSETail192
  5635. sealSSETail64:
  // Tail path entered when at most 64 bytes remain (BX <= 0x40 at the jumps
  // that target this label). Sets up a single ChaCha20 block in
  // (X1, X4, X7, X10): constants, the rows at 32(BP) and 48(BP), and the
  // counter row from 128(BP) advanced by sseIncMask. The counter row is also
  // saved to 80(BP) so it can be added back after the rounds.
  5636. MOVO ·chacha20Constants<>+0(SB), X1
  5637. MOVO 32(BP), X4
  5638. MOVO 48(BP), X7
  5639. MOVO 128(BP), X10
  5640. PADDL ·sseIncMask<>+0(SB), X10
  5641. MOVO X10, 80(BP)
  5642. sealSSETail64LoopA:
  // One complete Poly1305 update on the 16 bytes at (DI), then falls through
  // into sealSSETail64LoopB. The accumulator lives in R10:R11:R12.
  // Add the block plus the 2^128 bit.
  5643. ADDQ (DI), R10
  5644. ADCQ 8(DI), R11
  5645. ADCQ $0x01, R12
  // Multiply by the two 64-bit key words at (BP) and 8(BP); the product is
  // assembled in R13:R14:R15:R8.
  5646. MOVQ (BP), AX
  5647. MOVQ AX, R15
  5648. MULQ R10
  5649. MOVQ AX, R13
  5650. MOVQ DX, R14
  5651. MOVQ (BP), AX
  5652. MULQ R11
  5653. IMULQ R12, R15
  5654. ADDQ AX, R14
  5655. ADCQ DX, R15
  5656. MOVQ 8(BP), AX
  5657. MOVQ AX, R8
  5658. MULQ R10
  5659. ADDQ AX, R14
  5660. ADCQ $0x00, DX
  5661. MOVQ DX, R10
  5662. MOVQ 8(BP), AX
  5663. MULQ R11
  5664. ADDQ AX, R15
  5665. ADCQ $0x00, DX
  5666. IMULQ R12, R8
  5667. ADDQ R10, R15
  5668. ADCQ DX, R8
  // Reduce modulo 2^130 - 5: keep the low 130 bits, then add 4*c and c where
  // c is the part of the product above bit 130.
  5669. MOVQ R13, R10
  5670. MOVQ R14, R11
  5671. MOVQ R15, R12
  5672. ANDQ $0x03, R12
  5673. MOVQ R15, R13
  5674. ANDQ $-4, R13
  5675. MOVQ R8, R14
  5676. SHRQ $0x02, R8, R15
  5677. SHRQ $0x02, R8
  5678. ADDQ R13, R10
  5679. ADCQ R14, R11
  5680. ADCQ $0x00, R12
  5681. ADDQ R15, R10
  5682. ADCQ R8, R11
  5683. ADCQ $0x00, R12
  // Step past the 16 bytes just absorbed.
  5684. LEAQ 16(DI), DI
  5685. sealSSETail64LoopB:
  // One ChaCha20 double round on the single block (X1, X4, X7, X10), using X13
  // as scratch, followed by one Poly1305 update on the 16 bytes at (DI).
  // ROL16/ROL8 are macros defined outside this view; presumably they rotate
  // each 32-bit lane left by 16/8 using the second argument as scratch.
  // Column quarter round.
  5686. PADDD X4, X1
  5687. PXOR X1, X10
  5688. ROL16(X10, X13)
  5689. PADDD X10, X7
  5690. PXOR X7, X4
  5691. MOVO X4, X13
  5692. PSLLL $0x0c, X13
  5693. PSRLL $0x14, X4
  5694. PXOR X13, X4
  5695. PADDD X4, X1
  5696. PXOR X1, X10
  5697. ROL8(X10, X13)
  5698. PADDD X10, X7
  5699. PXOR X7, X4
  5700. MOVO X4, X13
  5701. PSLLL $0x07, X13
  5702. PSRLL $0x19, X4
  5703. PXOR X13, X4
  // Hand-encoded PALIGNR (66 [REX] 0F 3A 0F /r ib): rotate rows 1-3 by 4, 8
  // and 12 bytes so the next quarter round works on the diagonals.
  5704. BYTE $0x66 // PALIGNR $4, X4, X4
  5705. BYTE $0x0f
  5706. BYTE $0x3a
  5707. BYTE $0x0f
  5708. BYTE $0xe4
  5709. BYTE $0x04
  5710. BYTE $0x66 // PALIGNR $8, X7, X7
  5711. BYTE $0x0f
  5712. BYTE $0x3a
  5713. BYTE $0x0f
  5714. BYTE $0xff
  5715. BYTE $0x08
  5716. BYTE $0x66 // PALIGNR $12, X10, X10
  5717. BYTE $0x45
  5718. BYTE $0x0f
  5719. BYTE $0x3a
  5720. BYTE $0x0f
  5721. BYTE $0xd2
  5722. BYTE $0x0c
  // Diagonal quarter round.
  5723. PADDD X4, X1
  5724. PXOR X1, X10
  5725. ROL16(X10, X13)
  5726. PADDD X10, X7
  5727. PXOR X7, X4
  5728. MOVO X4, X13
  5729. PSLLL $0x0c, X13
  5730. PSRLL $0x14, X4
  5731. PXOR X13, X4
  5732. PADDD X4, X1
  5733. PXOR X1, X10
  5734. ROL8(X10, X13)
  5735. PADDD X10, X7
  5736. PXOR X7, X4
  5737. MOVO X4, X13
  5738. PSLLL $0x07, X13
  5739. PSRLL $0x19, X4
  5740. PXOR X13, X4
  // Rotate the rows back into column order (12, 8, 4 bytes).
  5741. BYTE $0x66 // PALIGNR $12, X4, X4
  5742. BYTE $0x0f
  5743. BYTE $0x3a
  5744. BYTE $0x0f
  5745. BYTE $0xe4
  5746. BYTE $0x0c
  5747. BYTE $0x66 // PALIGNR $8, X7, X7
  5748. BYTE $0x0f
  5749. BYTE $0x3a
  5750. BYTE $0x0f
  5751. BYTE $0xff
  5752. BYTE $0x08
  5753. BYTE $0x66 // PALIGNR $4, X10, X10
  5754. BYTE $0x45
  5755. BYTE $0x0f
  5756. BYTE $0x3a
  5757. BYTE $0x0f
  5758. BYTE $0xd2
  5759. BYTE $0x04
  // Poly1305 update on the 16 bytes at (DI): add (with the 2^128 bit),
  // multiply by the key words at (BP)/8(BP), reduce modulo 2^130 - 5.
  5760. ADDQ (DI), R10
  5761. ADCQ 8(DI), R11
  5762. ADCQ $0x01, R12
  5763. MOVQ (BP), AX
  5764. MOVQ AX, R15
  5765. MULQ R10
  5766. MOVQ AX, R13
  5767. MOVQ DX, R14
  5768. MOVQ (BP), AX
  5769. MULQ R11
  5770. IMULQ R12, R15
  5771. ADDQ AX, R14
  5772. ADCQ DX, R15
  5773. MOVQ 8(BP), AX
  5774. MOVQ AX, R8
  5775. MULQ R10
  5776. ADDQ AX, R14
  5777. ADCQ $0x00, DX
  5778. MOVQ DX, R10
  5779. MOVQ 8(BP), AX
  5780. MULQ R11
  5781. ADDQ AX, R15
  5782. ADCQ $0x00, DX
  5783. IMULQ R12, R8
  5784. ADDQ R10, R15
  5785. ADCQ DX, R8
  5786. MOVQ R13, R10
  5787. MOVQ R14, R11
  5788. MOVQ R15, R12
  5789. ANDQ $0x03, R12
  5790. MOVQ R15, R13
  5791. ANDQ $-4, R13
  5792. MOVQ R8, R14
  5793. SHRQ $0x02, R8, R15
  5794. SHRQ $0x02, R8
  5795. ADDQ R13, R10
  5796. ADCQ R14, R11
  5797. ADCQ $0x00, R12
  5798. ADDQ R15, R10
  5799. ADCQ R8, R11
  5800. ADCQ $0x00, R12
  5801. LEAQ 16(DI), DI
  // While CX > 0 after the decrement, the next pass starts at LoopA and so
  // hashes one extra block first; after that, R9 counts further passes that
  // start directly at LoopB.
  5802. DECQ CX
  5803. JG sealSSETail64LoopA
  5804. DECQ R9
  5805. JGE sealSSETail64LoopB
  // Rounds finished: add the input state back (counter row from 80(BP)) and
  // continue at sealSSE128Seal (outside this view) with the keystream block in
  // X1, X4, X7, X10.
  5806. PADDL ·chacha20Constants<>+0(SB), X1
  5807. PADDL 32(BP), X4
  5808. PADDL 48(BP), X7
  5809. PADDL 80(BP), X10
  5810. JMP sealSSE128Seal
  5811. sealSSETail128:
  // Tail path entered when between 65 and 128 bytes remain (the jumps to this
  // label follow a failed BX <= 0x40 test and a passed BX <= 0x80 test).
  // Sets up two ChaCha20 blocks, (X0, X3, X6, X9) and (X1, X4, X7, X10), with
  // consecutive counters derived from 128(BP); the counter rows are saved at
  // 80(BP) and 96(BP) so they can be added back after the rounds.
  5812. MOVO ·chacha20Constants<>+0(SB), X0
  5813. MOVO 32(BP), X3
  5814. MOVO 48(BP), X6
  5815. MOVO 128(BP), X9
  5816. PADDL ·sseIncMask<>+0(SB), X9
  5817. MOVO X9, 80(BP)
  5818. MOVO X0, X1
  5819. MOVO X3, X4
  5820. MOVO X6, X7
  5821. MOVO X9, X10
  5822. PADDL ·sseIncMask<>+0(SB), X10
  5823. MOVO X10, 96(BP)
  5824. sealSSETail128LoopA:
  // One complete Poly1305 update on the 16 bytes at (DI), then falls through
  // into sealSSETail128LoopB. The accumulator lives in R10:R11:R12.
  // Add the block plus the 2^128 bit.
  5825. ADDQ (DI), R10
  5826. ADCQ 8(DI), R11
  5827. ADCQ $0x01, R12
  // Multiply by the two 64-bit key words at (BP) and 8(BP); the product is
  // assembled in R13:R14:R15:R8.
  5828. MOVQ (BP), AX
  5829. MOVQ AX, R15
  5830. MULQ R10
  5831. MOVQ AX, R13
  5832. MOVQ DX, R14
  5833. MOVQ (BP), AX
  5834. MULQ R11
  5835. IMULQ R12, R15
  5836. ADDQ AX, R14
  5837. ADCQ DX, R15
  5838. MOVQ 8(BP), AX
  5839. MOVQ AX, R8
  5840. MULQ R10
  5841. ADDQ AX, R14
  5842. ADCQ $0x00, DX
  5843. MOVQ DX, R10
  5844. MOVQ 8(BP), AX
  5845. MULQ R11
  5846. ADDQ AX, R15
  5847. ADCQ $0x00, DX
  5848. IMULQ R12, R8
  5849. ADDQ R10, R15
  5850. ADCQ DX, R8
  // Reduce modulo 2^130 - 5: keep the low 130 bits, then add 4*c and c where
  // c is the part of the product above bit 130.
  5851. MOVQ R13, R10
  5852. MOVQ R14, R11
  5853. MOVQ R15, R12
  5854. ANDQ $0x03, R12
  5855. MOVQ R15, R13
  5856. ANDQ $-4, R13
  5857. MOVQ R8, R14
  5858. SHRQ $0x02, R8, R15
  5859. SHRQ $0x02, R8
  5860. ADDQ R13, R10
  5861. ADCQ R14, R11
  5862. ADCQ $0x00, R12
  5863. ADDQ R15, R10
  5864. ADCQ R8, R11
  5865. ADCQ $0x00, R12
  // Step past the 16 bytes just absorbed.
  5866. LEAQ 16(DI), DI
  5867. sealSSETail128LoopB:
  // One ChaCha20 double round on two blocks, (X0, X3, X6, X9) and
  // (X1, X4, X7, X10), with X12 as scratch, and one Poly1305 update on the
  // 16 bytes at (DI) placed between the column and diagonal halves.
  // ROL16/ROL8 are macros defined outside this view; presumably they rotate
  // each 32-bit lane left by 16/8 using the second argument as scratch.
  // Column quarter round, block 0.
  5868. PADDD X3, X0
  5869. PXOR X0, X9
  5870. ROL16(X9, X12)
  5871. PADDD X9, X6
  5872. PXOR X6, X3
  5873. MOVO X3, X12
  5874. PSLLL $0x0c, X12
  5875. PSRLL $0x14, X3
  5876. PXOR X12, X3
  5877. PADDD X3, X0
  5878. PXOR X0, X9
  5879. ROL8(X9, X12)
  5880. PADDD X9, X6
  5881. PXOR X6, X3
  5882. MOVO X3, X12
  5883. PSLLL $0x07, X12
  5884. PSRLL $0x19, X3
  5885. PXOR X12, X3
  // Column quarter round, block 1.
  5886. PADDD X4, X1
  5887. PXOR X1, X10
  5888. ROL16(X10, X12)
  5889. PADDD X10, X7
  5890. PXOR X7, X4
  5891. MOVO X4, X12
  5892. PSLLL $0x0c, X12
  5893. PSRLL $0x14, X4
  5894. PXOR X12, X4
  5895. PADDD X4, X1
  5896. PXOR X1, X10
  5897. ROL8(X10, X12)
  5898. PADDD X10, X7
  5899. PXOR X7, X4
  5900. MOVO X4, X12
  5901. PSLLL $0x07, X12
  5902. PSRLL $0x19, X4
  5903. PXOR X12, X4
  // Hand-encoded PALIGNR (66 [REX] 0F 3A 0F /r ib): rotate rows 1-3 of both
  // blocks by 4, 8 and 12 bytes to line up the diagonals.
  5904. BYTE $0x66 // PALIGNR $4, X3, X3
  5905. BYTE $0x0f
  5906. BYTE $0x3a
  5907. BYTE $0x0f
  5908. BYTE $0xdb
  5909. BYTE $0x04
  5910. BYTE $0x66 // PALIGNR $8, X6, X6
  5911. BYTE $0x0f
  5912. BYTE $0x3a
  5913. BYTE $0x0f
  5914. BYTE $0xf6
  5915. BYTE $0x08
  5916. BYTE $0x66 // PALIGNR $12, X9, X9
  5917. BYTE $0x45
  5918. BYTE $0x0f
  5919. BYTE $0x3a
  5920. BYTE $0x0f
  5921. BYTE $0xc9
  5922. BYTE $0x0c
  5923. BYTE $0x66 // PALIGNR $4, X4, X4
  5924. BYTE $0x0f
  5925. BYTE $0x3a
  5926. BYTE $0x0f
  5927. BYTE $0xe4
  5928. BYTE $0x04
  5929. BYTE $0x66 // PALIGNR $8, X7, X7
  5930. BYTE $0x0f
  5931. BYTE $0x3a
  5932. BYTE $0x0f
  5933. BYTE $0xff
  5934. BYTE $0x08
  5935. BYTE $0x66 // PALIGNR $12, X10, X10
  5936. BYTE $0x45
  5937. BYTE $0x0f
  5938. BYTE $0x3a
  5939. BYTE $0x0f
  5940. BYTE $0xd2
  5941. BYTE $0x0c
  // Poly1305 update on the 16 bytes at (DI): add (with the 2^128 bit),
  // multiply by the key words at (BP)/8(BP), reduce modulo 2^130 - 5.
  5942. ADDQ (DI), R10
  5943. ADCQ 8(DI), R11
  5944. ADCQ $0x01, R12
  5945. MOVQ (BP), AX
  5946. MOVQ AX, R15
  5947. MULQ R10
  5948. MOVQ AX, R13
  5949. MOVQ DX, R14
  5950. MOVQ (BP), AX
  5951. MULQ R11
  5952. IMULQ R12, R15
  5953. ADDQ AX, R14
  5954. ADCQ DX, R15
  5955. MOVQ 8(BP), AX
  5956. MOVQ AX, R8
  5957. MULQ R10
  5958. ADDQ AX, R14
  5959. ADCQ $0x00, DX
  5960. MOVQ DX, R10
  5961. MOVQ 8(BP), AX
  5962. MULQ R11
  5963. ADDQ AX, R15
  5964. ADCQ $0x00, DX
  5965. IMULQ R12, R8
  5966. ADDQ R10, R15
  5967. ADCQ DX, R8
  5968. MOVQ R13, R10
  5969. MOVQ R14, R11
  5970. MOVQ R15, R12
  5971. ANDQ $0x03, R12
  5972. MOVQ R15, R13
  5973. ANDQ $-4, R13
  5974. MOVQ R8, R14
  5975. SHRQ $0x02, R8, R15
  5976. SHRQ $0x02, R8
  5977. ADDQ R13, R10
  5978. ADCQ R14, R11
  5979. ADCQ $0x00, R12
  5980. ADDQ R15, R10
  5981. ADCQ R8, R11
  5982. ADCQ $0x00, R12
  5983. LEAQ 16(DI), DI
  // Diagonal quarter round, block 0.
  5984. PADDD X3, X0
  5985. PXOR X0, X9
  5986. ROL16(X9, X12)
  5987. PADDD X9, X6
  5988. PXOR X6, X3
  5989. MOVO X3, X12
  5990. PSLLL $0x0c, X12
  5991. PSRLL $0x14, X3
  5992. PXOR X12, X3
  5993. PADDD X3, X0
  5994. PXOR X0, X9
  5995. ROL8(X9, X12)
  5996. PADDD X9, X6
  5997. PXOR X6, X3
  5998. MOVO X3, X12
  5999. PSLLL $0x07, X12
  6000. PSRLL $0x19, X3
  6001. PXOR X12, X3
  // Diagonal quarter round, block 1.
  6002. PADDD X4, X1
  6003. PXOR X1, X10
  6004. ROL16(X10, X12)
  6005. PADDD X10, X7
  6006. PXOR X7, X4
  6007. MOVO X4, X12
  6008. PSLLL $0x0c, X12
  6009. PSRLL $0x14, X4
  6010. PXOR X12, X4
  6011. PADDD X4, X1
  6012. PXOR X1, X10
  6013. ROL8(X10, X12)
  6014. PADDD X10, X7
  6015. PXOR X7, X4
  6016. MOVO X4, X12
  6017. PSLLL $0x07, X12
  6018. PSRLL $0x19, X4
  6019. PXOR X12, X4
  // Rotate the rows back into column order (12, 8, 4 bytes).
  6020. BYTE $0x66 // PALIGNR $12, X3, X3
  6021. BYTE $0x0f
  6022. BYTE $0x3a
  6023. BYTE $0x0f
  6024. BYTE $0xdb
  6025. BYTE $0x0c
  6026. BYTE $0x66 // PALIGNR $8, X6, X6
  6027. BYTE $0x0f
  6028. BYTE $0x3a
  6029. BYTE $0x0f
  6030. BYTE $0xf6
  6031. BYTE $0x08
  6032. BYTE $0x66 // PALIGNR $4, X9, X9
  6033. BYTE $0x45
  6034. BYTE $0x0f
  6035. BYTE $0x3a
  6036. BYTE $0x0f
  6037. BYTE $0xc9
  6038. BYTE $0x04
  6039. BYTE $0x66 // PALIGNR $12, X4, X4
  6040. BYTE $0x0f
  6041. BYTE $0x3a
  6042. BYTE $0x0f
  6043. BYTE $0xe4
  6044. BYTE $0x0c
  6045. BYTE $0x66 // PALIGNR $8, X7, X7
  6046. BYTE $0x0f
  6047. BYTE $0x3a
  6048. BYTE $0x0f
  6049. BYTE $0xff
  6050. BYTE $0x08
  6051. BYTE $0x66 // PALIGNR $4, X10, X10
  6052. BYTE $0x45
  6053. BYTE $0x0f
  6054. BYTE $0x3a
  6055. BYTE $0x0f
  6056. BYTE $0xd2
  6057. BYTE $0x04
  // While CX > 0 after the decrement, the next pass starts at LoopA and so
  // hashes one extra block first; after that, R9 counts further passes that
  // start directly at LoopB.
  6058. DECQ CX
  6059. JG sealSSETail128LoopA
  6060. DECQ R9
  6061. JGE sealSSETail128LoopB
  // Rounds finished: add the input state back into both blocks, using the
  // counter rows saved at 80(BP) and 96(BP).
  6062. PADDL ·chacha20Constants<>+0(SB), X0
  6063. PADDL ·chacha20Constants<>+0(SB), X1
  6064. PADDL 32(BP), X3
  6065. PADDL 32(BP), X4
  6066. PADDL 48(BP), X6
  6067. PADDL 48(BP), X7
  6068. PADDL 80(BP), X9
  6069. PADDL 96(BP), X10
  // XOR block 0 with the next 64 input bytes at SI and store them at DI.
  // X12-X15 are free on this path and hold the loaded input.
  6070. MOVOU (SI), X12
  6071. MOVOU 16(SI), X13
  6072. MOVOU 32(SI), X14
  6073. MOVOU 48(SI), X15
  6074. PXOR X12, X0
  6075. PXOR X13, X3
  6076. PXOR X14, X6
  6077. PXOR X15, X9
  6078. MOVOU X0, (DI)
  6079. MOVOU X3, 16(DI)
  6080. MOVOU X6, 32(DI)
  6081. MOVOU X9, 48(DI)
  // Set CX to 64, advance SI by 64 and subtract 64 from BX, then continue at
  // sealSSE128SealHash (outside this view). Block 1 stays in X1, X4, X7, X10;
  // presumably that code uses it for the remaining bytes.
  6082. MOVQ $0x00000040, CX
  6083. LEAQ 64(SI), SI
  6084. SUBQ $0x40, BX
  6085. JMP sealSSE128SealHash
  6086. sealSSETail192:
  // Tail path entered when more than 128 bytes remain but not enough for
  // another four-block iteration (the jumps to this label follow failed
  // BX <= 0x40 and BX <= 0x80 tests).
  // Sets up three ChaCha20 blocks, (X0, X3, X6, X9), (X1, X4, X7, X10) and
  // (X2, X5, X8, X11), with consecutive counters derived from 128(BP); the
  // counter rows are saved at 80(BP), 96(BP) and 112(BP) so they can be added
  // back after the rounds.
  6087. MOVO ·chacha20Constants<>+0(SB), X0
  6088. MOVO 32(BP), X3
  6089. MOVO 48(BP), X6
  6090. MOVO 128(BP), X9
  6091. PADDL ·sseIncMask<>+0(SB), X9
  6092. MOVO X9, 80(BP)
  6093. MOVO X0, X1
  6094. MOVO X3, X4
  6095. MOVO X6, X7
  6096. MOVO X9, X10
  6097. PADDL ·sseIncMask<>+0(SB), X10
  6098. MOVO X10, 96(BP)
  6099. MOVO X1, X2
  6100. MOVO X4, X5
  6101. MOVO X7, X8
  6102. MOVO X10, X11
  6103. PADDL ·sseIncMask<>+0(SB), X11
  6104. MOVO X11, 112(BP)
  6105. sealSSETail192LoopA:
  // One complete Poly1305 update on the 16 bytes at (DI), then falls through
  // into sealSSETail192LoopB. The accumulator lives in R10:R11:R12.
  // Add the block plus the 2^128 bit.
  6106. ADDQ (DI), R10
  6107. ADCQ 8(DI), R11
  6108. ADCQ $0x01, R12
  // Multiply by the two 64-bit key words at (BP) and 8(BP); the product is
  // assembled in R13:R14:R15:R8.
  6109. MOVQ (BP), AX
  6110. MOVQ AX, R15
  6111. MULQ R10
  6112. MOVQ AX, R13
  6113. MOVQ DX, R14
  6114. MOVQ (BP), AX
  6115. MULQ R11
  6116. IMULQ R12, R15
  6117. ADDQ AX, R14
  6118. ADCQ DX, R15
  6119. MOVQ 8(BP), AX
  6120. MOVQ AX, R8
  6121. MULQ R10
  6122. ADDQ AX, R14
  6123. ADCQ $0x00, DX
  6124. MOVQ DX, R10
  6125. MOVQ 8(BP), AX
  6126. MULQ R11
  6127. ADDQ AX, R15
  6128. ADCQ $0x00, DX
  6129. IMULQ R12, R8
  6130. ADDQ R10, R15
  6131. ADCQ DX, R8
  // Reduce modulo 2^130 - 5: keep the low 130 bits, then add 4*c and c where
  // c is the part of the product above bit 130.
  6132. MOVQ R13, R10
  6133. MOVQ R14, R11
  6134. MOVQ R15, R12
  6135. ANDQ $0x03, R12
  6136. MOVQ R15, R13
  6137. ANDQ $-4, R13
  6138. MOVQ R8, R14
  6139. SHRQ $0x02, R8, R15
  6140. SHRQ $0x02, R8
  6141. ADDQ R13, R10
  6142. ADCQ R14, R11
  6143. ADCQ $0x00, R12
  6144. ADDQ R15, R10
  6145. ADCQ R8, R11
  6146. ADCQ $0x00, R12
  // Step past the 16 bytes just absorbed.
  6147. LEAQ 16(DI), DI
  6148. sealSSETail192LoopB:
  6149. PADDD X3, X0
  6150. PXOR X0, X9
  6151. ROL16(X9, X12)
  6152. PADDD X9, X6
  6153. PXOR X6, X3
  6154. MOVO X3, X12
  6155. PSLLL $0x0c, X12
  6156. PSRLL $0x14, X3
  6157. PXOR X12, X3
  6158. PADDD X3, X0
  6159. PXOR X0, X9
  6160. ROL8(X9, X12)
  6161. PADDD X9, X6
  6162. PXOR X6, X3
  6163. MOVO X3, X12
  6164. PSLLL $0x07, X12
  6165. PSRLL $0x19, X3
  6166. PXOR X12, X3
  6167. PADDD X4, X1
  6168. PXOR X1, X10
  6169. ROL16(X10, X12)
  6170. PADDD X10, X7
  6171. PXOR X7, X4
  6172. MOVO X4, X12
  6173. PSLLL $0x0c, X12
  6174. PSRLL $0x14, X4
  6175. PXOR X12, X4
  6176. PADDD X4, X1
  6177. PXOR X1, X10
  6178. ROL8(X10, X12)
  6179. PADDD X10, X7
  6180. PXOR X7, X4
  6181. MOVO X4, X12
  6182. PSLLL $0x07, X12
  6183. PSRLL $0x19, X4
  6184. PXOR X12, X4
  6185. PADDD X5, X2
  6186. PXOR X2, X11
  6187. ROL16(X11, X12)
  6188. PADDD X11, X8
  6189. PXOR X8, X5
  6190. MOVO X5, X12
  6191. PSLLL $0x0c, X12
  6192. PSRLL $0x14, X5
  6193. PXOR X12, X5
  6194. PADDD X5, X2
  6195. PXOR X2, X11
  6196. ROL8(X11, X12)
  6197. PADDD X11, X8
  6198. PXOR X8, X5
  6199. MOVO X5, X12
  6200. PSLLL $0x07, X12
  6201. PSRLL $0x19, X5
  6202. PXOR X12, X5
  6203. BYTE $0x66
  6204. BYTE $0x0f
  6205. BYTE $0x3a
  6206. BYTE $0x0f
  6207. BYTE $0xdb
  6208. BYTE $0x04
  6209. BYTE $0x66
  6210. BYTE $0x0f
  6211. BYTE $0x3a
  6212. BYTE $0x0f
  6213. BYTE $0xf6
  6214. BYTE $0x08
  6215. BYTE $0x66
  6216. BYTE $0x45
  6217. BYTE $0x0f
  6218. BYTE $0x3a
  6219. BYTE $0x0f
  6220. BYTE $0xc9
  6221. BYTE $0x0c
  6222. BYTE $0x66
  6223. BYTE $0x0f
  6224. BYTE $0x3a
  6225. BYTE $0x0f
  6226. BYTE $0xe4
  6227. BYTE $0x04
  6228. BYTE $0x66
  6229. BYTE $0x0f
  6230. BYTE $0x3a
  6231. BYTE $0x0f
  6232. BYTE $0xff
  6233. BYTE $0x08
  6234. BYTE $0x66
  6235. BYTE $0x45
  6236. BYTE $0x0f
  6237. BYTE $0x3a
  6238. BYTE $0x0f
  6239. BYTE $0xd2
  6240. BYTE $0x0c
  6241. BYTE $0x66
  6242. BYTE $0x0f
  6243. BYTE $0x3a
  6244. BYTE $0x0f
  6245. BYTE $0xed
  6246. BYTE $0x04
  6247. BYTE $0x66
  6248. BYTE $0x45
  6249. BYTE $0x0f
  6250. BYTE $0x3a
  6251. BYTE $0x0f
  6252. BYTE $0xc0
  6253. BYTE $0x08
  6254. BYTE $0x66
  6255. BYTE $0x45
  6256. BYTE $0x0f
  6257. BYTE $0x3a
  6258. BYTE $0x0f
  6259. BYTE $0xdb
  6260. BYTE $0x0c
  6261. ADDQ (DI), R10
  6262. ADCQ 8(DI), R11
  6263. ADCQ $0x01, R12
  6264. MOVQ (BP), AX
  6265. MOVQ AX, R15
  6266. MULQ R10
  6267. MOVQ AX, R13
  6268. MOVQ DX, R14
  6269. MOVQ (BP), AX
  6270. MULQ R11
  6271. IMULQ R12, R15
  6272. ADDQ AX, R14
  6273. ADCQ DX, R15
  6274. MOVQ 8(BP), AX
  6275. MOVQ AX, R8
  6276. MULQ R10
  6277. ADDQ AX, R14
  6278. ADCQ $0x00, DX
  6279. MOVQ DX, R10
  6280. MOVQ 8(BP), AX
  6281. MULQ R11
  6282. ADDQ AX, R15
  6283. ADCQ $0x00, DX
  6284. IMULQ R12, R8
  6285. ADDQ R10, R15
  6286. ADCQ DX, R8
  6287. MOVQ R13, R10
  6288. MOVQ R14, R11
  6289. MOVQ R15, R12
  6290. ANDQ $0x03, R12
  6291. MOVQ R15, R13
  6292. ANDQ $-4, R13
  6293. MOVQ R8, R14
  6294. SHRQ $0x02, R8, R15
  6295. SHRQ $0x02, R8
  6296. ADDQ R13, R10
  6297. ADCQ R14, R11
  6298. ADCQ $0x00, R12
  6299. ADDQ R15, R10
  6300. ADCQ R8, R11
  6301. ADCQ $0x00, R12
  6302. LEAQ 16(DI), DI
  6303. PADDD X3, X0
  6304. PXOR X0, X9
  6305. ROL16(X9, X12)
  6306. PADDD X9, X6
  6307. PXOR X6, X3
  6308. MOVO X3, X12
  6309. PSLLL $0x0c, X12
  6310. PSRLL $0x14, X3
  6311. PXOR X12, X3
  6312. PADDD X3, X0
  6313. PXOR X0, X9
  6314. ROL8(X9, X12)
  6315. PADDD X9, X6
  6316. PXOR X6, X3
  6317. MOVO X3, X12
  6318. PSLLL $0x07, X12
  6319. PSRLL $0x19, X3
  6320. PXOR X12, X3
  6321. PADDD X4, X1
  6322. PXOR X1, X10
  6323. ROL16(X10, X12)
  6324. PADDD X10, X7
  6325. PXOR X7, X4
  6326. MOVO X4, X12
  6327. PSLLL $0x0c, X12
  6328. PSRLL $0x14, X4
  6329. PXOR X12, X4
  6330. PADDD X4, X1
  6331. PXOR X1, X10
  6332. ROL8(X10, X12)
  6333. PADDD X10, X7
  6334. PXOR X7, X4
  6335. MOVO X4, X12
  6336. PSLLL $0x07, X12
  6337. PSRLL $0x19, X4
  6338. PXOR X12, X4
  6339. PADDD X5, X2
  6340. PXOR X2, X11
  6341. ROL16(X11, X12)
  6342. PADDD X11, X8
  6343. PXOR X8, X5
  6344. MOVO X5, X12
  6345. PSLLL $0x0c, X12
  6346. PSRLL $0x14, X5
  6347. PXOR X12, X5
  6348. PADDD X5, X2
  6349. PXOR X2, X11
  6350. ROL8(X11, X12)
  6351. PADDD X11, X8
  6352. PXOR X8, X5
  6353. MOVO X5, X12
  6354. PSLLL $0x07, X12
  6355. PSRLL $0x19, X5
  6356. PXOR X12, X5
  6357. BYTE $0x66
  6358. BYTE $0x0f
  6359. BYTE $0x3a
  6360. BYTE $0x0f
  6361. BYTE $0xdb
  6362. BYTE $0x0c
  6363. BYTE $0x66
  6364. BYTE $0x0f
  6365. BYTE $0x3a
  6366. BYTE $0x0f
  6367. BYTE $0xf6
  6368. BYTE $0x08
  6369. BYTE $0x66
  6370. BYTE $0x45
  6371. BYTE $0x0f
  6372. BYTE $0x3a
  6373. BYTE $0x0f
  6374. BYTE $0xc9
  6375. BYTE $0x04
  6376. BYTE $0x66
  6377. BYTE $0x0f
  6378. BYTE $0x3a
  6379. BYTE $0x0f
  6380. BYTE $0xe4
  6381. BYTE $0x0c
  6382. BYTE $0x66
  6383. BYTE $0x0f
  6384. BYTE $0x3a
  6385. BYTE $0x0f
  6386. BYTE $0xff
  6387. BYTE $0x08
  6388. BYTE $0x66
  6389. BYTE $0x45
  6390. BYTE $0x0f
  6391. BYTE $0x3a
  6392. BYTE $0x0f
  6393. BYTE $0xd2
  6394. BYTE $0x04
  6395. BYTE $0x66
  6396. BYTE $0x0f
  6397. BYTE $0x3a
  6398. BYTE $0x0f
  6399. BYTE $0xed
  6400. BYTE $0x0c
  6401. BYTE $0x66
  6402. BYTE $0x45
  6403. BYTE $0x0f
  6404. BYTE $0x3a
  6405. BYTE $0x0f
  6406. BYTE $0xc0
  6407. BYTE $0x08
  6408. BYTE $0x66
  6409. BYTE $0x45
  6410. BYTE $0x0f
  6411. BYTE $0x3a
  6412. BYTE $0x0f
  6413. BYTE $0xdb
  6414. BYTE $0x04
  6415. DECQ CX
  6416. JG sealSSETail192LoopA
  6417. DECQ R9
  6418. JGE sealSSETail192LoopB
  6419. PADDL ·chacha20Constants<>+0(SB), X0
  6420. PADDL ·chacha20Constants<>+0(SB), X1
  6421. PADDL ·chacha20Constants<>+0(SB), X2
  6422. PADDL 32(BP), X3
  6423. PADDL 32(BP), X4
  6424. PADDL 32(BP), X5
  6425. PADDL 48(BP), X6
  6426. PADDL 48(BP), X7
  6427. PADDL 48(BP), X8
  6428. PADDL 80(BP), X9
  6429. PADDL 96(BP), X10
  6430. PADDL 112(BP), X11
  6431. MOVOU (SI), X12
  6432. MOVOU 16(SI), X13
  6433. MOVOU 32(SI), X14
  6434. MOVOU 48(SI), X15
  6435. PXOR X12, X0
  6436. PXOR X13, X3
  6437. PXOR X14, X6
  6438. PXOR X15, X9
  6439. MOVOU X0, (DI)
  6440. MOVOU X3, 16(DI)
  6441. MOVOU X6, 32(DI)
  6442. MOVOU X9, 48(DI)
  6443. MOVOU 64(SI), X12
  6444. MOVOU 80(SI), X13
  6445. MOVOU 96(SI), X14
  6446. MOVOU 112(SI), X15
  6447. PXOR X12, X1
  6448. PXOR X13, X4
  6449. PXOR X14, X7
  6450. PXOR X15, X10
  6451. MOVOU X1, 64(DI)
  6452. MOVOU X4, 80(DI)
  6453. MOVOU X7, 96(DI)
  6454. MOVOU X10, 112(DI)
  6455. MOVO X2, X1
  6456. MOVO X5, X4
  6457. MOVO X8, X7
  6458. MOVO X11, X10
  6459. MOVQ $0x00000080, CX
  6460. LEAQ 128(SI), SI
  6461. SUBQ $0x80, BX
  6462. JMP sealSSE128SealHash
  // sealSSE128: build three ChaCha20 states side by side in SSE registers.
  //   state 0 rows: X0, X3, X6, X9
  //   state 1 rows: X1, X4, X7, X10
  //   state 2 rows: X2, X5, X8, X11
  // Row 0 is the ChaCha20 constants; rows 1..3 are loaded from 16/32/48(R8).
  // NOTE(review): the label name suggests this is the short-input (<= 128 byte)
  // path and that R8 points at the caller's key/counter/nonce state - confirm
  // against the dispatch code earlier in the function.
  6463. sealSSE128:
  6464. MOVOU ·chacha20Constants<>+0(SB), X0
  6465. MOVOU 16(R8), X3
  6466. MOVOU 32(R8), X6
  6467. MOVOU 48(R8), X9
  // State 1 is a copy of state 0 with sseIncMask added to its last row
  // (presumably a block-counter increment - sseIncMask is defined elsewhere).
  6468. MOVO X0, X1
  6469. MOVO X3, X4
  6470. MOVO X6, X7
  6471. MOVO X9, X10
  6472. PADDL ·sseIncMask<>+0(SB), X10
  // State 2 is a copy of state 1 with sseIncMask added once more.
  6473. MOVO X1, X2
  6474. MOVO X4, X5
  6475. MOVO X7, X8
  6476. MOVO X10, X11
  6477. PADDL ·sseIncMask<>+0(SB), X11
  // Keep the initial rows 1 and 2 and state 1's last row in X13/X14/X15;
  // they are added back to the states after the rounds.
  6478. MOVO X3, X13
  6479. MOVO X6, X14
  6480. MOVO X10, X15
  // R9 = 10 loop iterations; each iteration of the loop below runs the
  // quarter-round sequence twice.
  6481. MOVQ $0x0000000a, R9
  // sealSSE128InnerCipherLoop: one pass = quarter-rounds on the three states,
  // a lane rotation of rows 1..3, quarter-rounds again, and the inverse lane
  // rotation. Executed R9 times. X12 is scratch throughout.
  // After the loop the initial state is added back, the Poly1305 key is
  // clamped and stored at (BP), and the additional data is hashed.
  6482. sealSSE128InnerCipherLoop:
  // Quarter-round, state 0 (X0, X3, X6, X9).
  6483. PADDD X3, X0
  6484. PXOR X0, X9
  6485. ROL16(X9, X12)
  6486. PADDD X9, X6
  6487. PXOR X6, X3
  // Rotate X3 left by 12: (x << 12) ^ (x >> 20).
  6488. MOVO X3, X12
  6489. PSLLL $0x0c, X12
  6490. PSRLL $0x14, X3
  6491. PXOR X12, X3
  6492. PADDD X3, X0
  6493. PXOR X0, X9
  6494. ROL8(X9, X12)
  6495. PADDD X9, X6
  6496. PXOR X6, X3
  // Rotate X3 left by 7: (x << 7) ^ (x >> 25).
  6497. MOVO X3, X12
  6498. PSLLL $0x07, X12
  6499. PSRLL $0x19, X3
  6500. PXOR X12, X3
  // Quarter-round, state 1 (X1, X4, X7, X10).
  6501. PADDD X4, X1
  6502. PXOR X1, X10
  6503. ROL16(X10, X12)
  6504. PADDD X10, X7
  6505. PXOR X7, X4
  6506. MOVO X4, X12
  6507. PSLLL $0x0c, X12
  6508. PSRLL $0x14, X4
  6509. PXOR X12, X4
  6510. PADDD X4, X1
  6511. PXOR X1, X10
  6512. ROL8(X10, X12)
  6513. PADDD X10, X7
  6514. PXOR X7, X4
  6515. MOVO X4, X12
  6516. PSLLL $0x07, X12
  6517. PSRLL $0x19, X4
  6518. PXOR X12, X4
  // Quarter-round, state 2 (X2, X5, X8, X11).
  6519. PADDD X5, X2
  6520. PXOR X2, X11
  6521. ROL16(X11, X12)
  6522. PADDD X11, X8
  6523. PXOR X8, X5
  6524. MOVO X5, X12
  6525. PSLLL $0x0c, X12
  6526. PSRLL $0x14, X5
  6527. PXOR X12, X5
  6528. PADDD X5, X2
  6529. PXOR X2, X11
  6530. ROL8(X11, X12)
  6531. PADDD X11, X8
  6532. PXOR X8, X5
  6533. MOVO X5, X12
  6534. PSLLL $0x07, X12
  6535. PSRLL $0x19, X5
  6536. PXOR X12, X5
  // Hand-encoded PALIGNR $imm, Xn, Xn (66 [45] 0F 3A 0F /r ib; the 0x45
  // prefix selects X8..X15). Rows 1 (X3, X4, X5) are rotated by 4 bytes,
  // rows 2 (X6, X7, X8) by 8 bytes, rows 3 (X9, X10, X11) by 12 bytes.
  6537. BYTE $0x66
  6538. BYTE $0x0f
  6539. BYTE $0x3a
  6540. BYTE $0x0f
  6541. BYTE $0xdb
  6542. BYTE $0x04
  6543. BYTE $0x66
  6544. BYTE $0x0f
  6545. BYTE $0x3a
  6546. BYTE $0x0f
  6547. BYTE $0xe4
  6548. BYTE $0x04
  6549. BYTE $0x66
  6550. BYTE $0x0f
  6551. BYTE $0x3a
  6552. BYTE $0x0f
  6553. BYTE $0xed
  6554. BYTE $0x04
  6555. BYTE $0x66
  6556. BYTE $0x0f
  6557. BYTE $0x3a
  6558. BYTE $0x0f
  6559. BYTE $0xf6
  6560. BYTE $0x08
  6561. BYTE $0x66
  6562. BYTE $0x0f
  6563. BYTE $0x3a
  6564. BYTE $0x0f
  6565. BYTE $0xff
  6566. BYTE $0x08
  6567. BYTE $0x66
  6568. BYTE $0x45
  6569. BYTE $0x0f
  6570. BYTE $0x3a
  6571. BYTE $0x0f
  6572. BYTE $0xc0
  6573. BYTE $0x08
  6574. BYTE $0x66
  6575. BYTE $0x45
  6576. BYTE $0x0f
  6577. BYTE $0x3a
  6578. BYTE $0x0f
  6579. BYTE $0xc9
  6580. BYTE $0x0c
  6581. BYTE $0x66
  6582. BYTE $0x45
  6583. BYTE $0x0f
  6584. BYTE $0x3a
  6585. BYTE $0x0f
  6586. BYTE $0xd2
  6587. BYTE $0x0c
  6588. BYTE $0x66
  6589. BYTE $0x45
  6590. BYTE $0x0f
  6591. BYTE $0x3a
  6592. BYTE $0x0f
  6593. BYTE $0xdb
  6594. BYTE $0x0c
  // Second quarter-round pass on the rotated rows, state 0.
  6595. PADDD X3, X0
  6596. PXOR X0, X9
  6597. ROL16(X9, X12)
  6598. PADDD X9, X6
  6599. PXOR X6, X3
  6600. MOVO X3, X12
  6601. PSLLL $0x0c, X12
  6602. PSRLL $0x14, X3
  6603. PXOR X12, X3
  6604. PADDD X3, X0
  6605. PXOR X0, X9
  6606. ROL8(X9, X12)
  6607. PADDD X9, X6
  6608. PXOR X6, X3
  6609. MOVO X3, X12
  6610. PSLLL $0x07, X12
  6611. PSRLL $0x19, X3
  6612. PXOR X12, X3
  // Second pass, state 1.
  6613. PADDD X4, X1
  6614. PXOR X1, X10
  6615. ROL16(X10, X12)
  6616. PADDD X10, X7
  6617. PXOR X7, X4
  6618. MOVO X4, X12
  6619. PSLLL $0x0c, X12
  6620. PSRLL $0x14, X4
  6621. PXOR X12, X4
  6622. PADDD X4, X1
  6623. PXOR X1, X10
  6624. ROL8(X10, X12)
  6625. PADDD X10, X7
  6626. PXOR X7, X4
  6627. MOVO X4, X12
  6628. PSLLL $0x07, X12
  6629. PSRLL $0x19, X4
  6630. PXOR X12, X4
  // Second pass, state 2.
  6631. PADDD X5, X2
  6632. PXOR X2, X11
  6633. ROL16(X11, X12)
  6634. PADDD X11, X8
  6635. PXOR X8, X5
  6636. MOVO X5, X12
  6637. PSLLL $0x0c, X12
  6638. PSRLL $0x14, X5
  6639. PXOR X12, X5
  6640. PADDD X5, X2
  6641. PXOR X2, X11
  6642. ROL8(X11, X12)
  6643. PADDD X11, X8
  6644. PXOR X8, X5
  6645. MOVO X5, X12
  6646. PSLLL $0x07, X12
  6647. PSRLL $0x19, X5
  6648. PXOR X12, X5
  // Hand-encoded PALIGNR again, undoing the earlier lane rotation:
  // rows 1 by 12 bytes, rows 2 by 8 bytes, rows 3 by 4 bytes.
  6649. BYTE $0x66
  6650. BYTE $0x0f
  6651. BYTE $0x3a
  6652. BYTE $0x0f
  6653. BYTE $0xdb
  6654. BYTE $0x0c
  6655. BYTE $0x66
  6656. BYTE $0x0f
  6657. BYTE $0x3a
  6658. BYTE $0x0f
  6659. BYTE $0xe4
  6660. BYTE $0x0c
  6661. BYTE $0x66
  6662. BYTE $0x0f
  6663. BYTE $0x3a
  6664. BYTE $0x0f
  6665. BYTE $0xed
  6666. BYTE $0x0c
  6667. BYTE $0x66
  6668. BYTE $0x0f
  6669. BYTE $0x3a
  6670. BYTE $0x0f
  6671. BYTE $0xf6
  6672. BYTE $0x08
  6673. BYTE $0x66
  6674. BYTE $0x0f
  6675. BYTE $0x3a
  6676. BYTE $0x0f
  6677. BYTE $0xff
  6678. BYTE $0x08
  6679. BYTE $0x66
  6680. BYTE $0x45
  6681. BYTE $0x0f
  6682. BYTE $0x3a
  6683. BYTE $0x0f
  6684. BYTE $0xc0
  6685. BYTE $0x08
  6686. BYTE $0x66
  6687. BYTE $0x45
  6688. BYTE $0x0f
  6689. BYTE $0x3a
  6690. BYTE $0x0f
  6691. BYTE $0xc9
  6692. BYTE $0x04
  6693. BYTE $0x66
  6694. BYTE $0x45
  6695. BYTE $0x0f
  6696. BYTE $0x3a
  6697. BYTE $0x0f
  6698. BYTE $0xd2
  6699. BYTE $0x04
  6700. BYTE $0x66
  6701. BYTE $0x45
  6702. BYTE $0x0f
  6703. BYTE $0x3a
  6704. BYTE $0x0f
  6705. BYTE $0xdb
  6706. BYTE $0x04
  6707. DECQ R9
  6708. JNE sealSSE128InnerCipherLoop
  // Add the initial state back in. State 0 only needs rows 0 and 1 (X0, X3):
  6709. // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded
  6710. PADDL ·chacha20Constants<>+0(SB), X0
  6711. PADDL ·chacha20Constants<>+0(SB), X1
  6712. PADDL ·chacha20Constants<>+0(SB), X2
  6713. PADDL X13, X3
  6714. PADDL X13, X4
  6715. PADDL X13, X5
  6716. PADDL X14, X7
  6717. PADDL X14, X8
  // X15 holds state 1's initial last row; adding sseIncMask gives state 2's.
  6718. PADDL X15, X10
  6719. PADDL ·sseIncMask<>+0(SB), X15
  6720. PADDL X15, X11
  // Clamp the first 16 key bytes with polyClampMask and store the 32-byte
  // Poly1305 key at (BP)..31(BP).
  6721. PAND ·polyClampMask<>+0(SB), X0
  6722. MOVOU X0, (BP)
  6723. MOVOU X3, 16(BP)
  6724. // Hash
  // R9 = ad_len is passed to polyHashADInternal (defined elsewhere;
  // presumably absorbs the additional data into the accumulator).
  6725. MOVQ ad_len+80(FP), R9
  6726. CALL polyHashADInternal<>(SB)
  // CX = 0: the loop at sealSSE128SealHash below falls straight through to
  // sealSSE128Seal because CX < 16.
  6727. XORQ CX, CX
  // sealSSE128SealHash: while CX >= 16, absorb the 16 bytes at (DI) into the
  // Poly1305 accumulator, then CX -= 16 and DI += 16. When CX drops below 16,
  // control moves to sealSSE128Seal.
  // Accumulator h = R10 (low), R11, R12 (high); multiplier r = (BP), 8(BP).
  // Scratch: AX, DX, R8, R13, R14, R15.
  6728. sealSSE128SealHash:
  6729. CMPQ CX, $0x10
  6730. JB sealSSE128Seal
  // h += block, plus a 1 in the limb above the 128-bit block.
  6731. ADDQ (DI), R10
  6732. ADCQ 8(DI), R11
  6733. ADCQ $0x01, R12
  // t = h * r, accumulated in R13 (low), R14, R15, R8 (high).
  // r0 * h0 -> R14:R13
  6734. MOVQ (BP), AX
  6735. MOVQ AX, R15
  6736. MULQ R10
  6737. MOVQ AX, R13
  6738. MOVQ DX, R14
  // r0 * h1 added at limb 1; r0 * h2 (IMULQ) seeds limb 2.
  6739. MOVQ (BP), AX
  6740. MULQ R11
  6741. IMULQ R12, R15
  6742. ADDQ AX, R14
  6743. ADCQ DX, R15
  // r1 * h0 added at limb 1; its high half is parked in R10 (h0 is dead now).
  6744. MOVQ 8(BP), AX
  6745. MOVQ AX, R8
  6746. MULQ R10
  6747. ADDQ AX, R14
  6748. ADCQ $0x00, DX
  6749. MOVQ DX, R10
  // r1 * h1 added at limb 2; r1 * h2 (IMULQ) seeds limb 3.
  6750. MOVQ 8(BP), AX
  6751. MULQ R11
  6752. ADDQ AX, R15
  6753. ADCQ $0x00, DX
  6754. IMULQ R12, R8
  6755. ADDQ R10, R15
  6756. ADCQ DX, R8
  // Partial reduction modulo 2^130 - 5: keep the low 130 bits in h and add
  // 5 * (t >> 130), formed as (t >> 128 with the low two bits cleared)
  // plus (t >> 130).
  6757. MOVQ R13, R10
  6758. MOVQ R14, R11
  6759. MOVQ R15, R12
  6760. ANDQ $0x03, R12
  6761. MOVQ R15, R13
  6762. ANDQ $-4, R13
  6763. MOVQ R8, R14
  // Double-register shift: R15 = (R8:R15) >> 2, then R8 >>= 2.
  6764. SHRQ $0x02, R8, R15
  6765. SHRQ $0x02, R8
  6766. ADDQ R13, R10
  6767. ADCQ R14, R11
  6768. ADCQ $0x00, R12
  6769. ADDQ R15, R10
  6770. ADCQ R8, R11
  6771. ADCQ $0x00, R12
  6772. SUBQ $0x10, CX
  6773. ADDQ $0x10, DI
  6774. JMP sealSSE128SealHash
  // sealSSE128Seal: process the input 16 bytes at a time while BX >= 16.
  // Each pass XORs 16 bytes from (SI) with the keystream in X1, stores the
  // result at (DI), absorbs that result into the Poly1305 accumulator
  // (R10, R11, R12), and then shifts the queued keystream registers down by
  // one so X1 holds the next 16 keystream bytes. With fewer than 16 bytes
  // left, control moves to sealSSETail.
  6775. sealSSE128Seal:
  6776. CMPQ BX, $0x10
  6777. JB sealSSETail
  6778. SUBQ $0x10, BX
  6779. // Load 16 input bytes, XOR them with the keystream, store the result
  6780. MOVOU (SI), X12
  6781. PXOR X12, X1
  6782. MOVOU X1, (DI)
  6783. LEAQ 16(SI), SI
  6784. LEAQ 16(DI), DI
  6785. // Extract for hashing
  // R13 = low 8 bytes of the output block, R14 = high 8 bytes.
  6786. MOVQ X1, R13
  6787. PSRLDQ $0x08, X1
  6788. MOVQ X1, R14
  // h += block, plus a 1 in the limb above the 128-bit block.
  6789. ADDQ R13, R10
  6790. ADCQ R14, R11
  6791. ADCQ $0x01, R12
  // t = h * r with r = (BP), 8(BP); t lands in R13 (low), R14, R15, R8 (high).
  6792. MOVQ (BP), AX
  6793. MOVQ AX, R15
  6794. MULQ R10
  6795. MOVQ AX, R13
  6796. MOVQ DX, R14
  6797. MOVQ (BP), AX
  6798. MULQ R11
  6799. IMULQ R12, R15
  6800. ADDQ AX, R14
  6801. ADCQ DX, R15
  6802. MOVQ 8(BP), AX
  6803. MOVQ AX, R8
  6804. MULQ R10
  6805. ADDQ AX, R14
  6806. ADCQ $0x00, DX
  6807. MOVQ DX, R10
  6808. MOVQ 8(BP), AX
  6809. MULQ R11
  6810. ADDQ AX, R15
  6811. ADCQ $0x00, DX
  6812. IMULQ R12, R8
  6813. ADDQ R10, R15
  6814. ADCQ DX, R8
  // Partial reduction modulo 2^130 - 5: h = (t mod 2^130) + 5 * (t >> 130).
  6815. MOVQ R13, R10
  6816. MOVQ R14, R11
  6817. MOVQ R15, R12
  6818. ANDQ $0x03, R12
  6819. MOVQ R15, R13
  6820. ANDQ $-4, R13
  6821. MOVQ R8, R14
  6822. SHRQ $0x02, R8, R15
  6823. SHRQ $0x02, R8
  6824. ADDQ R13, R10
  6825. ADCQ R14, R11
  6826. ADCQ $0x00, R12
  6827. ADDQ R15, R10
  6828. ADCQ R8, R11
  6829. ADCQ $0x00, R12
  6830. // Shift the stream "left"
  // Keystream queue order: X1 <- X4 <- X7 <- X10 <- X2 <- X5 <- X8 <- X11.
  6831. MOVO X4, X1
  6832. MOVO X7, X4
  6833. MOVO X10, X7
  6834. MOVO X2, X10
  6835. MOVO X5, X2
  6836. MOVO X8, X5
  6837. MOVO X11, X8
  6838. JMP sealSSE128Seal
  // sealSSETail: handle a final partial block of BX bytes (this point is
  // reached from sealSSE128Seal with BX < 16). If BX == 0 it goes straight to
  // sealSSEFinalize. Otherwise the BX input bytes are gathered into a
  // 16-byte value, XORed with the keystream in X1, stored at (DI), masked,
  // and absorbed into the Poly1305 accumulator; DI then advances by BX.
  6839. sealSSETail:
  6840. TESTQ BX, BX
  6841. JE sealSSEFinalize
  6842. // We can only load the PT one byte at a time to avoid read after end of buffer
  // R9 = BX * 16, used below as a byte offset into the andMask table.
  6843. MOVQ BX, R9
  6844. SHLQ $0x04, R9
  6845. LEAQ ·andMask<>+0(SB), R13
  // CX = byte counter; SI is moved to the last input byte so the bytes are
  // read back to front.
  6846. MOVQ BX, CX
  6847. LEAQ -1(SI)(BX*1), SI
  // R8:R15 (high:low) collects the bytes; AX is cleared so the byte load
  // below leaves its upper bits zero.
  6848. XORQ R15, R15
  6849. XORQ R8, R8
  6850. XORQ AX, AX
  6851. sealSSETailLoadLoop:
  // Shift the 128-bit value R8:R15 left by 8 bits, then put the next byte
  // in the low position.
  6852. SHLQ $0x08, R15, R8
  6853. SHLQ $0x08, R15
  6854. MOVB (SI), AX
  6855. XORQ AX, R15
  6856. LEAQ -1(SI), SI
  6857. DECQ CX
  6858. JNE sealSSETailLoadLoop
  // Spill the gathered block to scratch at 64(BP)..79(BP) so it can be used
  // as a 16-byte memory operand.
  6859. MOVQ R15, 64(BP)
  6860. MOVQ R8, 72(BP)
  6861. PXOR 64(BP), X1
  // A full 16 bytes are written even though only BX of them carry data.
  // NOTE(review): relies on at least 16 writable bytes at DI - presumably the
  // room reserved for the tag, which is stored at DI + BX later; confirm.
  6862. MOVOU X1, (DI)
  // Load the mask at andMask + (BX - 1) * 16 and apply it before hashing
  // (presumably it keeps only the low BX bytes - the table is defined
  // elsewhere).
  6863. MOVOU -16(R13)(R9*1), X12
  6864. PAND X12, X1
  6865. MOVQ X1, R13
  6866. PSRLDQ $0x08, X1
  6867. MOVQ X1, R14
  // h += masked block, plus a 1 in the limb above the 128-bit block.
  6868. ADDQ R13, R10
  6869. ADCQ R14, R11
  6870. ADCQ $0x01, R12
  // t = h * r with r = (BP), 8(BP); t lands in R13 (low), R14, R15, R8 (high).
  6871. MOVQ (BP), AX
  6872. MOVQ AX, R15
  6873. MULQ R10
  6874. MOVQ AX, R13
  6875. MOVQ DX, R14
  6876. MOVQ (BP), AX
  6877. MULQ R11
  6878. IMULQ R12, R15
  6879. ADDQ AX, R14
  6880. ADCQ DX, R15
  6881. MOVQ 8(BP), AX
  6882. MOVQ AX, R8
  6883. MULQ R10
  6884. ADDQ AX, R14
  6885. ADCQ $0x00, DX
  6886. MOVQ DX, R10
  6887. MOVQ 8(BP), AX
  6888. MULQ R11
  6889. ADDQ AX, R15
  6890. ADCQ $0x00, DX
  6891. IMULQ R12, R8
  6892. ADDQ R10, R15
  6893. ADCQ DX, R8
  // Partial reduction modulo 2^130 - 5: h = (t mod 2^130) + 5 * (t >> 130).
  6894. MOVQ R13, R10
  6895. MOVQ R14, R11
  6896. MOVQ R15, R12
  6897. ANDQ $0x03, R12
  6898. MOVQ R15, R13
  6899. ANDQ $-4, R13
  6900. MOVQ R8, R14
  6901. SHRQ $0x02, R8, R15
  6902. SHRQ $0x02, R8
  6903. ADDQ R13, R10
  6904. ADCQ R14, R11
  6905. ADCQ $0x00, R12
  6906. ADDQ R15, R10
  6907. ADCQ R8, R11
  6908. ADCQ $0x00, R12
  // Advance DI past the BX bytes just produced.
  6909. ADDQ BX, DI
  // sealSSEFinalize: finish the Poly1305 computation and write the tag.
  // Absorbs one last block made of ad_len (low 8 bytes) and src_len (high 8
  // bytes), fully reduces the accumulator modulo 2^130 - 5, adds the second
  // key half from 16(BP)..31(BP), stores the 16-byte result at (DI), and
  // returns.
  6910. sealSSEFinalize:
  6911. // Hash in the buffer lengths
  6912. ADDQ ad_len+80(FP), R10
  6913. ADCQ src_len+56(FP), R11
  6914. ADCQ $0x01, R12
  // t = h * r with r = (BP), 8(BP); t lands in R13 (low), R14, R15, R8 (high).
  6915. MOVQ (BP), AX
  6916. MOVQ AX, R15
  6917. MULQ R10
  6918. MOVQ AX, R13
  6919. MOVQ DX, R14
  6920. MOVQ (BP), AX
  6921. MULQ R11
  6922. IMULQ R12, R15
  6923. ADDQ AX, R14
  6924. ADCQ DX, R15
  6925. MOVQ 8(BP), AX
  6926. MOVQ AX, R8
  6927. MULQ R10
  6928. ADDQ AX, R14
  6929. ADCQ $0x00, DX
  6930. MOVQ DX, R10
  6931. MOVQ 8(BP), AX
  6932. MULQ R11
  6933. ADDQ AX, R15
  6934. ADCQ $0x00, DX
  6935. IMULQ R12, R8
  6936. ADDQ R10, R15
  6937. ADCQ DX, R8
  // Partial reduction modulo 2^130 - 5: h = (t mod 2^130) + 5 * (t >> 130).
  6938. MOVQ R13, R10
  6939. MOVQ R14, R11
  6940. MOVQ R15, R12
  6941. ANDQ $0x03, R12
  6942. MOVQ R15, R13
  6943. ANDQ $-4, R13
  6944. MOVQ R8, R14
  6945. SHRQ $0x02, R8, R15
  6946. SHRQ $0x02, R8
  6947. ADDQ R13, R10
  6948. ADCQ R14, R11
  6949. ADCQ $0x00, R12
  6950. ADDQ R15, R10
  6951. ADCQ R8, R11
  6952. ADCQ $0x00, R12
  6953. // Final reduce
  // Save h in R13/R14/R15, then compute h - (2^130 - 5) across the three
  // limbs (-5, -1, 3). If the subtraction borrows (carry set), h was already
  // below the modulus and the saved value is restored.
  6954. MOVQ R10, R13
  6955. MOVQ R11, R14
  6956. MOVQ R12, R15
  6957. SUBQ $-5, R10
  6958. SBBQ $-1, R11
  6959. SBBQ $0x03, R12
  6960. CMOVQCS R13, R10
  6961. CMOVQCS R14, R11
  6962. CMOVQCS R15, R12
  6963. // Add in the "s" part of the key
  // Only the low 128 bits are kept; the carry out of R11 is discarded.
  6964. ADDQ 16(BP), R10
  6965. ADCQ 24(BP), R11
  6966. // Finally store the tag at the end of the message
  6967. MOVQ R10, (DI)
  6968. MOVQ R11, 8(DI)
  6969. RET
  // chacha20Poly1305Seal_AVX2: entry of the AVX2 seal path.
  // Loads the ChaCha20 state into YMM registers, dispatches short inputs
  // (BX <= 192 and BX <= 320 bytes) to dedicated routines, and otherwise
  // sets up four register-resident states for sealAVX2IntroLoop:
  //   state 0 rows: Y0, Y14, Y12, Y4      state 1 rows: Y5, Y9,  Y13, Y1
  //   state 2 rows: Y6, Y10, Y8,  Y2      state 3 rows: Y7, Y11, Y15, Y3
  // Scratch layout written here: 32(BP) = row 1, 64(BP) = row 2,
  // 96/128/160/192(BP) = the initial last row of states 0..3.
  6970. chacha20Poly1305Seal_AVX2:
  6971. VZEROUPPER
  6972. VMOVDQU ·chacha20Constants<>+0(SB), Y0
  // Hand-encoded VBROADCASTI128 16(R8), Y14 (VEX.256.66.0F38 5A /r).
  6973. BYTE $0xc4
  6974. BYTE $0x42
  6975. BYTE $0x7d
  6976. BYTE $0x5a
  6977. BYTE $0x70
  6978. BYTE $0x10
  // Hand-encoded VBROADCASTI128 32(R8), Y12.
  6979. BYTE $0xc4
  6980. BYTE $0x42
  6981. BYTE $0x7d
  6982. BYTE $0x5a
  6983. BYTE $0x60
  6984. BYTE $0x20
  // Hand-encoded VBROADCASTI128 48(R8), Y4.
  6985. BYTE $0xc4
  6986. BYTE $0xc2
  6987. BYTE $0x7d
  6988. BYTE $0x5a
  6989. BYTE $0x60
  6990. BYTE $0x30
  // avx2InitMask is defined elsewhere; presumably it makes the two 128-bit
  // lanes of Y4 carry consecutive block counters - confirm.
  6991. VPADDD ·avx2InitMask<>+0(SB), Y4, Y4
  6992. // Special optimizations, for very short buffers
  6993. CMPQ BX, $0x000000c0
  6994. JBE seal192AVX2
  6995. CMPQ BX, $0x00000140
  6996. JBE seal320AVX2
  6997. // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream
  // Replicate rows 0..2 into the other three states and save rows 1 and 2.
  6998. VMOVDQA Y0, Y5
  6999. VMOVDQA Y0, Y6
  7000. VMOVDQA Y0, Y7
  7001. VMOVDQA Y14, Y9
  7002. VMOVDQA Y14, Y10
  7003. VMOVDQA Y14, Y11
  7004. VMOVDQA Y14, 32(BP)
  7005. VMOVDQA Y12, Y13
  7006. VMOVDQA Y12, Y8
  7007. VMOVDQA Y12, Y15
  7008. VMOVDQA Y12, 64(BP)
  // Each following state's last row is the previous one plus avx2IncMask;
  // all four initial last rows are saved for the later add-back.
  7009. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  7010. VMOVDQA Y4, 96(BP)
  7011. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  7012. VMOVDQA Y1, 128(BP)
  7013. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  7014. VMOVDQA Y2, 160(BP)
  7015. VMOVDQA Y3, 192(BP)
  // R9 = 10 iterations of sealAVX2IntroLoop.
  7016. MOVQ $0x0000000a, R9
  // sealAVX2IntroLoop: run R9 passes of the ChaCha20 round function over
  // four YMM-resident states, then:
  //   - add the initial state back in,
  //   - clamp and store the Poly1305 key at (BP) and hash the additional data,
  //   - XOR and store the first 320 bytes (448 when more than 128 remain),
  //   - dispatch to a tail routine by remaining length, or prime the next
  //     four states and jump into the main loop.
  // All 16 YMM registers hold state, so 224(BP) is used as a spill slot to
  // free one register (Y15 or Y13) as scratch for the rotations.
  7017. sealAVX2IntroLoop:
  // Quarter-round, state 0 (Y0, Y14, Y12, Y4); Y15 spilled, used as scratch.
  7018. VMOVDQA Y15, 224(BP)
  7019. VPADDD Y14, Y0, Y0
  7020. VPXOR Y0, Y4, Y4
  7021. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7022. VPADDD Y4, Y12, Y12
  7023. VPXOR Y12, Y14, Y14
  // Rotate left by 12: (x << 12) ^ (x >> 20).
  7024. VPSLLD $0x0c, Y14, Y15
  7025. VPSRLD $0x14, Y14, Y14
  7026. VPXOR Y15, Y14, Y14
  7027. VPADDD Y14, Y0, Y0
  7028. VPXOR Y0, Y4, Y4
  7029. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7030. VPADDD Y4, Y12, Y12
  7031. VPXOR Y12, Y14, Y14
  // Rotate left by 7: (x << 7) ^ (x >> 25).
  7032. VPSLLD $0x07, Y14, Y15
  7033. VPSRLD $0x19, Y14, Y14
  7034. VPXOR Y15, Y14, Y14
  // Quarter-round, state 1 (Y5, Y9, Y13, Y1).
  7035. VPADDD Y9, Y5, Y5
  7036. VPXOR Y5, Y1, Y1
  7037. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7038. VPADDD Y1, Y13, Y13
  7039. VPXOR Y13, Y9, Y9
  7040. VPSLLD $0x0c, Y9, Y15
  7041. VPSRLD $0x14, Y9, Y9
  7042. VPXOR Y15, Y9, Y9
  7043. VPADDD Y9, Y5, Y5
  7044. VPXOR Y5, Y1, Y1
  7045. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7046. VPADDD Y1, Y13, Y13
  7047. VPXOR Y13, Y9, Y9
  7048. VPSLLD $0x07, Y9, Y15
  7049. VPSRLD $0x19, Y9, Y9
  7050. VPXOR Y15, Y9, Y9
  // Quarter-round, state 2 (Y6, Y10, Y8, Y2).
  7051. VPADDD Y10, Y6, Y6
  7052. VPXOR Y6, Y2, Y2
  7053. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7054. VPADDD Y2, Y8, Y8
  7055. VPXOR Y8, Y10, Y10
  7056. VPSLLD $0x0c, Y10, Y15
  7057. VPSRLD $0x14, Y10, Y10
  7058. VPXOR Y15, Y10, Y10
  7059. VPADDD Y10, Y6, Y6
  7060. VPXOR Y6, Y2, Y2
  7061. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7062. VPADDD Y2, Y8, Y8
  7063. VPXOR Y8, Y10, Y10
  7064. VPSLLD $0x07, Y10, Y15
  7065. VPSRLD $0x19, Y10, Y10
  7066. VPXOR Y15, Y10, Y10
  // Swap the spill: reload Y15 (state 3 row 2) and spill Y13 so that Y13
  // becomes the scratch register for state 3.
  7067. VMOVDQA 224(BP), Y15
  7068. VMOVDQA Y13, 224(BP)
  // Quarter-round, state 3 (Y7, Y11, Y15, Y3).
  7069. VPADDD Y11, Y7, Y7
  7070. VPXOR Y7, Y3, Y3
  7071. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  7072. VPADDD Y3, Y15, Y15
  7073. VPXOR Y15, Y11, Y11
  7074. VPSLLD $0x0c, Y11, Y13
  7075. VPSRLD $0x14, Y11, Y11
  7076. VPXOR Y13, Y11, Y11
  7077. VPADDD Y11, Y7, Y7
  7078. VPXOR Y7, Y3, Y3
  7079. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  7080. VPADDD Y3, Y15, Y15
  7081. VPXOR Y15, Y11, Y11
  7082. VPSLLD $0x07, Y11, Y13
  7083. VPSRLD $0x19, Y11, Y11
  7084. VPXOR Y13, Y11, Y11
  7085. VMOVDQA 224(BP), Y13
  // Rotate rows 1, 2, 3 of every state by 4, 8, 12 bytes within each
  // 128-bit lane.
  7086. VPALIGNR $0x04, Y14, Y14, Y14
  7087. VPALIGNR $0x08, Y12, Y12, Y12
  7088. VPALIGNR $0x0c, Y4, Y4, Y4
  7089. VPALIGNR $0x04, Y9, Y9, Y9
  7090. VPALIGNR $0x08, Y13, Y13, Y13
  7091. VPALIGNR $0x0c, Y1, Y1, Y1
  7092. VPALIGNR $0x04, Y10, Y10, Y10
  7093. VPALIGNR $0x08, Y8, Y8, Y8
  7094. VPALIGNR $0x0c, Y2, Y2, Y2
  7095. VPALIGNR $0x04, Y11, Y11, Y11
  7096. VPALIGNR $0x08, Y15, Y15, Y15
  7097. VPALIGNR $0x0c, Y3, Y3, Y3
  // Second quarter-round pass on the rotated rows, state 0.
  7098. VMOVDQA Y15, 224(BP)
  7099. VPADDD Y14, Y0, Y0
  7100. VPXOR Y0, Y4, Y4
  7101. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7102. VPADDD Y4, Y12, Y12
  7103. VPXOR Y12, Y14, Y14
  7104. VPSLLD $0x0c, Y14, Y15
  7105. VPSRLD $0x14, Y14, Y14
  7106. VPXOR Y15, Y14, Y14
  7107. VPADDD Y14, Y0, Y0
  7108. VPXOR Y0, Y4, Y4
  7109. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7110. VPADDD Y4, Y12, Y12
  7111. VPXOR Y12, Y14, Y14
  7112. VPSLLD $0x07, Y14, Y15
  7113. VPSRLD $0x19, Y14, Y14
  7114. VPXOR Y15, Y14, Y14
  // Second pass, state 1.
  7115. VPADDD Y9, Y5, Y5
  7116. VPXOR Y5, Y1, Y1
  7117. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7118. VPADDD Y1, Y13, Y13
  7119. VPXOR Y13, Y9, Y9
  7120. VPSLLD $0x0c, Y9, Y15
  7121. VPSRLD $0x14, Y9, Y9
  7122. VPXOR Y15, Y9, Y9
  7123. VPADDD Y9, Y5, Y5
  7124. VPXOR Y5, Y1, Y1
  7125. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7126. VPADDD Y1, Y13, Y13
  7127. VPXOR Y13, Y9, Y9
  7128. VPSLLD $0x07, Y9, Y15
  7129. VPSRLD $0x19, Y9, Y9
  7130. VPXOR Y15, Y9, Y9
  // Second pass, state 2.
  7131. VPADDD Y10, Y6, Y6
  7132. VPXOR Y6, Y2, Y2
  7133. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7134. VPADDD Y2, Y8, Y8
  7135. VPXOR Y8, Y10, Y10
  7136. VPSLLD $0x0c, Y10, Y15
  7137. VPSRLD $0x14, Y10, Y10
  7138. VPXOR Y15, Y10, Y10
  7139. VPADDD Y10, Y6, Y6
  7140. VPXOR Y6, Y2, Y2
  7141. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7142. VPADDD Y2, Y8, Y8
  7143. VPXOR Y8, Y10, Y10
  7144. VPSLLD $0x07, Y10, Y15
  7145. VPSRLD $0x19, Y10, Y10
  7146. VPXOR Y15, Y10, Y10
  // Second pass, state 3 (Y13 spilled and used as scratch).
  7147. VMOVDQA 224(BP), Y15
  7148. VMOVDQA Y13, 224(BP)
  7149. VPADDD Y11, Y7, Y7
  7150. VPXOR Y7, Y3, Y3
  7151. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  7152. VPADDD Y3, Y15, Y15
  7153. VPXOR Y15, Y11, Y11
  7154. VPSLLD $0x0c, Y11, Y13
  7155. VPSRLD $0x14, Y11, Y11
  7156. VPXOR Y13, Y11, Y11
  7157. VPADDD Y11, Y7, Y7
  7158. VPXOR Y7, Y3, Y3
  7159. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  7160. VPADDD Y3, Y15, Y15
  7161. VPXOR Y15, Y11, Y11
  7162. VPSLLD $0x07, Y11, Y13
  7163. VPSRLD $0x19, Y11, Y11
  7164. VPXOR Y13, Y11, Y11
  7165. VMOVDQA 224(BP), Y13
  // Undo the lane rotation: rows 1, 2, 3 by 12, 8, 4 bytes.
  7166. VPALIGNR $0x0c, Y14, Y14, Y14
  7167. VPALIGNR $0x08, Y12, Y12, Y12
  7168. VPALIGNR $0x04, Y4, Y4, Y4
  7169. VPALIGNR $0x0c, Y9, Y9, Y9
  7170. VPALIGNR $0x08, Y13, Y13, Y13
  7171. VPALIGNR $0x04, Y1, Y1, Y1
  7172. VPALIGNR $0x0c, Y10, Y10, Y10
  7173. VPALIGNR $0x08, Y8, Y8, Y8
  7174. VPALIGNR $0x04, Y2, Y2, Y2
  7175. VPALIGNR $0x0c, Y11, Y11, Y11
  7176. VPALIGNR $0x08, Y15, Y15, Y15
  7177. VPALIGNR $0x04, Y3, Y3, Y3
  7178. DECQ R9
  7179. JNE sealAVX2IntroLoop
  // Add the initial state back: constants to row 0, 32(BP) to row 1,
  // 64(BP) to row 2, and each state's saved last row to row 3.
  7180. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  7181. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  7182. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  7183. VPADDD ·chacha20Constants<>+0(SB), Y7, Y7
  7184. VPADDD 32(BP), Y14, Y14
  7185. VPADDD 32(BP), Y9, Y9
  7186. VPADDD 32(BP), Y10, Y10
  7187. VPADDD 32(BP), Y11, Y11
  7188. VPADDD 64(BP), Y12, Y12
  7189. VPADDD 64(BP), Y13, Y13
  7190. VPADDD 64(BP), Y8, Y8
  7191. VPADDD 64(BP), Y15, Y15
  7192. VPADDD 96(BP), Y4, Y4
  7193. VPADDD 128(BP), Y1, Y1
  7194. VPADDD 160(BP), Y2, Y2
  7195. VPADDD 192(BP), Y3, Y3
  // Regroup the 128-bit lanes of state 0: Y4 = low lanes of Y0 and Y14
  // (the bytes used as the Poly1305 key), Y0 = high lanes of Y0 and Y14,
  // Y12 = high lanes of Y12 and Y4 (Y0 and Y12 are the 64 keystream bytes
  // used for the first stores below).
  7196. VPERM2I128 $0x13, Y12, Y4, Y12
  7197. VPERM2I128 $0x02, Y0, Y14, Y4
  7198. VPERM2I128 $0x13, Y0, Y14, Y0
  7199. // Clamp and store poly key
  7200. VPAND ·polyClampMask<>+0(SB), Y4, Y4
  7201. VMOVDQA Y4, (BP)
  7202. // Hash AD
  // R9 = ad_len is passed to polyHashADInternal (defined elsewhere).
  7203. MOVQ ad_len+80(FP), R9
  7204. CALL polyHashADInternal<>(SB)
  7205. // Can store at least 320 bytes
  // Bytes 0..63: keystream in Y0, Y12.
  7206. VPXOR (SI), Y0, Y0
  7207. VPXOR 32(SI), Y12, Y12
  7208. VMOVDQU Y0, (DI)
  7209. VMOVDQU Y12, 32(DI)
  // Bytes 64..191: state 1, regrouped into Y0, Y14, Y12, Y4.
  7210. VPERM2I128 $0x02, Y5, Y9, Y0
  7211. VPERM2I128 $0x02, Y13, Y1, Y14
  7212. VPERM2I128 $0x13, Y5, Y9, Y12
  7213. VPERM2I128 $0x13, Y13, Y1, Y4
  7214. VPXOR 64(SI), Y0, Y0
  7215. VPXOR 96(SI), Y14, Y14
  7216. VPXOR 128(SI), Y12, Y12
  7217. VPXOR 160(SI), Y4, Y4
  7218. VMOVDQU Y0, 64(DI)
  7219. VMOVDQU Y14, 96(DI)
  7220. VMOVDQU Y12, 128(DI)
  7221. VMOVDQU Y4, 160(DI)
  // Bytes 192..319: state 2.
  7222. VPERM2I128 $0x02, Y6, Y10, Y0
  7223. VPERM2I128 $0x02, Y8, Y2, Y14
  7224. VPERM2I128 $0x13, Y6, Y10, Y12
  7225. VPERM2I128 $0x13, Y8, Y2, Y4
  7226. VPXOR 192(SI), Y0, Y0
  7227. VPXOR 224(SI), Y14, Y14
  7228. VPXOR 256(SI), Y12, Y12
  7229. VPXOR 288(SI), Y4, Y4
  7230. VMOVDQU Y0, 192(DI)
  7231. VMOVDQU Y14, 224(DI)
  7232. VMOVDQU Y12, 256(DI)
  7233. VMOVDQU Y4, 288(DI)
  // CX = 320 (bytes stored so far), BX -= 320, SI += 320. DI is left
  // pointing at the start of the stored output.
  7234. MOVQ $0x00000140, CX
  7235. SUBQ $0x00000140, BX
  7236. LEAQ 320(SI), SI
  // State 3 regrouped into Y0, Y14, Y12, Y4: 128 more keystream bytes.
  7237. VPERM2I128 $0x02, Y7, Y11, Y0
  7238. VPERM2I128 $0x02, Y15, Y3, Y14
  7239. VPERM2I128 $0x13, Y7, Y11, Y12
  7240. VPERM2I128 $0x13, Y15, Y3, Y4
  // With at most 128 bytes left, hand over to sealAVX2SealHash with those
  // keystream registers still unused.
  7241. CMPQ BX, $0x80
  7242. JBE sealAVX2SealHash
  // Otherwise use them now: input from (SI)..127(SI), output at
  // 320(DI)..447(DI).
  7243. VPXOR (SI), Y0, Y0
  7244. VPXOR 32(SI), Y14, Y14
  7245. VPXOR 64(SI), Y12, Y12
  7246. VPXOR 96(SI), Y4, Y4
  7247. VMOVDQU Y0, 320(DI)
  7248. VMOVDQU Y14, 352(DI)
  7249. VMOVDQU Y12, 384(DI)
  7250. VMOVDQU Y4, 416(DI)
  7251. SUBQ $0x80, BX
  7252. LEAQ 128(SI), SI
  // CX = 8 and R9 = 2 are inputs to the tail routines below (their meaning
  // is defined there, outside this view).
  7253. MOVQ $0x00000008, CX
  7254. MOVQ $0x00000002, R9
  // Dispatch on the remaining length: <= 128, <= 256, <= 384, <= 512 bytes.
  7255. CMPQ BX, $0x80
  7256. JBE sealAVX2Tail128
  7257. CMPQ BX, $0x00000100
  7258. JBE sealAVX2Tail256
  7259. CMPQ BX, $0x00000180
  7260. JBE sealAVX2Tail384
  7261. CMPQ BX, $0x00000200
  7262. JBE sealAVX2Tail512
  7263. // We have 448 bytes to hash, but main loop hashes 512 bytes at a time - perform some rounds, before the main loop
  // Set up four fresh states; the last rows continue from the value saved
  // at 192(BP), each one avx2IncMask further along.
  7264. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  7265. VMOVDQA Y0, Y5
  7266. VMOVDQA Y0, Y6
  7267. VMOVDQA Y0, Y7
  7268. VMOVDQA 32(BP), Y14
  7269. VMOVDQA Y14, Y9
  7270. VMOVDQA Y14, Y10
  7271. VMOVDQA Y14, Y11
  7272. VMOVDQA 64(BP), Y12
  7273. VMOVDQA Y12, Y13
  7274. VMOVDQA Y12, Y8
  7275. VMOVDQA Y12, Y15
  7276. VMOVDQA 192(BP), Y4
  7277. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  7278. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  7279. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  7280. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  7281. VMOVDQA Y4, 96(BP)
  7282. VMOVDQA Y1, 128(BP)
  7283. VMOVDQA Y2, 160(BP)
  7284. VMOVDQA Y3, 192(BP)
  // One full pass of the round function with no hashing interleaved:
  // quarter-round on each state in turn, state 0 first.
  7285. VMOVDQA Y15, 224(BP)
  7286. VPADDD Y14, Y0, Y0
  7287. VPXOR Y0, Y4, Y4
  7288. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7289. VPADDD Y4, Y12, Y12
  7290. VPXOR Y12, Y14, Y14
  7291. VPSLLD $0x0c, Y14, Y15
  7292. VPSRLD $0x14, Y14, Y14
  7293. VPXOR Y15, Y14, Y14
  7294. VPADDD Y14, Y0, Y0
  7295. VPXOR Y0, Y4, Y4
  7296. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7297. VPADDD Y4, Y12, Y12
  7298. VPXOR Y12, Y14, Y14
  7299. VPSLLD $0x07, Y14, Y15
  7300. VPSRLD $0x19, Y14, Y14
  7301. VPXOR Y15, Y14, Y14
  // State 1.
  7302. VPADDD Y9, Y5, Y5
  7303. VPXOR Y5, Y1, Y1
  7304. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7305. VPADDD Y1, Y13, Y13
  7306. VPXOR Y13, Y9, Y9
  7307. VPSLLD $0x0c, Y9, Y15
  7308. VPSRLD $0x14, Y9, Y9
  7309. VPXOR Y15, Y9, Y9
  7310. VPADDD Y9, Y5, Y5
  7311. VPXOR Y5, Y1, Y1
  7312. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7313. VPADDD Y1, Y13, Y13
  7314. VPXOR Y13, Y9, Y9
  7315. VPSLLD $0x07, Y9, Y15
  7316. VPSRLD $0x19, Y9, Y9
  7317. VPXOR Y15, Y9, Y9
  // State 2.
  7318. VPADDD Y10, Y6, Y6
  7319. VPXOR Y6, Y2, Y2
  7320. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7321. VPADDD Y2, Y8, Y8
  7322. VPXOR Y8, Y10, Y10
  7323. VPSLLD $0x0c, Y10, Y15
  7324. VPSRLD $0x14, Y10, Y10
  7325. VPXOR Y15, Y10, Y10
  7326. VPADDD Y10, Y6, Y6
  7327. VPXOR Y6, Y2, Y2
  7328. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7329. VPADDD Y2, Y8, Y8
  7330. VPXOR Y8, Y10, Y10
  7331. VPSLLD $0x07, Y10, Y15
  7332. VPSRLD $0x19, Y10, Y10
  7333. VPXOR Y15, Y10, Y10
  // State 3 (Y13 spilled and used as scratch).
  7334. VMOVDQA 224(BP), Y15
  7335. VMOVDQA Y13, 224(BP)
  7336. VPADDD Y11, Y7, Y7
  7337. VPXOR Y7, Y3, Y3
  7338. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  7339. VPADDD Y3, Y15, Y15
  7340. VPXOR Y15, Y11, Y11
  7341. VPSLLD $0x0c, Y11, Y13
  7342. VPSRLD $0x14, Y11, Y11
  7343. VPXOR Y13, Y11, Y11
  7344. VPADDD Y11, Y7, Y7
  7345. VPXOR Y7, Y3, Y3
  7346. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  7347. VPADDD Y3, Y15, Y15
  7348. VPXOR Y15, Y11, Y11
  7349. VPSLLD $0x07, Y11, Y13
  7350. VPSRLD $0x19, Y11, Y11
  7351. VPXOR Y13, Y11, Y11
  7352. VMOVDQA 224(BP), Y13
  // Rotate rows 1, 2, 3 by 4, 8, 12 bytes.
  7353. VPALIGNR $0x04, Y14, Y14, Y14
  7354. VPALIGNR $0x08, Y12, Y12, Y12
  7355. VPALIGNR $0x0c, Y4, Y4, Y4
  7356. VPALIGNR $0x04, Y9, Y9, Y9
  7357. VPALIGNR $0x08, Y13, Y13, Y13
  7358. VPALIGNR $0x0c, Y1, Y1, Y1
  7359. VPALIGNR $0x04, Y10, Y10, Y10
  7360. VPALIGNR $0x08, Y8, Y8, Y8
  7361. VPALIGNR $0x0c, Y2, Y2, Y2
  7362. VPALIGNR $0x04, Y11, Y11, Y11
  7363. VPALIGNR $0x08, Y15, Y15, Y15
  7364. VPALIGNR $0x0c, Y3, Y3, Y3
  // Second quarter-round pass, state 0.
  7365. VMOVDQA Y15, 224(BP)
  7366. VPADDD Y14, Y0, Y0
  7367. VPXOR Y0, Y4, Y4
  7368. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7369. VPADDD Y4, Y12, Y12
  7370. VPXOR Y12, Y14, Y14
  7371. VPSLLD $0x0c, Y14, Y15
  7372. VPSRLD $0x14, Y14, Y14
  7373. VPXOR Y15, Y14, Y14
  7374. VPADDD Y14, Y0, Y0
  7375. VPXOR Y0, Y4, Y4
  7376. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7377. VPADDD Y4, Y12, Y12
  7378. VPXOR Y12, Y14, Y14
  7379. VPSLLD $0x07, Y14, Y15
  7380. VPSRLD $0x19, Y14, Y14
  7381. VPXOR Y15, Y14, Y14
  // State 1.
  7382. VPADDD Y9, Y5, Y5
  7383. VPXOR Y5, Y1, Y1
  7384. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7385. VPADDD Y1, Y13, Y13
  7386. VPXOR Y13, Y9, Y9
  7387. VPSLLD $0x0c, Y9, Y15
  7388. VPSRLD $0x14, Y9, Y9
  7389. VPXOR Y15, Y9, Y9
  7390. VPADDD Y9, Y5, Y5
  7391. VPXOR Y5, Y1, Y1
  7392. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7393. VPADDD Y1, Y13, Y13
  7394. VPXOR Y13, Y9, Y9
  7395. VPSLLD $0x07, Y9, Y15
  7396. VPSRLD $0x19, Y9, Y9
  7397. VPXOR Y15, Y9, Y9
  // State 2.
  7398. VPADDD Y10, Y6, Y6
  7399. VPXOR Y6, Y2, Y2
  7400. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7401. VPADDD Y2, Y8, Y8
  7402. VPXOR Y8, Y10, Y10
  7403. VPSLLD $0x0c, Y10, Y15
  7404. VPSRLD $0x14, Y10, Y10
  7405. VPXOR Y15, Y10, Y10
  7406. VPADDD Y10, Y6, Y6
  7407. VPXOR Y6, Y2, Y2
  7408. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7409. VPADDD Y2, Y8, Y8
  7410. VPXOR Y8, Y10, Y10
  7411. VPSLLD $0x07, Y10, Y15
  7412. VPSRLD $0x19, Y10, Y10
  7413. VPXOR Y15, Y10, Y10
  // State 3.
  7414. VMOVDQA 224(BP), Y15
  7415. VMOVDQA Y13, 224(BP)
  7416. VPADDD Y11, Y7, Y7
  7417. VPXOR Y7, Y3, Y3
  7418. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  7419. VPADDD Y3, Y15, Y15
  7420. VPXOR Y15, Y11, Y11
  7421. VPSLLD $0x0c, Y11, Y13
  7422. VPSRLD $0x14, Y11, Y11
  7423. VPXOR Y13, Y11, Y11
  7424. VPADDD Y11, Y7, Y7
  7425. VPXOR Y7, Y3, Y3
  7426. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  7427. VPADDD Y3, Y15, Y15
  7428. VPXOR Y15, Y11, Y11
  7429. VPSLLD $0x07, Y11, Y13
  7430. VPSRLD $0x19, Y11, Y11
  7431. VPXOR Y13, Y11, Y11
  7432. VMOVDQA 224(BP), Y13
  // Undo the lane rotation: rows 1, 2, 3 by 12, 8, 4 bytes.
  7433. VPALIGNR $0x0c, Y14, Y14, Y14
  7434. VPALIGNR $0x08, Y12, Y12, Y12
  7435. VPALIGNR $0x04, Y4, Y4, Y4
  7436. VPALIGNR $0x0c, Y9, Y9, Y9
  7437. VPALIGNR $0x08, Y13, Y13, Y13
  7438. VPALIGNR $0x04, Y1, Y1, Y1
  7439. VPALIGNR $0x0c, Y10, Y10, Y10
  7440. VPALIGNR $0x08, Y8, Y8, Y8
  7441. VPALIGNR $0x04, Y2, Y2, Y2
  7442. VPALIGNR $0x0c, Y11, Y11, Y11
  7443. VPALIGNR $0x08, Y15, Y15, Y15
  7444. VPALIGNR $0x04, Y3, Y3, Y3
  // First half of the next quarter-round (add, xor, rotate 16, add, xor,
  // rotate 12), done for all four states in interleaved form. The second
  // half is executed at sealAVX2InternalLoopStart.
  7445. VPADDD Y14, Y0, Y0
  7446. VPADDD Y9, Y5, Y5
  7447. VPADDD Y10, Y6, Y6
  7448. VPADDD Y11, Y7, Y7
  7449. VPXOR Y0, Y4, Y4
  7450. VPXOR Y5, Y1, Y1
  7451. VPXOR Y6, Y2, Y2
  7452. VPXOR Y7, Y3, Y3
  7453. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7454. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7455. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7456. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  7457. VPADDD Y4, Y12, Y12
  7458. VPADDD Y1, Y13, Y13
  7459. VPADDD Y2, Y8, Y8
  7460. VPADDD Y3, Y15, Y15
  7461. VPXOR Y12, Y14, Y14
  7462. VPXOR Y13, Y9, Y9
  7463. VPXOR Y8, Y10, Y10
  7464. VPXOR Y15, Y11, Y11
  // Y15 is spilled so it can serve as scratch for the rotate-by-12.
  7465. VMOVDQA Y15, 224(BP)
  7466. VPSLLD $0x0c, Y14, Y15
  7467. VPSRLD $0x14, Y14, Y14
  7468. VPXOR Y15, Y14, Y14
  7469. VPSLLD $0x0c, Y9, Y15
  7470. VPSRLD $0x14, Y9, Y9
  7471. VPXOR Y15, Y9, Y9
  7472. VPSLLD $0x0c, Y10, Y15
  7473. VPSRLD $0x14, Y10, Y10
  7474. VPXOR Y15, Y10, Y10
  7475. VPSLLD $0x0c, Y11, Y15
  7476. VPSRLD $0x14, Y11, Y11
  7477. VPXOR Y15, Y11, Y11
  7478. VMOVDQA 224(BP), Y15
  // The code at sealAVX2InternalLoopStart reads its Poly1305 input from
  // 16(DI); backing DI up by 16 makes that read start at the current DI.
  // CX = 9 is the loop counter for this shortened first run (the main loop
  // sets it to 10).
  7479. SUBQ $0x10, DI
  7480. MOVQ $0x00000009, CX
  7481. JMP sealAVX2InternalLoopStart
  7482. sealAVX2MainLoop:
  7483. VMOVDQU ·chacha20Constants<>+0(SB), Y0
  7484. VMOVDQA Y0, Y5
  7485. VMOVDQA Y0, Y6
  7486. VMOVDQA Y0, Y7
  7487. VMOVDQA 32(BP), Y14
  7488. VMOVDQA Y14, Y9
  7489. VMOVDQA Y14, Y10
  7490. VMOVDQA Y14, Y11
  7491. VMOVDQA 64(BP), Y12
  7492. VMOVDQA Y12, Y13
  7493. VMOVDQA Y12, Y8
  7494. VMOVDQA Y12, Y15
  7495. VMOVDQA 192(BP), Y4
  7496. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  7497. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  7498. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  7499. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  7500. VMOVDQA Y4, 96(BP)
  7501. VMOVDQA Y1, 128(BP)
  7502. VMOVDQA Y2, 160(BP)
  7503. VMOVDQA Y3, 192(BP)
  7504. MOVQ $0x0000000a, CX
  // Top of the round loop: the DECQ CX / JNE at the bottom of the loop body
  // branches back to this label (CX is loaded with 0x0a just before it).
  // This section does two independent things, interleaved instruction by
  // instruction:
  //   * integer stream: absorb the 16 bytes at (DI) into the Poly1305
  //     accumulator R10:R11:R12 (the ADCQ $0x01 into R12 adds the 2^128
  //     padding bit), multiply by the key r at (BP)/8(BP), and reduce;
  //   * vector stream: first half of a ChaCha20 quarter-round
  //     (add, xor, rotl 16, add, xor, rotl 12) on four register sets:
  //     (Y0,Y14,Y12,Y4) (Y5,Y9,Y13,Y1) (Y6,Y10,Y8,Y2) (Y7,Y11,Y15,Y3).
  7505. sealAVX2InternalLoop:
  7506. ADDQ (DI), R10
  7507. ADCQ 8(DI), R11
  7508. ADCQ $0x01, R12
  7509. VPADDD Y14, Y0, Y0
  7510. VPADDD Y9, Y5, Y5
  7511. VPADDD Y10, Y6, Y6
  7512. VPADDD Y11, Y7, Y7
  // Multiply, part 1: DX = r0. R14:R13 = r0*acc0, R15 = r0*acc2, then the
  // 128-bit r0*acc1 (DX:AX) is added into R15:R14.
  7513. MOVQ (BP), DX
  7514. MOVQ DX, R15
  7515. MULXQ R10, R13, R14
  7516. IMULQ R12, R15
  7517. MULXQ R11, AX, DX
  7518. ADDQ AX, R14
  7519. ADCQ DX, R15
  7520. VPXOR Y0, Y4, Y4
  7521. VPXOR Y5, Y1, Y1
  7522. VPXOR Y6, Y2, Y2
  7523. VPXOR Y7, Y3, Y3
  7524. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7525. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7526. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7527. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  // Multiply, part 2: DX = r1. r1*acc0 -> AX:R10, r1*acc1 -> R8:R11; the low
  // halves are added into R14 and R15, with the carry going into R8.
  7528. MOVQ 8(BP), DX
  7529. MULXQ R10, R10, AX
  7530. ADDQ R10, R14
  7531. MULXQ R11, R11, R8
  7532. ADCQ R11, R15
  7533. ADCQ $0x00, R8
  7534. VPADDD Y4, Y12, Y12
  7535. VPADDD Y1, Y13, Y13
  7536. VPADDD Y2, Y8, Y8
  7537. VPADDD Y3, Y15, Y15
  7538. VPXOR Y12, Y14, Y14
  7539. VPXOR Y13, Y9, Y9
  7540. VPXOR Y8, Y10, Y10
  7541. VPXOR Y15, Y11, Y11
  // Multiply, part 3: add r1*acc2 and the pending high half in AX. The full
  // product is now the four limbs R8:R15:R14:R13.
  7542. IMULQ R12, DX
  7543. ADDQ AX, R15
  7544. ADCQ DX, R8
  // All 16 YMM registers are live, so Y15 is spilled to 224(BP) and used as
  // scratch for the rotate: rotl 12 = (x << 12) ^ (x >> 20).
  7545. VMOVDQA Y15, 224(BP)
  7546. VPSLLD $0x0c, Y14, Y15
  7547. VPSRLD $0x14, Y14, Y14
  7548. VPXOR Y15, Y14, Y14
  7549. VPSLLD $0x0c, Y9, Y15
  7550. VPSRLD $0x14, Y9, Y9
  7551. VPXOR Y15, Y9, Y9
  7552. VPSLLD $0x0c, Y10, Y15
  7553. VPSRLD $0x14, Y10, Y10
  7554. VPXOR Y15, Y10, Y10
  7555. VPSLLD $0x0c, Y11, Y15
  7556. VPSRLD $0x14, Y11, Y11
  7557. VPXOR Y15, Y11, Y11
  7558. VMOVDQA 224(BP), Y15
  // Partial reduction modulo 2^130 - 5. Keep the low 130 bits of the product
  // (R13, R14, R15 & 3) as the new accumulator, then add 5*h where h is the
  // product shifted right by 130 bits: first 4*h (R15 & ~3 with R8 above it),
  // then h itself ((R8:R15) >> 2, formed by the double-register SHRQ).
  7559. MOVQ R13, R10
  7560. MOVQ R14, R11
  7561. MOVQ R15, R12
  7562. ANDQ $0x03, R12
  7563. MOVQ R15, R13
  7564. ANDQ $-4, R13
  7565. MOVQ R8, R14
  7566. SHRQ $0x02, R8, R15
  7567. SHRQ $0x02, R8
  7568. ADDQ R13, R10
  7569. ADCQ R14, R11
  7570. ADCQ $0x00, R12
  7571. ADDQ R15, R10
  7572. ADCQ R8, R11
  7573. ADCQ $0x00, R12
  // Second label inside the round-loop body. Falling through from the code
  // above reaches it on every iteration.
  // NOTE(review): any jump that targets this label directly lies outside this
  // part of the file - confirm where the loop is first entered.
  //
  // From here to the JNE the loop body:
  //   * finishes the first quarter-round (rotl 8, rotl 7),
  //   * rotates the rows with VPALIGNR (4/8/12 bytes) so the next
  //     quarter-round works on the diagonals,
  //   * runs a full second quarter-round and rotates the rows back,
  //   * hashes the 16-byte blocks at 16(DI) and 32(DI) into the Poly1305
  //     accumulator R10:R11:R12 and advances DI by 48.
  // Integer and vector instructions are interleaved; R13, R14, R15, R8, AX, DX
  // are Poly1305 temporaries and 224(BP) is the spill slot for Y15.
  7574. sealAVX2InternalLoopStart:
  7575. VPADDD Y14, Y0, Y0
  7576. VPADDD Y9, Y5, Y5
  7577. VPADDD Y10, Y6, Y6
  7578. VPADDD Y11, Y7, Y7
  7579. VPXOR Y0, Y4, Y4
  7580. VPXOR Y5, Y1, Y1
  7581. VPXOR Y6, Y2, Y2
  7582. VPXOR Y7, Y3, Y3
  7583. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7584. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7585. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7586. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  // Absorb the second 16-byte block of this iteration (bytes 16..31 at DI).
  7587. ADDQ 16(DI), R10
  7588. ADCQ 24(DI), R11
  7589. ADCQ $0x01, R12
  7590. VPADDD Y4, Y12, Y12
  7591. VPADDD Y1, Y13, Y13
  7592. VPADDD Y2, Y8, Y8
  7593. VPADDD Y3, Y15, Y15
  // Multiply by r, part 1 (r0 = (BP)).
  7594. MOVQ (BP), DX
  7595. MOVQ DX, R15
  7596. MULXQ R10, R13, R14
  7597. IMULQ R12, R15
  7598. MULXQ R11, AX, DX
  7599. ADDQ AX, R14
  7600. ADCQ DX, R15
  7601. VPXOR Y12, Y14, Y14
  7602. VPXOR Y13, Y9, Y9
  7603. VPXOR Y8, Y10, Y10
  7604. VPXOR Y15, Y11, Y11
  // rotl 7 = (x << 7) ^ (x >> 25), with Y15 spilled and used as scratch.
  7605. VMOVDQA Y15, 224(BP)
  7606. VPSLLD $0x07, Y14, Y15
  7607. VPSRLD $0x19, Y14, Y14
  7608. VPXOR Y15, Y14, Y14
  7609. VPSLLD $0x07, Y9, Y15
  7610. VPSRLD $0x19, Y9, Y9
  7611. VPXOR Y15, Y9, Y9
  7612. VPSLLD $0x07, Y10, Y15
  7613. VPSRLD $0x19, Y10, Y10
  7614. VPXOR Y15, Y10, Y10
  7615. VPSLLD $0x07, Y11, Y15
  7616. VPSRLD $0x19, Y11, Y11
  7617. VPXOR Y15, Y11, Y11
  7618. VMOVDQA 224(BP), Y15
  // Multiply by r, part 2 (r1 = 8(BP)).
  7619. MOVQ 8(BP), DX
  7620. MULXQ R10, R10, AX
  7621. ADDQ R10, R14
  7622. MULXQ R11, R11, R8
  7623. ADCQ R11, R15
  7624. ADCQ $0x00, R8
  // Rotate rows 1, 2, 3 by 4, 8, 12 bytes within each 128-bit lane so the
  // following quarter-round mixes the diagonals.
  7625. VPALIGNR $0x04, Y14, Y14, Y14
  7626. VPALIGNR $0x04, Y9, Y9, Y9
  7627. VPALIGNR $0x04, Y10, Y10, Y10
  7628. VPALIGNR $0x04, Y11, Y11, Y11
  7629. VPALIGNR $0x08, Y12, Y12, Y12
  7630. VPALIGNR $0x08, Y13, Y13, Y13
  7631. VPALIGNR $0x08, Y8, Y8, Y8
  7632. VPALIGNR $0x08, Y15, Y15, Y15
  7633. VPALIGNR $0x0c, Y4, Y4, Y4
  7634. VPALIGNR $0x0c, Y1, Y1, Y1
  7635. VPALIGNR $0x0c, Y2, Y2, Y2
  7636. VPALIGNR $0x0c, Y3, Y3, Y3
  7637. VPADDD Y14, Y0, Y0
  7638. VPADDD Y9, Y5, Y5
  7639. VPADDD Y10, Y6, Y6
  7640. VPADDD Y11, Y7, Y7
  // Multiply by r, part 3: product complete in R8:R15:R14:R13.
  7641. IMULQ R12, DX
  7642. ADDQ AX, R15
  7643. ADCQ DX, R8
  7644. VPXOR Y0, Y4, Y4
  7645. VPXOR Y5, Y1, Y1
  7646. VPXOR Y6, Y2, Y2
  7647. VPXOR Y7, Y3, Y3
  7648. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  7649. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  7650. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  7651. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  // Partial reduction mod 2^130 - 5: acc = low 130 bits + 4*h + h, where h is
  // the product shifted right by 130 bits.
  7652. MOVQ R13, R10
  7653. MOVQ R14, R11
  7654. MOVQ R15, R12
  7655. ANDQ $0x03, R12
  7656. MOVQ R15, R13
  7657. ANDQ $-4, R13
  7658. MOVQ R8, R14
  7659. SHRQ $0x02, R8, R15
  7660. SHRQ $0x02, R8
  7661. ADDQ R13, R10
  7662. ADCQ R14, R11
  7663. ADCQ $0x00, R12
  7664. ADDQ R15, R10
  7665. ADCQ R8, R11
  7666. ADCQ $0x00, R12
  7667. VPADDD Y4, Y12, Y12
  7668. VPADDD Y1, Y13, Y13
  7669. VPADDD Y2, Y8, Y8
  7670. VPADDD Y3, Y15, Y15
  7671. VPXOR Y12, Y14, Y14
  7672. VPXOR Y13, Y9, Y9
  7673. VPXOR Y8, Y10, Y10
  7674. VPXOR Y15, Y11, Y11
  // Absorb the third 16-byte block (bytes 32..47), then step DI past all 48
  // bytes hashed in this iteration. LEAQ leaves the flags untouched.
  7675. ADDQ 32(DI), R10
  7676. ADCQ 40(DI), R11
  7677. ADCQ $0x01, R12
  7678. LEAQ 48(DI), DI
  // rotl 12 = (x << 12) ^ (x >> 20), with Y15 spilled and used as scratch.
  7679. VMOVDQA Y15, 224(BP)
  7680. VPSLLD $0x0c, Y14, Y15
  7681. VPSRLD $0x14, Y14, Y14
  7682. VPXOR Y15, Y14, Y14
  7683. VPSLLD $0x0c, Y9, Y15
  7684. VPSRLD $0x14, Y9, Y9
  7685. VPXOR Y15, Y9, Y9
  7686. VPSLLD $0x0c, Y10, Y15
  7687. VPSRLD $0x14, Y10, Y10
  7688. VPXOR Y15, Y10, Y10
  7689. VPSLLD $0x0c, Y11, Y15
  7690. VPSRLD $0x14, Y11, Y11
  7691. VPXOR Y15, Y11, Y11
  7692. VMOVDQA 224(BP), Y15
  // Multiply by r, part 1 (r0 = (BP)).
  7693. MOVQ (BP), DX
  7694. MOVQ DX, R15
  7695. MULXQ R10, R13, R14
  7696. IMULQ R12, R15
  7697. MULXQ R11, AX, DX
  7698. ADDQ AX, R14
  7699. ADCQ DX, R15
  7700. VPADDD Y14, Y0, Y0
  7701. VPADDD Y9, Y5, Y5
  7702. VPADDD Y10, Y6, Y6
  7703. VPADDD Y11, Y7, Y7
  7704. VPXOR Y0, Y4, Y4
  7705. VPXOR Y5, Y1, Y1
  7706. VPXOR Y6, Y2, Y2
  7707. VPXOR Y7, Y3, Y3
  // Multiply by r, part 2 (r1 = 8(BP)).
  7708. MOVQ 8(BP), DX
  7709. MULXQ R10, R10, AX
  7710. ADDQ R10, R14
  7711. MULXQ R11, R11, R8
  7712. ADCQ R11, R15
  7713. ADCQ $0x00, R8
  7714. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  7715. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  7716. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  7717. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  7718. VPADDD Y4, Y12, Y12
  7719. VPADDD Y1, Y13, Y13
  7720. VPADDD Y2, Y8, Y8
  7721. VPADDD Y3, Y15, Y15
  // Multiply by r, part 3: product complete in R8:R15:R14:R13.
  7722. IMULQ R12, DX
  7723. ADDQ AX, R15
  7724. ADCQ DX, R8
  7725. VPXOR Y12, Y14, Y14
  7726. VPXOR Y13, Y9, Y9
  7727. VPXOR Y8, Y10, Y10
  7728. VPXOR Y15, Y11, Y11
  // rotl 7 = (x << 7) ^ (x >> 25), with Y15 spilled and used as scratch.
  7729. VMOVDQA Y15, 224(BP)
  7730. VPSLLD $0x07, Y14, Y15
  7731. VPSRLD $0x19, Y14, Y14
  7732. VPXOR Y15, Y14, Y14
  7733. VPSLLD $0x07, Y9, Y15
  7734. VPSRLD $0x19, Y9, Y9
  7735. VPXOR Y15, Y9, Y9
  7736. VPSLLD $0x07, Y10, Y15
  7737. VPSRLD $0x19, Y10, Y10
  7738. VPXOR Y15, Y10, Y10
  7739. VPSLLD $0x07, Y11, Y15
  7740. VPSRLD $0x19, Y11, Y11
  7741. VPXOR Y15, Y11, Y11
  7742. VMOVDQA 224(BP), Y15
  // Partial reduction mod 2^130 - 5 (same scheme as above).
  7743. MOVQ R13, R10
  7744. MOVQ R14, R11
  7745. MOVQ R15, R12
  7746. ANDQ $0x03, R12
  7747. MOVQ R15, R13
  7748. ANDQ $-4, R13
  7749. MOVQ R8, R14
  7750. SHRQ $0x02, R8, R15
  7751. SHRQ $0x02, R8
  7752. ADDQ R13, R10
  7753. ADCQ R14, R11
  7754. ADCQ $0x00, R12
  7755. ADDQ R15, R10
  7756. ADCQ R8, R11
  7757. ADCQ $0x00, R12
  // Undo the row rotation (12/8/4 bytes) so the state is back in column form
  // for the next iteration.
  7758. VPALIGNR $0x0c, Y14, Y14, Y14
  7759. VPALIGNR $0x0c, Y9, Y9, Y9
  7760. VPALIGNR $0x0c, Y10, Y10, Y10
  7761. VPALIGNR $0x0c, Y11, Y11, Y11
  7762. VPALIGNR $0x08, Y12, Y12, Y12
  7763. VPALIGNR $0x08, Y13, Y13, Y13
  7764. VPALIGNR $0x08, Y8, Y8, Y8
  7765. VPALIGNR $0x08, Y15, Y15, Y15
  7766. VPALIGNR $0x04, Y4, Y4, Y4
  7767. VPALIGNR $0x04, Y1, Y1, Y1
  7768. VPALIGNR $0x04, Y2, Y2, Y2
  7769. VPALIGNR $0x04, Y3, Y3, Y3
  // CX was loaded with 0x0a before the loop: 10 iterations, 48 bytes hashed in
  // each, i.e. 480 bytes per full run of this loop.
  7770. DECQ CX
  7771. JNE sealAVX2InternalLoop
  // After the round loop: turn the four register sets into keystream, XOR 512
  // bytes of input at (SI) with it, and store the result at the output
  // position, while hashing the last 32 bytes of previously written output
  // that the round loop did not cover.
  //
  // Feed-forward: add the starting state back into the permuted state. Row 0
  // is the ChaCha20 constants, rows 1 and 2 come from 32(BP) and 64(BP), and
  // each set's own counter row comes from 96/128/160/192(BP), where it was
  // saved before the round loop.
  7772. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  7773. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  7774. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  7775. VPADDD ·chacha20Constants<>+0(SB), Y7, Y7
  7776. VPADDD 32(BP), Y14, Y14
  7777. VPADDD 32(BP), Y9, Y9
  7778. VPADDD 32(BP), Y10, Y10
  7779. VPADDD 32(BP), Y11, Y11
  7780. VPADDD 64(BP), Y12, Y12
  7781. VPADDD 64(BP), Y13, Y13
  7782. VPADDD 64(BP), Y8, Y8
  7783. VPADDD 64(BP), Y15, Y15
  7784. VPADDD 96(BP), Y4, Y4
  7785. VPADDD 128(BP), Y1, Y1
  7786. VPADDD 160(BP), Y2, Y2
  7787. VPADDD 192(BP), Y3, Y3
  // Park Y15 in the spill slot: Y15 is reused as an output register below and
  // the saved value is read back as a memory operand for the last set.
  7788. VMOVDQA Y15, 224(BP)
  7789. // We only hashed 480 of the 512 bytes available - hash the remaining 32 here
  7790. ADDQ (DI), R10
  7791. ADCQ 8(DI), R11
  7792. ADCQ $0x01, R12
  7793. MOVQ (BP), DX
  7794. MOVQ DX, R15
  7795. MULXQ R10, R13, R14
  7796. IMULQ R12, R15
  7797. MULXQ R11, AX, DX
  7798. ADDQ AX, R14
  7799. ADCQ DX, R15
  7800. MOVQ 8(BP), DX
  7801. MULXQ R10, R10, AX
  7802. ADDQ R10, R14
  7803. MULXQ R11, R11, R8
  7804. ADCQ R11, R15
  7805. ADCQ $0x00, R8
  7806. IMULQ R12, DX
  7807. ADDQ AX, R15
  7808. ADCQ DX, R8
  7809. MOVQ R13, R10
  7810. MOVQ R14, R11
  7811. MOVQ R15, R12
  7812. ANDQ $0x03, R12
  7813. MOVQ R15, R13
  7814. ANDQ $-4, R13
  7815. MOVQ R8, R14
  7816. SHRQ $0x02, R8, R15
  7817. SHRQ $0x02, R8
  7818. ADDQ R13, R10
  7819. ADCQ R14, R11
  7820. ADCQ $0x00, R12
  7821. ADDQ R15, R10
  7822. ADCQ R8, R11
  7823. ADCQ $0x00, R12
  // DI now moves past the 32 bytes being hashed in this section (the second
  // 16 are hashed below via negative offsets). From here on DI is the address
  // at which this iteration's 512 output bytes are written.
  7824. LEAQ 32(DI), DI
  // VPERM2I128 $0x02 pairs the low 128-bit lanes of two rows and $0x13 pairs
  // the high lanes, so each result register holds 32 consecutive keystream
  // bytes. First register set -> bytes 0..127.
  7825. VPERM2I128 $0x02, Y0, Y14, Y15
  7826. VPERM2I128 $0x13, Y0, Y14, Y14
  7827. VPERM2I128 $0x02, Y12, Y4, Y0
  7828. VPERM2I128 $0x13, Y12, Y4, Y12
  7829. VPXOR (SI), Y15, Y15
  7830. VPXOR 32(SI), Y0, Y0
  7831. VPXOR 64(SI), Y14, Y14
  7832. VPXOR 96(SI), Y12, Y12
  7833. VMOVDQU Y15, (DI)
  7834. VMOVDQU Y0, 32(DI)
  7835. VMOVDQU Y14, 64(DI)
  7836. VMOVDQU Y12, 96(DI)
  // Second register set -> bytes 128..255.
  7837. VPERM2I128 $0x02, Y5, Y9, Y0
  7838. VPERM2I128 $0x02, Y13, Y1, Y14
  7839. VPERM2I128 $0x13, Y5, Y9, Y12
  7840. VPERM2I128 $0x13, Y13, Y1, Y4
  7841. VPXOR 128(SI), Y0, Y0
  7842. VPXOR 160(SI), Y14, Y14
  7843. VPXOR 192(SI), Y12, Y12
  7844. VPXOR 224(SI), Y4, Y4
  7845. VMOVDQU Y0, 128(DI)
  7846. VMOVDQU Y14, 160(DI)
  7847. VMOVDQU Y12, 192(DI)
  7848. VMOVDQU Y4, 224(DI)
  7849. // and here
  7850. ADDQ -16(DI), R10
  7851. ADCQ -8(DI), R11
  7852. ADCQ $0x01, R12
  7853. MOVQ (BP), DX
  7854. MOVQ DX, R15
  7855. MULXQ R10, R13, R14
  7856. IMULQ R12, R15
  7857. MULXQ R11, AX, DX
  7858. ADDQ AX, R14
  7859. ADCQ DX, R15
  7860. MOVQ 8(BP), DX
  7861. MULXQ R10, R10, AX
  7862. ADDQ R10, R14
  7863. MULXQ R11, R11, R8
  7864. ADCQ R11, R15
  7865. ADCQ $0x00, R8
  7866. IMULQ R12, DX
  7867. ADDQ AX, R15
  7868. ADCQ DX, R8
  7869. MOVQ R13, R10
  7870. MOVQ R14, R11
  7871. MOVQ R15, R12
  7872. ANDQ $0x03, R12
  7873. MOVQ R15, R13
  7874. ANDQ $-4, R13
  7875. MOVQ R8, R14
  7876. SHRQ $0x02, R8, R15
  7877. SHRQ $0x02, R8
  7878. ADDQ R13, R10
  7879. ADCQ R14, R11
  7880. ADCQ $0x00, R12
  7881. ADDQ R15, R10
  7882. ADCQ R8, R11
  7883. ADCQ $0x00, R12
  // Third register set -> bytes 256..383.
  7884. VPERM2I128 $0x02, Y6, Y10, Y0
  7885. VPERM2I128 $0x02, Y8, Y2, Y14
  7886. VPERM2I128 $0x13, Y6, Y10, Y12
  7887. VPERM2I128 $0x13, Y8, Y2, Y4
  7888. VPXOR 256(SI), Y0, Y0
  7889. VPXOR 288(SI), Y14, Y14
  7890. VPXOR 320(SI), Y12, Y12
  7891. VPXOR 352(SI), Y4, Y4
  7892. VMOVDQU Y0, 256(DI)
  7893. VMOVDQU Y14, 288(DI)
  7894. VMOVDQU Y12, 320(DI)
  7895. VMOVDQU Y4, 352(DI)
  // Fourth register set -> bytes 384..511; its row 2 is read from the spill
  // slot at 224(BP).
  7896. VPERM2I128 $0x02, Y7, Y11, Y0
  7897. VPERM2I128 $0x02, 224(BP), Y3, Y14
  7898. VPERM2I128 $0x13, Y7, Y11, Y12
  7899. VPERM2I128 $0x13, 224(BP), Y3, Y4
  7900. VPXOR 384(SI), Y0, Y0
  7901. VPXOR 416(SI), Y14, Y14
  7902. VPXOR 448(SI), Y12, Y12
  7903. VPXOR 480(SI), Y4, Y4
  7904. VMOVDQU Y0, 384(DI)
  7905. VMOVDQU Y14, 416(DI)
  7906. VMOVDQU Y12, 448(DI)
  7907. VMOVDQU Y4, 480(DI)
  // Consume 512 input bytes; run the main loop again only while more than 512
  // bytes remain in BX. DI is not advanced here: it moves forward as data is
  // hashed, so the 512 bytes just written are hashed later.
  7908. LEAQ 512(SI), SI
  7909. SUBQ $0x00000200, BX
  7910. CMPQ BX, $0x00000200
  7911. JG sealAVX2MainLoop
  // Reached when at most 512 bytes remain in BX. Two 16-byte blocks at (DI)
  // and 16(DI) are hashed here up front and DI is advanced by 32, then control
  // is handed to a tail routine chosen by the remaining length.
  7912. // Tail can only hash 480 bytes
  7913. ADDQ (DI), R10
  7914. ADCQ 8(DI), R11
  7915. ADCQ $0x01, R12
  7916. MOVQ (BP), DX
  7917. MOVQ DX, R15
  7918. MULXQ R10, R13, R14
  7919. IMULQ R12, R15
  7920. MULXQ R11, AX, DX
  7921. ADDQ AX, R14
  7922. ADCQ DX, R15
  7923. MOVQ 8(BP), DX
  7924. MULXQ R10, R10, AX
  7925. ADDQ R10, R14
  7926. MULXQ R11, R11, R8
  7927. ADCQ R11, R15
  7928. ADCQ $0x00, R8
  7929. IMULQ R12, DX
  7930. ADDQ AX, R15
  7931. ADCQ DX, R8
  7932. MOVQ R13, R10
  7933. MOVQ R14, R11
  7934. MOVQ R15, R12
  7935. ANDQ $0x03, R12
  7936. MOVQ R15, R13
  7937. ANDQ $-4, R13
  7938. MOVQ R8, R14
  7939. SHRQ $0x02, R8, R15
  7940. SHRQ $0x02, R8
  7941. ADDQ R13, R10
  7942. ADCQ R14, R11
  7943. ADCQ $0x00, R12
  7944. ADDQ R15, R10
  7945. ADCQ R8, R11
  7946. ADCQ $0x00, R12
  // Second 16-byte block.
  7947. ADDQ 16(DI), R10
  7948. ADCQ 24(DI), R11
  7949. ADCQ $0x01, R12
  7950. MOVQ (BP), DX
  7951. MOVQ DX, R15
  7952. MULXQ R10, R13, R14
  7953. IMULQ R12, R15
  7954. MULXQ R11, AX, DX
  7955. ADDQ AX, R14
  7956. ADCQ DX, R15
  7957. MOVQ 8(BP), DX
  7958. MULXQ R10, R10, AX
  7959. ADDQ R10, R14
  7960. MULXQ R11, R11, R8
  7961. ADCQ R11, R15
  7962. ADCQ $0x00, R8
  7963. IMULQ R12, DX
  7964. ADDQ AX, R15
  7965. ADCQ DX, R8
  7966. MOVQ R13, R10
  7967. MOVQ R14, R11
  7968. MOVQ R15, R12
  7969. ANDQ $0x03, R12
  7970. MOVQ R15, R13
  7971. ANDQ $-4, R13
  7972. MOVQ R8, R14
  7973. SHRQ $0x02, R8, R15
  7974. SHRQ $0x02, R8
  7975. ADDQ R13, R10
  7976. ADCQ R14, R11
  7977. ADCQ $0x00, R12
  7978. ADDQ R15, R10
  7979. ADCQ R8, R11
  7980. ADCQ $0x00, R12
  7981. LEAQ 32(DI), DI
  // Loop counters consumed by the tail routines: CX = 10, R9 = 0.
  7982. MOVQ $0x0000000a, CX
  7983. MOVQ $0x00000000, R9
  // Pick the smallest tail routine that covers the remaining BX bytes
  // (unsigned compares): <=128, <=256, <=384, otherwise the 512-byte tail.
  7984. CMPQ BX, $0x80
  7985. JBE sealAVX2Tail128
  7986. CMPQ BX, $0x00000100
  7987. JBE sealAVX2Tail256
  7988. CMPQ BX, $0x00000180
  7989. JBE sealAVX2Tail384
  7990. JMP sealAVX2Tail512
  // Short-input path that produces keystream for up to 192 bytes using two
  // register sets: (Y0,Y14,Y12,Y4) and (Y5,Y9,Y13,Y1).
  // NOTE(review): assumes Y0/Y14/Y12/Y4 already hold the initial ChaCha20
  // state rows on entry - the code that loads them is outside this excerpt.
  7991. seal192AVX2:
  // The second set is a copy of the first with its counter row advanced by
  // avx2IncMask.
  7992. VMOVDQA Y0, Y5
  7993. VMOVDQA Y14, Y9
  7994. VMOVDQA Y12, Y13
  7995. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  // Keep untouched copies of the starting rows for the feed-forward after the
  // rounds: Y6/Y10/Y8 = rows 0..2, Y2 and Y15 = the two counter rows.
  7996. VMOVDQA Y0, Y6
  7997. VMOVDQA Y14, Y10
  7998. VMOVDQA Y12, Y8
  7999. VMOVDQA Y4, Y2
  8000. VMOVDQA Y1, Y15
  // R9 = number of double rounds (10).
  8001. MOVQ $0x0000000a, R9
  // ChaCha20 round loop for the 192-byte path: R9 iterations, each one a
  // quarter-round on the columns followed by a quarter-round on the diagonals,
  // for both register sets (Y0,Y14,Y12,Y4) and (Y5,Y9,Y13,Y1). Y3 is scratch
  // for the shift-based rotates (rotl 12 and rotl 7); rotl 16 and rotl 8 use
  // the VPSHUFB byte-shuffle tables. No Poly1305 work is interleaved here.
  8002. sealAVX2192InnerCipherLoop:
  // Column quarter-round, first set.
  8003. VPADDD Y14, Y0, Y0
  8004. VPXOR Y0, Y4, Y4
  8005. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8006. VPADDD Y4, Y12, Y12
  8007. VPXOR Y12, Y14, Y14
  8008. VPSLLD $0x0c, Y14, Y3
  8009. VPSRLD $0x14, Y14, Y14
  8010. VPXOR Y3, Y14, Y14
  8011. VPADDD Y14, Y0, Y0
  8012. VPXOR Y0, Y4, Y4
  8013. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8014. VPADDD Y4, Y12, Y12
  8015. VPXOR Y12, Y14, Y14
  8016. VPSLLD $0x07, Y14, Y3
  8017. VPSRLD $0x19, Y14, Y14
  8018. VPXOR Y3, Y14, Y14
  // Column quarter-round, second set.
  8019. VPADDD Y9, Y5, Y5
  8020. VPXOR Y5, Y1, Y1
  8021. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8022. VPADDD Y1, Y13, Y13
  8023. VPXOR Y13, Y9, Y9
  8024. VPSLLD $0x0c, Y9, Y3
  8025. VPSRLD $0x14, Y9, Y9
  8026. VPXOR Y3, Y9, Y9
  8027. VPADDD Y9, Y5, Y5
  8028. VPXOR Y5, Y1, Y1
  8029. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8030. VPADDD Y1, Y13, Y13
  8031. VPXOR Y13, Y9, Y9
  8032. VPSLLD $0x07, Y9, Y3
  8033. VPSRLD $0x19, Y9, Y9
  8034. VPXOR Y3, Y9, Y9
  // Rotate rows 1/2/3 by 4/8/12 bytes per 128-bit lane to line up diagonals.
  8035. VPALIGNR $0x04, Y14, Y14, Y14
  8036. VPALIGNR $0x04, Y9, Y9, Y9
  8037. VPALIGNR $0x08, Y12, Y12, Y12
  8038. VPALIGNR $0x08, Y13, Y13, Y13
  8039. VPALIGNR $0x0c, Y4, Y4, Y4
  8040. VPALIGNR $0x0c, Y1, Y1, Y1
  // Diagonal quarter-round, first set.
  8041. VPADDD Y14, Y0, Y0
  8042. VPXOR Y0, Y4, Y4
  8043. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8044. VPADDD Y4, Y12, Y12
  8045. VPXOR Y12, Y14, Y14
  8046. VPSLLD $0x0c, Y14, Y3
  8047. VPSRLD $0x14, Y14, Y14
  8048. VPXOR Y3, Y14, Y14
  8049. VPADDD Y14, Y0, Y0
  8050. VPXOR Y0, Y4, Y4
  8051. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8052. VPADDD Y4, Y12, Y12
  8053. VPXOR Y12, Y14, Y14
  8054. VPSLLD $0x07, Y14, Y3
  8055. VPSRLD $0x19, Y14, Y14
  8056. VPXOR Y3, Y14, Y14
  // Diagonal quarter-round, second set.
  8057. VPADDD Y9, Y5, Y5
  8058. VPXOR Y5, Y1, Y1
  8059. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8060. VPADDD Y1, Y13, Y13
  8061. VPXOR Y13, Y9, Y9
  8062. VPSLLD $0x0c, Y9, Y3
  8063. VPSRLD $0x14, Y9, Y9
  8064. VPXOR Y3, Y9, Y9
  8065. VPADDD Y9, Y5, Y5
  8066. VPXOR Y5, Y1, Y1
  8067. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8068. VPADDD Y1, Y13, Y13
  8069. VPXOR Y13, Y9, Y9
  8070. VPSLLD $0x07, Y9, Y3
  8071. VPSRLD $0x19, Y9, Y9
  8072. VPXOR Y3, Y9, Y9
  // Rotate the rows back (12/8/4 bytes) to column form.
  8073. VPALIGNR $0x0c, Y14, Y14, Y14
  8074. VPALIGNR $0x0c, Y9, Y9, Y9
  8075. VPALIGNR $0x08, Y12, Y12, Y12
  8076. VPALIGNR $0x08, Y13, Y13, Y13
  8077. VPALIGNR $0x04, Y4, Y4, Y4
  8078. VPALIGNR $0x04, Y1, Y1, Y1
  8079. DECQ R9
  8080. JNE sealAVX2192InnerCipherLoop
  // Feed-forward: add the saved starting rows (Y6, Y10, Y8 shared by both
  // sets; Y2 and Y15 are the per-set counter rows).
  8081. VPADDD Y6, Y0, Y0
  8082. VPADDD Y6, Y5, Y5
  8083. VPADDD Y10, Y14, Y14
  8084. VPADDD Y10, Y9, Y9
  8085. VPADDD Y8, Y12, Y12
  8086. VPADDD Y8, Y13, Y13
  8087. VPADDD Y2, Y4, Y4
  8088. VPADDD Y15, Y1, Y1
  // Y3 = low lanes of rows 0 and 1 of the first set, i.e. the first 32
  // keystream bytes; these become the Poly1305 key.
  8089. VPERM2I128 $0x02, Y0, Y14, Y3
  8090. // Clamp and store poly key
  8091. VPAND ·polyClampMask<>+0(SB), Y3, Y3
  8092. VMOVDQA Y3, (BP)
  8093. // Stream for up to 192 bytes
  // The remaining lanes are regrouped into six 32-byte keystream registers,
  // in consumption order Y0, Y14, Y12, Y4, Y5, Y9. The low lanes of Y12 and Y4
  // (rest of the key block) are not used. Execution then falls through into
  // sealAVX2ShortSeal.
  8094. VPERM2I128 $0x13, Y0, Y14, Y0
  8095. VPERM2I128 $0x13, Y12, Y4, Y14
  8096. VPERM2I128 $0x02, Y5, Y9, Y12
  8097. VPERM2I128 $0x02, Y13, Y1, Y4
  8098. VPERM2I128 $0x13, Y5, Y9, Y5
  8099. VPERM2I128 $0x13, Y13, Y1, Y9
  // Common continuation of the short-input paths: the keystream is already in
  // registers and the clamped Poly1305 key is at (BP).
  8100. sealAVX2ShortSeal:
  8101. // Hash aad
  // R9 = ad_len argument, passed to the internal AD-hashing helper.
  // NOTE(review): the helper's other register inputs/outputs are defined
  // outside this excerpt - confirm against polyHashADInternal.
  8102. MOVQ ad_len+80(FP), R9
  8103. CALL polyHashADInternal<>(SB)
  // CX = 0: no bytes are encrypted-but-unhashed on this path.
  8104. XORQ CX, CX
  // Catch-up hashing loop: while CX >= 16, absorb the 16 bytes at (DI) into
  // the Poly1305 accumulator R10:R11:R12, then CX -= 16 and DI += 16.
  // This copy uses the MULQ-based multiply (DX:AX = AX * operand).
  8105. sealAVX2SealHash:
  8106. // itr1 holds the number of bytes encrypted but not yet hashed
  8107. CMPQ CX, $0x10
  8108. JB sealAVX2ShortSealLoop
  // acc += block, with the 2^128 padding bit added via ADCQ $0x01.
  8109. ADDQ (DI), R10
  8110. ADCQ 8(DI), R11
  8111. ADCQ $0x01, R12
  // r0 * acc: R14:R13 = r0*acc0, R15 = r0*acc2, then r0*acc1 added into
  // R15:R14.
  8112. MOVQ (BP), AX
  8113. MOVQ AX, R15
  8114. MULQ R10
  8115. MOVQ AX, R13
  8116. MOVQ DX, R14
  8117. MOVQ (BP), AX
  8118. MULQ R11
  8119. IMULQ R12, R15
  8120. ADDQ AX, R14
  8121. ADCQ DX, R15
  // r1 * acc: R8 = r1 (later multiplied by acc2); r1*acc0 goes into R14 with
  // its high half parked in R10; r1*acc1 goes into R15 with its high half in
  // DX. The product ends up in R8:R15:R14:R13.
  8122. MOVQ 8(BP), AX
  8123. MOVQ AX, R8
  8124. MULQ R10
  8125. ADDQ AX, R14
  8126. ADCQ $0x00, DX
  8127. MOVQ DX, R10
  8128. MOVQ 8(BP), AX
  8129. MULQ R11
  8130. ADDQ AX, R15
  8131. ADCQ $0x00, DX
  8132. IMULQ R12, R8
  8133. ADDQ R10, R15
  8134. ADCQ DX, R8
  // Partial reduction mod 2^130 - 5: acc = low 130 bits + 4*h + h, where h is
  // the product shifted right by 130 bits.
  8135. MOVQ R13, R10
  8136. MOVQ R14, R11
  8137. MOVQ R15, R12
  8138. ANDQ $0x03, R12
  8139. MOVQ R15, R13
  8140. ANDQ $-4, R13
  8141. MOVQ R8, R14
  8142. SHRQ $0x02, R8, R15
  8143. SHRQ $0x02, R8
  8144. ADDQ R13, R10
  8145. ADCQ R14, R11
  8146. ADCQ $0x00, R12
  8147. ADDQ R15, R10
  8148. ADCQ R8, R11
  8149. ADCQ $0x00, R12
  8150. SUBQ $0x10, CX
  8151. ADDQ $0x10, DI
  8152. JMP sealAVX2SealHash
  // Encrypt-then-hash loop over 32-byte chunks. While BX >= 32:
  //   XOR 32 input bytes at (SI) with the keystream in Y0, store at (DI),
  //   hash the two 16-byte halves just written, advance SI/DI by 32, and
  //   shift the queue of keystream registers down by one so Y0 holds the next
  //   32 bytes.
  8153. sealAVX2ShortSealLoop:
  8154. CMPQ BX, $0x20
  8155. JB sealAVX2ShortTail32
  8156. SUBQ $0x20, BX
  8157. // Load for encryption
  8158. VPXOR (SI), Y0, Y0
  8159. VMOVDQU Y0, (DI)
  8160. LEAQ 32(SI), SI
  8161. // Now can hash
  // First 16 ciphertext bytes: acc += block (with padding bit), multiply by r
  // at (BP)/8(BP) using MULX, partial reduction mod 2^130 - 5.
  8162. ADDQ (DI), R10
  8163. ADCQ 8(DI), R11
  8164. ADCQ $0x01, R12
  8165. MOVQ (BP), DX
  8166. MOVQ DX, R15
  8167. MULXQ R10, R13, R14
  8168. IMULQ R12, R15
  8169. MULXQ R11, AX, DX
  8170. ADDQ AX, R14
  8171. ADCQ DX, R15
  8172. MOVQ 8(BP), DX
  8173. MULXQ R10, R10, AX
  8174. ADDQ R10, R14
  8175. MULXQ R11, R11, R8
  8176. ADCQ R11, R15
  8177. ADCQ $0x00, R8
  8178. IMULQ R12, DX
  8179. ADDQ AX, R15
  8180. ADCQ DX, R8
  8181. MOVQ R13, R10
  8182. MOVQ R14, R11
  8183. MOVQ R15, R12
  8184. ANDQ $0x03, R12
  8185. MOVQ R15, R13
  8186. ANDQ $-4, R13
  8187. MOVQ R8, R14
  8188. SHRQ $0x02, R8, R15
  8189. SHRQ $0x02, R8
  8190. ADDQ R13, R10
  8191. ADCQ R14, R11
  8192. ADCQ $0x00, R12
  8193. ADDQ R15, R10
  8194. ADCQ R8, R11
  8195. ADCQ $0x00, R12
  // Second 16 ciphertext bytes, same sequence.
  8196. ADDQ 16(DI), R10
  8197. ADCQ 24(DI), R11
  8198. ADCQ $0x01, R12
  8199. MOVQ (BP), DX
  8200. MOVQ DX, R15
  8201. MULXQ R10, R13, R14
  8202. IMULQ R12, R15
  8203. MULXQ R11, AX, DX
  8204. ADDQ AX, R14
  8205. ADCQ DX, R15
  8206. MOVQ 8(BP), DX
  8207. MULXQ R10, R10, AX
  8208. ADDQ R10, R14
  8209. MULXQ R11, R11, R8
  8210. ADCQ R11, R15
  8211. ADCQ $0x00, R8
  8212. IMULQ R12, DX
  8213. ADDQ AX, R15
  8214. ADCQ DX, R8
  8215. MOVQ R13, R10
  8216. MOVQ R14, R11
  8217. MOVQ R15, R12
  8218. ANDQ $0x03, R12
  8219. MOVQ R15, R13
  8220. ANDQ $-4, R13
  8221. MOVQ R8, R14
  8222. SHRQ $0x02, R8, R15
  8223. SHRQ $0x02, R8
  8224. ADDQ R13, R10
  8225. ADCQ R14, R11
  8226. ADCQ $0x00, R12
  8227. ADDQ R15, R10
  8228. ADCQ R8, R11
  8229. ADCQ $0x00, R12
  8230. LEAQ 32(DI), DI
  8231. // Shift stream left
  // Keystream queue order: Y0 <- Y14 <- Y12 <- Y4 <- Y5 <- Y9 <- Y13 <- Y1
  // <- Y6 <- Y10. How many of these hold valid keystream depends on which
  // path produced them.
  8232. VMOVDQA Y14, Y0
  8233. VMOVDQA Y12, Y14
  8234. VMOVDQA Y4, Y12
  8235. VMOVDQA Y5, Y4
  8236. VMOVDQA Y9, Y5
  8237. VMOVDQA Y13, Y9
  8238. VMOVDQA Y1, Y13
  8239. VMOVDQA Y6, Y1
  8240. VMOVDQA Y10, Y6
  8241. JMP sealAVX2ShortSealLoop
  // Fewer than 32 bytes remain. If at least 16 remain, encrypt and hash one
  // 16-byte block using the low 128 bits of Y0, then move the high 128 bits of
  // Y0 down. Either way X1 ends up holding the next 16 keystream bytes.
  // NOTE(review): X1 is presumably what sealSSETail consumes - confirm there.
  8242. sealAVX2ShortTail32:
  8243. CMPQ BX, $0x10
  // The vector move does not change the flags, so the JB below still tests
  // the CMPQ result.
  8244. VMOVDQA X0, X1
  8245. JB sealAVX2ShortDone
  8246. SUBQ $0x10, BX
  8247. // Load for encryption
  8248. VPXOR (SI), X0, X12
  8249. VMOVDQU X12, (DI)
  8250. LEAQ 16(SI), SI
  8251. // Hash
  // acc += ciphertext block (with padding bit), multiply by r, partial
  // reduction mod 2^130 - 5.
  8252. ADDQ (DI), R10
  8253. ADCQ 8(DI), R11
  8254. ADCQ $0x01, R12
  8255. MOVQ (BP), DX
  8256. MOVQ DX, R15
  8257. MULXQ R10, R13, R14
  8258. IMULQ R12, R15
  8259. MULXQ R11, AX, DX
  8260. ADDQ AX, R14
  8261. ADCQ DX, R15
  8262. MOVQ 8(BP), DX
  8263. MULXQ R10, R10, AX
  8264. ADDQ R10, R14
  8265. MULXQ R11, R11, R8
  8266. ADCQ R11, R15
  8267. ADCQ $0x00, R8
  8268. IMULQ R12, DX
  8269. ADDQ AX, R15
  8270. ADCQ DX, R8
  8271. MOVQ R13, R10
  8272. MOVQ R14, R11
  8273. MOVQ R15, R12
  8274. ANDQ $0x03, R12
  8275. MOVQ R15, R13
  8276. ANDQ $-4, R13
  8277. MOVQ R8, R14
  8278. SHRQ $0x02, R8, R15
  8279. SHRQ $0x02, R8
  8280. ADDQ R13, R10
  8281. ADCQ R14, R11
  8282. ADCQ $0x00, R12
  8283. ADDQ R15, R10
  8284. ADCQ R8, R11
  8285. ADCQ $0x00, R12
  8286. LEAQ 16(DI), DI
  // $0x11 copies the high 128-bit lane of Y0 into both lanes, so X0 (and then
  // X1) now hold the second half of the 32-byte keystream chunk.
  8287. VPERM2I128 $0x11, Y0, Y0, Y0
  8288. VMOVDQA X0, X1
  // Leave the AVX2 code: zero the upper halves of all YMM registers, then
  // continue in sealSSETail.
  // NOTE(review): sealSSETail is presumably legacy-SSE code handling the final
  // <16 bytes and the tag; it is defined outside this excerpt.
  8289. sealAVX2ShortDone:
  8290. VZEROUPPER
  8291. JMP sealSSETail
  // Short-input path that produces keystream for up to 320 bytes using three
  // register sets: (Y0,Y14,Y12,Y4), (Y5,Y9,Y13,Y1) and (Y6,Y10,Y8,Y2).
  // NOTE(review): assumes Y0/Y14/Y12/Y4 already hold the initial ChaCha20
  // state rows on entry - the code that loads them is outside this excerpt.
  8292. seal320AVX2:
  // Second set: copy of the first with the counter row advanced once.
  8293. VMOVDQA Y0, Y5
  8294. VMOVDQA Y14, Y9
  8295. VMOVDQA Y12, Y13
  8296. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  // Third set: another copy with the counter row advanced twice.
  8297. VMOVDQA Y0, Y6
  8298. VMOVDQA Y14, Y10
  8299. VMOVDQA Y12, Y8
  8300. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  // Saved for the feed-forward: Y7 = row 1, Y11 = row 2, Y15 = first counter
  // row. Row 0 is not saved; it is reloaded from chacha20Constants later.
  8301. VMOVDQA Y14, Y7
  8302. VMOVDQA Y12, Y11
  8303. VMOVDQA Y4, Y15
  // R9 = number of double rounds (10).
  8304. MOVQ $0x0000000a, R9
  // ChaCha20 round loop for the 320-byte path: R9 iterations, each one a
  // column quarter-round then a diagonal quarter-round on three register sets
  // (Y0,Y14,Y12,Y4), (Y5,Y9,Y13,Y1), (Y6,Y10,Y8,Y2). Y3 is scratch for the
  // shift-based rotates. No Poly1305 work is interleaved here.
  8305. sealAVX2320InnerCipherLoop:
  // Column quarter-round, first set.
  8306. VPADDD Y14, Y0, Y0
  8307. VPXOR Y0, Y4, Y4
  8308. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8309. VPADDD Y4, Y12, Y12
  8310. VPXOR Y12, Y14, Y14
  8311. VPSLLD $0x0c, Y14, Y3
  8312. VPSRLD $0x14, Y14, Y14
  8313. VPXOR Y3, Y14, Y14
  8314. VPADDD Y14, Y0, Y0
  8315. VPXOR Y0, Y4, Y4
  8316. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8317. VPADDD Y4, Y12, Y12
  8318. VPXOR Y12, Y14, Y14
  8319. VPSLLD $0x07, Y14, Y3
  8320. VPSRLD $0x19, Y14, Y14
  8321. VPXOR Y3, Y14, Y14
  // Column quarter-round, second set.
  8322. VPADDD Y9, Y5, Y5
  8323. VPXOR Y5, Y1, Y1
  8324. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8325. VPADDD Y1, Y13, Y13
  8326. VPXOR Y13, Y9, Y9
  8327. VPSLLD $0x0c, Y9, Y3
  8328. VPSRLD $0x14, Y9, Y9
  8329. VPXOR Y3, Y9, Y9
  8330. VPADDD Y9, Y5, Y5
  8331. VPXOR Y5, Y1, Y1
  8332. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8333. VPADDD Y1, Y13, Y13
  8334. VPXOR Y13, Y9, Y9
  8335. VPSLLD $0x07, Y9, Y3
  8336. VPSRLD $0x19, Y9, Y9
  8337. VPXOR Y3, Y9, Y9
  // Column quarter-round, third set.
  8338. VPADDD Y10, Y6, Y6
  8339. VPXOR Y6, Y2, Y2
  8340. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  8341. VPADDD Y2, Y8, Y8
  8342. VPXOR Y8, Y10, Y10
  8343. VPSLLD $0x0c, Y10, Y3
  8344. VPSRLD $0x14, Y10, Y10
  8345. VPXOR Y3, Y10, Y10
  8346. VPADDD Y10, Y6, Y6
  8347. VPXOR Y6, Y2, Y2
  8348. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  8349. VPADDD Y2, Y8, Y8
  8350. VPXOR Y8, Y10, Y10
  8351. VPSLLD $0x07, Y10, Y3
  8352. VPSRLD $0x19, Y10, Y10
  8353. VPXOR Y3, Y10, Y10
  // Rotate rows 1/2/3 by 4/8/12 bytes per 128-bit lane to line up diagonals.
  8354. VPALIGNR $0x04, Y14, Y14, Y14
  8355. VPALIGNR $0x04, Y9, Y9, Y9
  8356. VPALIGNR $0x04, Y10, Y10, Y10
  8357. VPALIGNR $0x08, Y12, Y12, Y12
  8358. VPALIGNR $0x08, Y13, Y13, Y13
  8359. VPALIGNR $0x08, Y8, Y8, Y8
  8360. VPALIGNR $0x0c, Y4, Y4, Y4
  8361. VPALIGNR $0x0c, Y1, Y1, Y1
  8362. VPALIGNR $0x0c, Y2, Y2, Y2
  // Diagonal quarter-round, first set.
  8363. VPADDD Y14, Y0, Y0
  8364. VPXOR Y0, Y4, Y4
  8365. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8366. VPADDD Y4, Y12, Y12
  8367. VPXOR Y12, Y14, Y14
  8368. VPSLLD $0x0c, Y14, Y3
  8369. VPSRLD $0x14, Y14, Y14
  8370. VPXOR Y3, Y14, Y14
  8371. VPADDD Y14, Y0, Y0
  8372. VPXOR Y0, Y4, Y4
  8373. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8374. VPADDD Y4, Y12, Y12
  8375. VPXOR Y12, Y14, Y14
  8376. VPSLLD $0x07, Y14, Y3
  8377. VPSRLD $0x19, Y14, Y14
  8378. VPXOR Y3, Y14, Y14
  // Diagonal quarter-round, second set.
  8379. VPADDD Y9, Y5, Y5
  8380. VPXOR Y5, Y1, Y1
  8381. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8382. VPADDD Y1, Y13, Y13
  8383. VPXOR Y13, Y9, Y9
  8384. VPSLLD $0x0c, Y9, Y3
  8385. VPSRLD $0x14, Y9, Y9
  8386. VPXOR Y3, Y9, Y9
  8387. VPADDD Y9, Y5, Y5
  8388. VPXOR Y5, Y1, Y1
  8389. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8390. VPADDD Y1, Y13, Y13
  8391. VPXOR Y13, Y9, Y9
  8392. VPSLLD $0x07, Y9, Y3
  8393. VPSRLD $0x19, Y9, Y9
  8394. VPXOR Y3, Y9, Y9
  // Diagonal quarter-round, third set.
  8395. VPADDD Y10, Y6, Y6
  8396. VPXOR Y6, Y2, Y2
  8397. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  8398. VPADDD Y2, Y8, Y8
  8399. VPXOR Y8, Y10, Y10
  8400. VPSLLD $0x0c, Y10, Y3
  8401. VPSRLD $0x14, Y10, Y10
  8402. VPXOR Y3, Y10, Y10
  8403. VPADDD Y10, Y6, Y6
  8404. VPXOR Y6, Y2, Y2
  8405. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  8406. VPADDD Y2, Y8, Y8
  8407. VPXOR Y8, Y10, Y10
  8408. VPSLLD $0x07, Y10, Y3
  8409. VPSRLD $0x19, Y10, Y10
  8410. VPXOR Y3, Y10, Y10
  // Rotate the rows back (12/8/4 bytes) to column form.
  8411. VPALIGNR $0x0c, Y14, Y14, Y14
  8412. VPALIGNR $0x0c, Y9, Y9, Y9
  8413. VPALIGNR $0x0c, Y10, Y10, Y10
  8414. VPALIGNR $0x08, Y12, Y12, Y12
  8415. VPALIGNR $0x08, Y13, Y13, Y13
  8416. VPALIGNR $0x08, Y8, Y8, Y8
  8417. VPALIGNR $0x04, Y4, Y4, Y4
  8418. VPALIGNR $0x04, Y1, Y1, Y1
  8419. VPALIGNR $0x04, Y2, Y2, Y2
  8420. DECQ R9
  8421. JNE sealAVX2320InnerCipherLoop
  // Feed-forward. Row 0 is reloaded from the constants table into Y3; rows 1
  // and 2 come from the saved copies in Y7 and Y11.
  8422. VMOVDQA ·chacha20Constants<>+0(SB), Y3
  8423. VPADDD Y3, Y0, Y0
  8424. VPADDD Y3, Y5, Y5
  8425. VPADDD Y3, Y6, Y6
  8426. VPADDD Y7, Y14, Y14
  8427. VPADDD Y7, Y9, Y9
  8428. VPADDD Y7, Y10, Y10
  8429. VPADDD Y11, Y12, Y12
  8430. VPADDD Y11, Y13, Y13
  8431. VPADDD Y11, Y8, Y8
  // Counter rows: Y15 holds the first set's starting counter row; it is
  // stepped by avx2IncMask (now in Y3) to recreate the second and third sets'
  // starting counters before each is added.
  8432. VMOVDQA ·avx2IncMask<>+0(SB), Y3
  8433. VPADDD Y15, Y4, Y4
  8434. VPADDD Y3, Y15, Y15
  8435. VPADDD Y15, Y1, Y1
  8436. VPADDD Y3, Y15, Y15
  8437. VPADDD Y15, Y2, Y2
  8438. // Clamp and store poly key
  // Y3 = low lanes of rows 0 and 1 of the first set = first 32 keystream
  // bytes, masked with polyClampMask and stored at (BP).
  8439. VPERM2I128 $0x02, Y0, Y14, Y3
  8440. VPAND ·polyClampMask<>+0(SB), Y3, Y3
  8441. VMOVDQA Y3, (BP)
  8442. // Stream for up to 320 bytes
  // Regroup the remaining lanes into ten 32-byte keystream registers, in
  // consumption order Y0, Y14, Y12, Y4, Y5, Y9, Y13, Y1, Y6, Y10. The low
  // lanes of the first set's rows 2 and 3 are not used.
  8443. VPERM2I128 $0x13, Y0, Y14, Y0
  8444. VPERM2I128 $0x13, Y12, Y4, Y14
  8445. VPERM2I128 $0x02, Y5, Y9, Y12
  8446. VPERM2I128 $0x02, Y13, Y1, Y4
  8447. VPERM2I128 $0x13, Y5, Y9, Y5
  8448. VPERM2I128 $0x13, Y13, Y1, Y9
  8449. VPERM2I128 $0x02, Y6, Y10, Y13
  8450. VPERM2I128 $0x02, Y8, Y2, Y1
  8451. VPERM2I128 $0x13, Y6, Y10, Y6
  8452. VPERM2I128 $0x13, Y8, Y2, Y10
  8453. JMP sealAVX2ShortSeal
  // Tail for at most 128 remaining bytes (selected by the CMPQ BX, $0x80 /
  // JBE dispatch). Builds a single register set (Y0,Y14,Y12,Y4) from the
  // stored state: row 0 = constants, rows 1 and 2 = 32(BP) and 64(BP), and the
  // counter row = the last counter saved at 192(BP), advanced by avx2IncMask.
  8454. sealAVX2Tail128:
  8455. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  8456. VMOVDQA 32(BP), Y14
  8457. VMOVDQA 64(BP), Y12
  8458. VMOVDQA 192(BP), Y4
  8459. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  // Y1 keeps the starting counter row for the feed-forward after the rounds.
  8460. VMOVDQA Y4, Y1
  // Extra hashing step of the 128-byte tail loop: absorb the 16 bytes at (DI)
  // into the Poly1305 accumulator R10:R11:R12 and advance DI by 16, then fall
  // through into sealAVX2Tail128LoopB. It runs only on passes entered through
  // this label (fall-through from the setup, or the JG at the bottom of the
  // loop while CX is still positive).
  8461. sealAVX2Tail128LoopA:
  // acc += block, with the 2^128 padding bit added via ADCQ $0x01.
  8462. ADDQ (DI), R10
  8463. ADCQ 8(DI), R11
  8464. ADCQ $0x01, R12
  // Multiply by r = (BP), 8(BP) using MULQ (DX:AX = AX * operand); the product
  // ends up in R8:R15:R14:R13.
  8465. MOVQ (BP), AX
  8466. MOVQ AX, R15
  8467. MULQ R10
  8468. MOVQ AX, R13
  8469. MOVQ DX, R14
  8470. MOVQ (BP), AX
  8471. MULQ R11
  8472. IMULQ R12, R15
  8473. ADDQ AX, R14
  8474. ADCQ DX, R15
  8475. MOVQ 8(BP), AX
  8476. MOVQ AX, R8
  8477. MULQ R10
  8478. ADDQ AX, R14
  8479. ADCQ $0x00, DX
  8480. MOVQ DX, R10
  8481. MOVQ 8(BP), AX
  8482. MULQ R11
  8483. ADDQ AX, R15
  8484. ADCQ $0x00, DX
  8485. IMULQ R12, R8
  8486. ADDQ R10, R15
  8487. ADCQ DX, R8
  // Partial reduction mod 2^130 - 5: acc = low 130 bits + 4*h + h, where h is
  // the product shifted right by 130 bits.
  8488. MOVQ R13, R10
  8489. MOVQ R14, R11
  8490. MOVQ R15, R12
  8491. ANDQ $0x03, R12
  8492. MOVQ R15, R13
  8493. ANDQ $-4, R13
  8494. MOVQ R8, R14
  8495. SHRQ $0x02, R8, R15
  8496. SHRQ $0x02, R8
  8497. ADDQ R13, R10
  8498. ADCQ R14, R11
  8499. ADCQ $0x00, R12
  8500. ADDQ R15, R10
  8501. ADCQ R8, R11
  8502. ADCQ $0x00, R12
  8503. LEAQ 16(DI), DI
  // One ChaCha20 double round (column quarter-round, then diagonal
  // quarter-round) on the single register set (Y0,Y14,Y12,Y4), with Y3 as
  // scratch, plus two Poly1305 block absorptions (bytes at (DI) and 16(DI));
  // DI then advances by 32.
  //
  // Loop control at the bottom: while the decremented CX stays > 0, branch to
  // LoopA (extra 16-byte hash + this body); after that, while the decremented
  // R9 stays >= 0, branch to LoopB (this body only). With the CX = 10, R9 = 0
  // values set by the tail dispatch this gives 10 passes, all through LoopA.
  8504. sealAVX2Tail128LoopB:
  // Column quarter-round.
  8505. VPADDD Y14, Y0, Y0
  8506. VPXOR Y0, Y4, Y4
  8507. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8508. VPADDD Y4, Y12, Y12
  8509. VPXOR Y12, Y14, Y14
  8510. VPSLLD $0x0c, Y14, Y3
  8511. VPSRLD $0x14, Y14, Y14
  8512. VPXOR Y3, Y14, Y14
  8513. VPADDD Y14, Y0, Y0
  8514. VPXOR Y0, Y4, Y4
  8515. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8516. VPADDD Y4, Y12, Y12
  8517. VPXOR Y12, Y14, Y14
  8518. VPSLLD $0x07, Y14, Y3
  8519. VPSRLD $0x19, Y14, Y14
  8520. VPXOR Y3, Y14, Y14
  // Poly1305: absorb the 16 bytes at (DI) (MULQ-based multiply and partial
  // reduction mod 2^130 - 5).
  8521. ADDQ (DI), R10
  8522. ADCQ 8(DI), R11
  8523. ADCQ $0x01, R12
  8524. MOVQ (BP), AX
  8525. MOVQ AX, R15
  8526. MULQ R10
  8527. MOVQ AX, R13
  8528. MOVQ DX, R14
  8529. MOVQ (BP), AX
  8530. MULQ R11
  8531. IMULQ R12, R15
  8532. ADDQ AX, R14
  8533. ADCQ DX, R15
  8534. MOVQ 8(BP), AX
  8535. MOVQ AX, R8
  8536. MULQ R10
  8537. ADDQ AX, R14
  8538. ADCQ $0x00, DX
  8539. MOVQ DX, R10
  8540. MOVQ 8(BP), AX
  8541. MULQ R11
  8542. ADDQ AX, R15
  8543. ADCQ $0x00, DX
  8544. IMULQ R12, R8
  8545. ADDQ R10, R15
  8546. ADCQ DX, R8
  8547. MOVQ R13, R10
  8548. MOVQ R14, R11
  8549. MOVQ R15, R12
  8550. ANDQ $0x03, R12
  8551. MOVQ R15, R13
  8552. ANDQ $-4, R13
  8553. MOVQ R8, R14
  8554. SHRQ $0x02, R8, R15
  8555. SHRQ $0x02, R8
  8556. ADDQ R13, R10
  8557. ADCQ R14, R11
  8558. ADCQ $0x00, R12
  8559. ADDQ R15, R10
  8560. ADCQ R8, R11
  8561. ADCQ $0x00, R12
  // Rotate rows 1/2/3 by 4/8/12 bytes per 128-bit lane to line up diagonals.
  8562. VPALIGNR $0x04, Y14, Y14, Y14
  8563. VPALIGNR $0x08, Y12, Y12, Y12
  8564. VPALIGNR $0x0c, Y4, Y4, Y4
  // Diagonal quarter-round.
  8565. VPADDD Y14, Y0, Y0
  8566. VPXOR Y0, Y4, Y4
  8567. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8568. VPADDD Y4, Y12, Y12
  8569. VPXOR Y12, Y14, Y14
  8570. VPSLLD $0x0c, Y14, Y3
  8571. VPSRLD $0x14, Y14, Y14
  8572. VPXOR Y3, Y14, Y14
  8573. VPADDD Y14, Y0, Y0
  8574. VPXOR Y0, Y4, Y4
  8575. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8576. VPADDD Y4, Y12, Y12
  8577. VPXOR Y12, Y14, Y14
  8578. VPSLLD $0x07, Y14, Y3
  8579. VPSRLD $0x19, Y14, Y14
  8580. VPXOR Y3, Y14, Y14
  // Poly1305: absorb the 16 bytes at 16(DI).
  8581. ADDQ 16(DI), R10
  8582. ADCQ 24(DI), R11
  8583. ADCQ $0x01, R12
  8584. MOVQ (BP), AX
  8585. MOVQ AX, R15
  8586. MULQ R10
  8587. MOVQ AX, R13
  8588. MOVQ DX, R14
  8589. MOVQ (BP), AX
  8590. MULQ R11
  8591. IMULQ R12, R15
  8592. ADDQ AX, R14
  8593. ADCQ DX, R15
  8594. MOVQ 8(BP), AX
  8595. MOVQ AX, R8
  8596. MULQ R10
  8597. ADDQ AX, R14
  8598. ADCQ $0x00, DX
  8599. MOVQ DX, R10
  8600. MOVQ 8(BP), AX
  8601. MULQ R11
  8602. ADDQ AX, R15
  8603. ADCQ $0x00, DX
  8604. IMULQ R12, R8
  8605. ADDQ R10, R15
  8606. ADCQ DX, R8
  8607. MOVQ R13, R10
  8608. MOVQ R14, R11
  8609. MOVQ R15, R12
  8610. ANDQ $0x03, R12
  8611. MOVQ R15, R13
  8612. ANDQ $-4, R13
  8613. MOVQ R8, R14
  8614. SHRQ $0x02, R8, R15
  8615. SHRQ $0x02, R8
  8616. ADDQ R13, R10
  8617. ADCQ R14, R11
  8618. ADCQ $0x00, R12
  8619. ADDQ R15, R10
  8620. ADCQ R8, R11
  8621. ADCQ $0x00, R12
  8622. LEAQ 32(DI), DI
  // Rotate the rows back (12/8/4 bytes) to column form.
  8623. VPALIGNR $0x0c, Y14, Y14, Y14
  8624. VPALIGNR $0x08, Y12, Y12, Y12
  8625. VPALIGNR $0x04, Y4, Y4, Y4
  8626. DECQ CX
  8627. JG sealAVX2Tail128LoopA
  8628. DECQ R9
  8629. JGE sealAVX2Tail128LoopB
  // Feed-forward into a second register group (Y5, Y9, Y13, Y1): add the
  // constants, the stored rows at 32(BP)/64(BP), and the starting counter row
  // kept in Y1.
  8630. VPADDD ·chacha20Constants<>+0(SB), Y0, Y5
  8631. VPADDD 32(BP), Y14, Y9
  8632. VPADDD 64(BP), Y12, Y13
  8633. VPADDD Y1, Y4, Y1
  // Regroup lanes into four 32-byte keystream registers in the order the
  // short seal loop consumes them (Y0, Y14, Y12, Y4), then continue there.
  8634. VPERM2I128 $0x02, Y5, Y9, Y0
  8635. VPERM2I128 $0x02, Y13, Y1, Y14
  8636. VPERM2I128 $0x13, Y5, Y9, Y12
  8637. VPERM2I128 $0x13, Y13, Y1, Y4
  8638. JMP sealAVX2ShortSealLoop
  // Tail for at most 256 remaining bytes (selected by the CMPQ BX,
  // $0x00000100 / JBE dispatch). Builds two register sets, (Y0,Y14,Y12,Y4)
  // and (Y5,Y9,Y13,Y1), from the stored state: row 0 = constants, rows 1 and
  // 2 = 32(BP) and 64(BP).
  8639. sealAVX2Tail256:
  8640. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  8641. VMOVDQA ·chacha20Constants<>+0(SB), Y5
  8642. VMOVDQA 32(BP), Y14
  8643. VMOVDQA 32(BP), Y9
  8644. VMOVDQA 64(BP), Y12
  8645. VMOVDQA 64(BP), Y13
  // Counter rows: the last counter saved at 192(BP), advanced once for the
  // first set and twice for the second.
  8646. VMOVDQA 192(BP), Y4
  8647. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  8648. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  // Y7 and Y11 keep copies of the two starting counter rows.
  // NOTE(review): presumably for the feed-forward after the rounds, which
  // lies beyond this excerpt.
  8649. VMOVDQA Y4, Y7
  8650. VMOVDQA Y1, Y11
  // Extra hashing step of the 256-byte tail loop: absorb the 16 bytes at (DI)
  // into the Poly1305 accumulator R10:R11:R12 and advance DI by 16, then fall
  // through into sealAVX2Tail256LoopB.
  // NOTE(review): the loop-control branches that return here are past the end
  // of this excerpt.
  8651. sealAVX2Tail256LoopA:
  // acc += block, with the 2^128 padding bit added via ADCQ $0x01.
  8652. ADDQ (DI), R10
  8653. ADCQ 8(DI), R11
  8654. ADCQ $0x01, R12
  // Multiply by r = (BP), 8(BP) using MULQ (DX:AX = AX * operand); the product
  // ends up in R8:R15:R14:R13.
  8655. MOVQ (BP), AX
  8656. MOVQ AX, R15
  8657. MULQ R10
  8658. MOVQ AX, R13
  8659. MOVQ DX, R14
  8660. MOVQ (BP), AX
  8661. MULQ R11
  8662. IMULQ R12, R15
  8663. ADDQ AX, R14
  8664. ADCQ DX, R15
  8665. MOVQ 8(BP), AX
  8666. MOVQ AX, R8
  8667. MULQ R10
  8668. ADDQ AX, R14
  8669. ADCQ $0x00, DX
  8670. MOVQ DX, R10
  8671. MOVQ 8(BP), AX
  8672. MULQ R11
  8673. ADDQ AX, R15
  8674. ADCQ $0x00, DX
  8675. IMULQ R12, R8
  8676. ADDQ R10, R15
  8677. ADCQ DX, R8
  // Partial reduction mod 2^130 - 5: acc = low 130 bits + 4*h + h, where h is
  // the product shifted right by 130 bits.
  8678. MOVQ R13, R10
  8679. MOVQ R14, R11
  8680. MOVQ R15, R12
  8681. ANDQ $0x03, R12
  8682. MOVQ R15, R13
  8683. ANDQ $-4, R13
  8684. MOVQ R8, R14
  8685. SHRQ $0x02, R8, R15
  8686. SHRQ $0x02, R8
  8687. ADDQ R13, R10
  8688. ADCQ R14, R11
  8689. ADCQ $0x00, R12
  8690. ADDQ R15, R10
  8691. ADCQ R8, R11
  8692. ADCQ $0x00, R12
  8693. LEAQ 16(DI), DI
  // sealAVX2Tail256LoopB: one double round (column round + diagonal round) over
  // two ChaCha register sets, (Y0,Y14,Y12,Y4) and (Y5,Y9,Y13,Y1), interleaved
  // with two 16-byte hash steps over the data at (DI). Y3 is rotate scratch.
  // The LoopA code just above falls through to this label after hashing one
  // extra 16-byte block. CX and R9 (loop counters) are initialised outside
  // this view -- TODO confirm their values at the entry to this tail.
  8694. sealAVX2Tail256LoopB:
  // Column quarter-round, set 1: add / xor / rotate by 16, 12, 8, 7.
  // Rotates by 16 and 8 use VPSHUFB with the rol16<>/rol8<> tables; rotates
  // by 12 and 7 are built from shift-left, shift-right and XOR.
  8695. VPADDD Y14, Y0, Y0
  8696. VPXOR Y0, Y4, Y4
  8697. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8698. VPADDD Y4, Y12, Y12
  8699. VPXOR Y12, Y14, Y14
  8700. VPSLLD $0x0c, Y14, Y3
  8701. VPSRLD $0x14, Y14, Y14
  8702. VPXOR Y3, Y14, Y14
  8703. VPADDD Y14, Y0, Y0
  8704. VPXOR Y0, Y4, Y4
  8705. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8706. VPADDD Y4, Y12, Y12
  8707. VPXOR Y12, Y14, Y14
  8708. VPSLLD $0x07, Y14, Y3
  8709. VPSRLD $0x19, Y14, Y14
  8710. VPXOR Y3, Y14, Y14
  // Column quarter-round, set 2.
  8711. VPADDD Y9, Y5, Y5
  8712. VPXOR Y5, Y1, Y1
  8713. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8714. VPADDD Y1, Y13, Y13
  8715. VPXOR Y13, Y9, Y9
  8716. VPSLLD $0x0c, Y9, Y3
  8717. VPSRLD $0x14, Y9, Y9
  8718. VPXOR Y3, Y9, Y9
  8719. VPADDD Y9, Y5, Y5
  8720. VPXOR Y5, Y1, Y1
  8721. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8722. VPADDD Y1, Y13, Y13
  8723. VPXOR Y13, Y9, Y9
  8724. VPSLLD $0x07, Y9, Y3
  8725. VPSRLD $0x19, Y9, Y9
  8726. VPXOR Y3, Y9, Y9
  // Hash step 1: accumulator (R10,R11,R12) += 16 bytes at (DI), plus 1 in the
  // top limb (the 2^128 bit of the block).
  8727. ADDQ (DI), R10
  8728. ADCQ 8(DI), R11
  8729. ADCQ $0x01, R12
  // Multiply the accumulator by the 128-bit value at 0(BP)/8(BP) (presumably
  // the Poly1305 key r, stored outside this view). Product limbs end up in
  // R13, R14, R15, R8 (low to high).
  8730. MOVQ (BP), AX
  8731. MOVQ AX, R15
  8732. MULQ R10
  8733. MOVQ AX, R13
  8734. MOVQ DX, R14
  8735. MOVQ (BP), AX
  8736. MULQ R11
  8737. IMULQ R12, R15
  8738. ADDQ AX, R14
  8739. ADCQ DX, R15
  8740. MOVQ 8(BP), AX
  8741. MOVQ AX, R8
  8742. MULQ R10
  8743. ADDQ AX, R14
  8744. ADCQ $0x00, DX
  8745. MOVQ DX, R10
  8746. MOVQ 8(BP), AX
  8747. MULQ R11
  8748. ADDQ AX, R15
  8749. ADCQ $0x00, DX
  8750. IMULQ R12, R8
  8751. ADDQ R10, R15
  8752. ADCQ DX, R8
  // Reduce: keep the low 130 bits (R13, R14, R15&3) and fold the bits above
  // 2^130 back in times 5 -- added once as (R15&~3 : R8), i.e. 4*c, and once
  // as (R15:R8) >> 2, i.e. c.
  8753. MOVQ R13, R10
  8754. MOVQ R14, R11
  8755. MOVQ R15, R12
  8756. ANDQ $0x03, R12
  8757. MOVQ R15, R13
  8758. ANDQ $-4, R13
  8759. MOVQ R8, R14
  8760. SHRQ $0x02, R8, R15
  8761. SHRQ $0x02, R8
  8762. ADDQ R13, R10
  8763. ADCQ R14, R11
  8764. ADCQ $0x00, R12
  8765. ADDQ R15, R10
  8766. ADCQ R8, R11
  8767. ADCQ $0x00, R12
  // Rotate rows 1, 2, 3 by 4, 8, 12 bytes within each 128-bit lane so the
  // next quarter-rounds operate on the diagonals.
  8768. VPALIGNR $0x04, Y14, Y14, Y14
  8769. VPALIGNR $0x04, Y9, Y9, Y9
  8770. VPALIGNR $0x08, Y12, Y12, Y12
  8771. VPALIGNR $0x08, Y13, Y13, Y13
  8772. VPALIGNR $0x0c, Y4, Y4, Y4
  8773. VPALIGNR $0x0c, Y1, Y1, Y1
  // Diagonal quarter-round, set 1.
  8774. VPADDD Y14, Y0, Y0
  8775. VPXOR Y0, Y4, Y4
  8776. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8777. VPADDD Y4, Y12, Y12
  8778. VPXOR Y12, Y14, Y14
  8779. VPSLLD $0x0c, Y14, Y3
  8780. VPSRLD $0x14, Y14, Y14
  8781. VPXOR Y3, Y14, Y14
  8782. VPADDD Y14, Y0, Y0
  8783. VPXOR Y0, Y4, Y4
  8784. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8785. VPADDD Y4, Y12, Y12
  8786. VPXOR Y12, Y14, Y14
  8787. VPSLLD $0x07, Y14, Y3
  8788. VPSRLD $0x19, Y14, Y14
  8789. VPXOR Y3, Y14, Y14
  // Diagonal quarter-round, set 2.
  8790. VPADDD Y9, Y5, Y5
  8791. VPXOR Y5, Y1, Y1
  8792. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8793. VPADDD Y1, Y13, Y13
  8794. VPXOR Y13, Y9, Y9
  8795. VPSLLD $0x0c, Y9, Y3
  8796. VPSRLD $0x14, Y9, Y9
  8797. VPXOR Y3, Y9, Y9
  8798. VPADDD Y9, Y5, Y5
  8799. VPXOR Y5, Y1, Y1
  8800. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8801. VPADDD Y1, Y13, Y13
  8802. VPXOR Y13, Y9, Y9
  8803. VPSLLD $0x07, Y9, Y3
  8804. VPSRLD $0x19, Y9, Y9
  8805. VPXOR Y3, Y9, Y9
  // Hash step 2: same add / multiply / reduce sequence on the 16 bytes at 16(DI).
  8806. ADDQ 16(DI), R10
  8807. ADCQ 24(DI), R11
  8808. ADCQ $0x01, R12
  8809. MOVQ (BP), AX
  8810. MOVQ AX, R15
  8811. MULQ R10
  8812. MOVQ AX, R13
  8813. MOVQ DX, R14
  8814. MOVQ (BP), AX
  8815. MULQ R11
  8816. IMULQ R12, R15
  8817. ADDQ AX, R14
  8818. ADCQ DX, R15
  8819. MOVQ 8(BP), AX
  8820. MOVQ AX, R8
  8821. MULQ R10
  8822. ADDQ AX, R14
  8823. ADCQ $0x00, DX
  8824. MOVQ DX, R10
  8825. MOVQ 8(BP), AX
  8826. MULQ R11
  8827. ADDQ AX, R15
  8828. ADCQ $0x00, DX
  8829. IMULQ R12, R8
  8830. ADDQ R10, R15
  8831. ADCQ DX, R8
  8832. MOVQ R13, R10
  8833. MOVQ R14, R11
  8834. MOVQ R15, R12
  8835. ANDQ $0x03, R12
  8836. MOVQ R15, R13
  8837. ANDQ $-4, R13
  8838. MOVQ R8, R14
  8839. SHRQ $0x02, R8, R15
  8840. SHRQ $0x02, R8
  8841. ADDQ R13, R10
  8842. ADCQ R14, R11
  8843. ADCQ $0x00, R12
  8844. ADDQ R15, R10
  8845. ADCQ R8, R11
  8846. ADCQ $0x00, R12
  // 32 bytes were hashed in this pass; advance the data pointer.
  8847. LEAQ 32(DI), DI
  // Undo the diagonal rotation (12, 8, 4 bytes) to restore column order.
  8848. VPALIGNR $0x0c, Y14, Y14, Y14
  8849. VPALIGNR $0x0c, Y9, Y9, Y9
  8850. VPALIGNR $0x08, Y12, Y12, Y12
  8851. VPALIGNR $0x08, Y13, Y13, Y13
  8852. VPALIGNR $0x04, Y4, Y4, Y4
  8853. VPALIGNR $0x04, Y1, Y1, Y1
  // Loop control: while CX stays positive after the decrement, re-enter via
  // LoopA (extra 16-byte hash step); afterwards keep re-entering LoopB until
  // R9 goes negative.
  8854. DECQ CX
  8855. JG sealAVX2Tail256LoopA
  8856. DECQ R9
  8857. JGE sealAVX2Tail256LoopB
  // Feed-forward: add the input state back in -- constants, the rows saved at
  // 32(BP) and 64(BP), and the counter rows held in Y7 / Y11.
  8858. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  8859. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  8860. VPADDD 32(BP), Y14, Y14
  8861. VPADDD 32(BP), Y9, Y9
  8862. VPADDD 64(BP), Y12, Y12
  8863. VPADDD 64(BP), Y13, Y13
  8864. VPADDD Y7, Y4, Y4
  8865. VPADDD Y11, Y1, Y1
  // Set 1 -> 128 bytes of keystream. Selector 0x02 gathers the low 128-bit
  // lanes of the two sources, 0x13 gathers the high lanes.
  8866. VPERM2I128 $0x02, Y0, Y14, Y3
  8867. VPERM2I128 $0x02, Y12, Y4, Y7
  8868. VPERM2I128 $0x13, Y0, Y14, Y11
  8869. VPERM2I128 $0x13, Y12, Y4, Y15
  // XOR with 128 input bytes at (SI) and store the result at (DI).
  8870. VPXOR (SI), Y3, Y3
  8871. VPXOR 32(SI), Y7, Y7
  8872. VPXOR 64(SI), Y11, Y11
  8873. VPXOR 96(SI), Y15, Y15
  8874. VMOVDQU Y3, (DI)
  8875. VMOVDQU Y7, 32(DI)
  8876. VMOVDQU Y11, 64(DI)
  8877. VMOVDQU Y15, 96(DI)
  // CX = 128 (bytes just written; presumably the amount sealAVX2SealHash still
  // has to hash -- confirm at that label). Advance SI and shrink BX by 128.
  8878. MOVQ $0x00000080, CX
  8879. LEAQ 128(SI), SI
  8880. SUBQ $0x80, BX
  // Leave set 2's 128 bytes of keystream in Y0, Y14, Y12, Y4 for the code at
  // sealAVX2SealHash.
  8881. VPERM2I128 $0x02, Y5, Y9, Y0
  8882. VPERM2I128 $0x02, Y13, Y1, Y14
  8883. VPERM2I128 $0x13, Y5, Y9, Y12
  8884. VPERM2I128 $0x13, Y13, Y1, Y4
  8885. JMP sealAVX2SealHash
  // sealAVX2Tail384: build three ChaCha register sets for the 384-byte tail.
  // Each YMM register carries one state row for two blocks (one per 128-bit
  // lane), so three sets yield 384 bytes of keystream.
  //   row 0 (constants):    Y0,  Y5,  Y6
  //   row 1 (from 32(BP)):  Y14, Y9,  Y10
  //   row 2 (from 64(BP)):  Y12, Y13, Y8
  //   row 3 (counter rows): Y4,  Y1,  Y2
  // The rows at 32(BP)/64(BP) are presumably the key halves saved earlier
  // (outside this view).
  8886. sealAVX2Tail384:
  8887. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  8888. VMOVDQA Y0, Y5
  8889. VMOVDQA Y0, Y6
  8890. VMOVDQA 32(BP), Y14
  8891. VMOVDQA Y14, Y9
  8892. VMOVDQA Y14, Y10
  8893. VMOVDQA 64(BP), Y12
  8894. VMOVDQA Y12, Y13
  8895. VMOVDQA Y12, Y8
  // Counter rows: start from the row saved at 192(BP) and step it by
  // avx2IncMask once per register set.
  8896. VMOVDQA 192(BP), Y4
  8897. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  8898. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  8899. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  // Keep copies of the initial counter rows in Y7, Y11, Y15; they are added
  // back after the rounds.
  8900. VMOVDQA Y4, Y7
  8901. VMOVDQA Y1, Y11
  8902. VMOVDQA Y2, Y15
  // sealAVX2Tail384LoopA: hash one extra 16-byte block at (DI), advance DI by
  // 16, then fall through into sealAVX2Tail384LoopB.
  8903. sealAVX2Tail384LoopA:
  // Accumulator (R10,R11,R12) += 16 bytes at (DI), plus 1 in the top limb
  // (the 2^128 bit of the block).
  8904. ADDQ (DI), R10
  8905. ADCQ 8(DI), R11
  8906. ADCQ $0x01, R12
  // Multiply by the 128-bit value at 0(BP)/8(BP) (presumably the Poly1305 key
  // r, stored outside this view); product limbs land in R13, R14, R15, R8.
  8907. MOVQ (BP), AX
  8908. MOVQ AX, R15
  8909. MULQ R10
  8910. MOVQ AX, R13
  8911. MOVQ DX, R14
  8912. MOVQ (BP), AX
  8913. MULQ R11
  8914. IMULQ R12, R15
  8915. ADDQ AX, R14
  8916. ADCQ DX, R15
  8917. MOVQ 8(BP), AX
  8918. MOVQ AX, R8
  8919. MULQ R10
  8920. ADDQ AX, R14
  8921. ADCQ $0x00, DX
  8922. MOVQ DX, R10
  8923. MOVQ 8(BP), AX
  8924. MULQ R11
  8925. ADDQ AX, R15
  8926. ADCQ $0x00, DX
  8927. IMULQ R12, R8
  8928. ADDQ R10, R15
  8929. ADCQ DX, R8
  // Reduce: keep the low 130 bits and fold the bits above 2^130 back in
  // times 5 (added once masked with ~3, once shifted right by 2).
  8930. MOVQ R13, R10
  8931. MOVQ R14, R11
  8932. MOVQ R15, R12
  8933. ANDQ $0x03, R12
  8934. MOVQ R15, R13
  8935. ANDQ $-4, R13
  8936. MOVQ R8, R14
  8937. SHRQ $0x02, R8, R15
  8938. SHRQ $0x02, R8
  8939. ADDQ R13, R10
  8940. ADCQ R14, R11
  8941. ADCQ $0x00, R12
  8942. ADDQ R15, R10
  8943. ADCQ R8, R11
  8944. ADCQ $0x00, R12
  8945. LEAQ 16(DI), DI
  // sealAVX2Tail384LoopB: one double round (column + diagonal) over three
  // ChaCha register sets -- (Y0,Y14,Y12,Y4), (Y5,Y9,Y13,Y1), (Y6,Y10,Y8,Y2) --
  // interleaved with two 16-byte hash steps over the data at (DI). Y3 is
  // rotate scratch; Y7, Y11, Y15 hold the initial counter rows. After the
  // loop, 256 bytes are encrypted and stored, and the last 128 bytes of
  // keystream are handed to sealAVX2SealHash in Y0, Y14, Y12, Y4.
  8946. sealAVX2Tail384LoopB:
  // Column quarter-round, set 1: add / xor / rotate by 16, 12, 8, 7.
  8947. VPADDD Y14, Y0, Y0
  8948. VPXOR Y0, Y4, Y4
  8949. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  8950. VPADDD Y4, Y12, Y12
  8951. VPXOR Y12, Y14, Y14
  8952. VPSLLD $0x0c, Y14, Y3
  8953. VPSRLD $0x14, Y14, Y14
  8954. VPXOR Y3, Y14, Y14
  8955. VPADDD Y14, Y0, Y0
  8956. VPXOR Y0, Y4, Y4
  8957. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  8958. VPADDD Y4, Y12, Y12
  8959. VPXOR Y12, Y14, Y14
  8960. VPSLLD $0x07, Y14, Y3
  8961. VPSRLD $0x19, Y14, Y14
  8962. VPXOR Y3, Y14, Y14
  // Column quarter-round, set 2.
  8963. VPADDD Y9, Y5, Y5
  8964. VPXOR Y5, Y1, Y1
  8965. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  8966. VPADDD Y1, Y13, Y13
  8967. VPXOR Y13, Y9, Y9
  8968. VPSLLD $0x0c, Y9, Y3
  8969. VPSRLD $0x14, Y9, Y9
  8970. VPXOR Y3, Y9, Y9
  8971. VPADDD Y9, Y5, Y5
  8972. VPXOR Y5, Y1, Y1
  8973. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  8974. VPADDD Y1, Y13, Y13
  8975. VPXOR Y13, Y9, Y9
  8976. VPSLLD $0x07, Y9, Y3
  8977. VPSRLD $0x19, Y9, Y9
  8978. VPXOR Y3, Y9, Y9
  // Column quarter-round, set 3.
  8979. VPADDD Y10, Y6, Y6
  8980. VPXOR Y6, Y2, Y2
  8981. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  8982. VPADDD Y2, Y8, Y8
  8983. VPXOR Y8, Y10, Y10
  8984. VPSLLD $0x0c, Y10, Y3
  8985. VPSRLD $0x14, Y10, Y10
  8986. VPXOR Y3, Y10, Y10
  8987. VPADDD Y10, Y6, Y6
  8988. VPXOR Y6, Y2, Y2
  8989. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  8990. VPADDD Y2, Y8, Y8
  8991. VPXOR Y8, Y10, Y10
  8992. VPSLLD $0x07, Y10, Y3
  8993. VPSRLD $0x19, Y10, Y10
  8994. VPXOR Y3, Y10, Y10
  // Hash step 1: accumulator (R10,R11,R12) += 16 bytes at (DI) plus the 2^128
  // bit, multiply by the value at 0(BP)/8(BP), then reduce by folding the
  // bits above 2^130 back in times 5.
  8995. ADDQ (DI), R10
  8996. ADCQ 8(DI), R11
  8997. ADCQ $0x01, R12
  8998. MOVQ (BP), AX
  8999. MOVQ AX, R15
  9000. MULQ R10
  9001. MOVQ AX, R13
  9002. MOVQ DX, R14
  9003. MOVQ (BP), AX
  9004. MULQ R11
  9005. IMULQ R12, R15
  9006. ADDQ AX, R14
  9007. ADCQ DX, R15
  9008. MOVQ 8(BP), AX
  9009. MOVQ AX, R8
  9010. MULQ R10
  9011. ADDQ AX, R14
  9012. ADCQ $0x00, DX
  9013. MOVQ DX, R10
  9014. MOVQ 8(BP), AX
  9015. MULQ R11
  9016. ADDQ AX, R15
  9017. ADCQ $0x00, DX
  9018. IMULQ R12, R8
  9019. ADDQ R10, R15
  9020. ADCQ DX, R8
  9021. MOVQ R13, R10
  9022. MOVQ R14, R11
  9023. MOVQ R15, R12
  9024. ANDQ $0x03, R12
  9025. MOVQ R15, R13
  9026. ANDQ $-4, R13
  9027. MOVQ R8, R14
  9028. SHRQ $0x02, R8, R15
  9029. SHRQ $0x02, R8
  9030. ADDQ R13, R10
  9031. ADCQ R14, R11
  9032. ADCQ $0x00, R12
  9033. ADDQ R15, R10
  9034. ADCQ R8, R11
  9035. ADCQ $0x00, R12
  // Rotate rows 1, 2, 3 by 4, 8, 12 bytes within each 128-bit lane so the
  // next quarter-rounds operate on the diagonals.
  9036. VPALIGNR $0x04, Y14, Y14, Y14
  9037. VPALIGNR $0x04, Y9, Y9, Y9
  9038. VPALIGNR $0x04, Y10, Y10, Y10
  9039. VPALIGNR $0x08, Y12, Y12, Y12
  9040. VPALIGNR $0x08, Y13, Y13, Y13
  9041. VPALIGNR $0x08, Y8, Y8, Y8
  9042. VPALIGNR $0x0c, Y4, Y4, Y4
  9043. VPALIGNR $0x0c, Y1, Y1, Y1
  9044. VPALIGNR $0x0c, Y2, Y2, Y2
  // Diagonal quarter-round, set 1.
  9045. VPADDD Y14, Y0, Y0
  9046. VPXOR Y0, Y4, Y4
  9047. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  9048. VPADDD Y4, Y12, Y12
  9049. VPXOR Y12, Y14, Y14
  9050. VPSLLD $0x0c, Y14, Y3
  9051. VPSRLD $0x14, Y14, Y14
  9052. VPXOR Y3, Y14, Y14
  9053. VPADDD Y14, Y0, Y0
  9054. VPXOR Y0, Y4, Y4
  9055. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  9056. VPADDD Y4, Y12, Y12
  9057. VPXOR Y12, Y14, Y14
  9058. VPSLLD $0x07, Y14, Y3
  9059. VPSRLD $0x19, Y14, Y14
  9060. VPXOR Y3, Y14, Y14
  // Diagonal quarter-round, set 2.
  9061. VPADDD Y9, Y5, Y5
  9062. VPXOR Y5, Y1, Y1
  9063. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  9064. VPADDD Y1, Y13, Y13
  9065. VPXOR Y13, Y9, Y9
  9066. VPSLLD $0x0c, Y9, Y3
  9067. VPSRLD $0x14, Y9, Y9
  9068. VPXOR Y3, Y9, Y9
  9069. VPADDD Y9, Y5, Y5
  9070. VPXOR Y5, Y1, Y1
  9071. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  9072. VPADDD Y1, Y13, Y13
  9073. VPXOR Y13, Y9, Y9
  9074. VPSLLD $0x07, Y9, Y3
  9075. VPSRLD $0x19, Y9, Y9
  9076. VPXOR Y3, Y9, Y9
  // Diagonal quarter-round, set 3.
  9077. VPADDD Y10, Y6, Y6
  9078. VPXOR Y6, Y2, Y2
  9079. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  9080. VPADDD Y2, Y8, Y8
  9081. VPXOR Y8, Y10, Y10
  9082. VPSLLD $0x0c, Y10, Y3
  9083. VPSRLD $0x14, Y10, Y10
  9084. VPXOR Y3, Y10, Y10
  9085. VPADDD Y10, Y6, Y6
  9086. VPXOR Y6, Y2, Y2
  9087. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  9088. VPADDD Y2, Y8, Y8
  9089. VPXOR Y8, Y10, Y10
  9090. VPSLLD $0x07, Y10, Y3
  9091. VPSRLD $0x19, Y10, Y10
  9092. VPXOR Y3, Y10, Y10
  // Hash step 2: same add / multiply / reduce sequence on the 16 bytes at 16(DI).
  9093. ADDQ 16(DI), R10
  9094. ADCQ 24(DI), R11
  9095. ADCQ $0x01, R12
  9096. MOVQ (BP), AX
  9097. MOVQ AX, R15
  9098. MULQ R10
  9099. MOVQ AX, R13
  9100. MOVQ DX, R14
  9101. MOVQ (BP), AX
  9102. MULQ R11
  9103. IMULQ R12, R15
  9104. ADDQ AX, R14
  9105. ADCQ DX, R15
  9106. MOVQ 8(BP), AX
  9107. MOVQ AX, R8
  9108. MULQ R10
  9109. ADDQ AX, R14
  9110. ADCQ $0x00, DX
  9111. MOVQ DX, R10
  9112. MOVQ 8(BP), AX
  9113. MULQ R11
  9114. ADDQ AX, R15
  9115. ADCQ $0x00, DX
  9116. IMULQ R12, R8
  9117. ADDQ R10, R15
  9118. ADCQ DX, R8
  9119. MOVQ R13, R10
  9120. MOVQ R14, R11
  9121. MOVQ R15, R12
  9122. ANDQ $0x03, R12
  9123. MOVQ R15, R13
  9124. ANDQ $-4, R13
  9125. MOVQ R8, R14
  9126. SHRQ $0x02, R8, R15
  9127. SHRQ $0x02, R8
  9128. ADDQ R13, R10
  9129. ADCQ R14, R11
  9130. ADCQ $0x00, R12
  9131. ADDQ R15, R10
  9132. ADCQ R8, R11
  9133. ADCQ $0x00, R12
  // 32 bytes were hashed in this pass; advance the data pointer.
  9134. LEAQ 32(DI), DI
  // Undo the diagonal rotation (12, 8, 4 bytes) to restore column order.
  9135. VPALIGNR $0x0c, Y14, Y14, Y14
  9136. VPALIGNR $0x0c, Y9, Y9, Y9
  9137. VPALIGNR $0x0c, Y10, Y10, Y10
  9138. VPALIGNR $0x08, Y12, Y12, Y12
  9139. VPALIGNR $0x08, Y13, Y13, Y13
  9140. VPALIGNR $0x08, Y8, Y8, Y8
  9141. VPALIGNR $0x04, Y4, Y4, Y4
  9142. VPALIGNR $0x04, Y1, Y1, Y1
  9143. VPALIGNR $0x04, Y2, Y2, Y2
  // Loop control: while CX stays positive after the decrement, re-enter via
  // LoopA (extra 16-byte hash step); afterwards keep re-entering LoopB until
  // R9 goes negative. CX and R9 are set outside this view -- TODO confirm.
  9144. DECQ CX
  9145. JG sealAVX2Tail384LoopA
  9146. DECQ R9
  9147. JGE sealAVX2Tail384LoopB
  // Feed-forward: add the input state back in -- constants, the rows saved at
  // 32(BP) and 64(BP), and the counter rows held in Y7 / Y11 / Y15.
  9148. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  9149. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  9150. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  9151. VPADDD 32(BP), Y14, Y14
  9152. VPADDD 32(BP), Y9, Y9
  9153. VPADDD 32(BP), Y10, Y10
  9154. VPADDD 64(BP), Y12, Y12
  9155. VPADDD 64(BP), Y13, Y13
  9156. VPADDD 64(BP), Y8, Y8
  9157. VPADDD Y7, Y4, Y4
  9158. VPADDD Y11, Y1, Y1
  9159. VPADDD Y15, Y2, Y2
  // Set 1 -> bytes 0..127. Selector 0x02 gathers the low 128-bit lanes of the
  // two sources, 0x13 gathers the high lanes. XOR with (SI), store at (DI).
  9160. VPERM2I128 $0x02, Y0, Y14, Y3
  9161. VPERM2I128 $0x02, Y12, Y4, Y7
  9162. VPERM2I128 $0x13, Y0, Y14, Y11
  9163. VPERM2I128 $0x13, Y12, Y4, Y15
  9164. VPXOR (SI), Y3, Y3
  9165. VPXOR 32(SI), Y7, Y7
  9166. VPXOR 64(SI), Y11, Y11
  9167. VPXOR 96(SI), Y15, Y15
  9168. VMOVDQU Y3, (DI)
  9169. VMOVDQU Y7, 32(DI)
  9170. VMOVDQU Y11, 64(DI)
  9171. VMOVDQU Y15, 96(DI)
  // Set 2 -> bytes 128..255.
  9172. VPERM2I128 $0x02, Y5, Y9, Y3
  9173. VPERM2I128 $0x02, Y13, Y1, Y7
  9174. VPERM2I128 $0x13, Y5, Y9, Y11
  9175. VPERM2I128 $0x13, Y13, Y1, Y15
  9176. VPXOR 128(SI), Y3, Y3
  9177. VPXOR 160(SI), Y7, Y7
  9178. VPXOR 192(SI), Y11, Y11
  9179. VPXOR 224(SI), Y15, Y15
  9180. VMOVDQU Y3, 128(DI)
  9181. VMOVDQU Y7, 160(DI)
  9182. VMOVDQU Y11, 192(DI)
  9183. VMOVDQU Y15, 224(DI)
  // CX = 256 (bytes just written; presumably the amount sealAVX2SealHash still
  // has to hash -- confirm at that label). Advance SI and shrink BX by 256.
  9184. MOVQ $0x00000100, CX
  9185. LEAQ 256(SI), SI
  9186. SUBQ $0x00000100, BX
  // Leave set 3's 128 bytes of keystream in Y0, Y14, Y12, Y4 for the code at
  // sealAVX2SealHash.
  9187. VPERM2I128 $0x02, Y6, Y10, Y0
  9188. VPERM2I128 $0x02, Y8, Y2, Y14
  9189. VPERM2I128 $0x13, Y6, Y10, Y12
  9190. VPERM2I128 $0x13, Y8, Y2, Y4
  9191. JMP sealAVX2SealHash
  // sealAVX2Tail512: build four ChaCha register sets for the 512-byte tail.
  // Each YMM register carries one state row for two blocks (one per 128-bit
  // lane), so four sets yield 512 bytes of keystream.
  //   row 0 (constants):    Y0,  Y5,  Y6,  Y7
  //   row 1 (from 32(BP)):  Y14, Y9,  Y10, Y11
  //   row 2 (from 64(BP)):  Y12, Y13, Y8,  Y15
  //   row 3 (counter rows): Y4,  Y1,  Y2,  Y3
  // The rows at 32(BP)/64(BP) are presumably the key halves saved earlier
  // (outside this view).
  9192. sealAVX2Tail512:
  9193. VMOVDQA ·chacha20Constants<>+0(SB), Y0
  9194. VMOVDQA Y0, Y5
  9195. VMOVDQA Y0, Y6
  9196. VMOVDQA Y0, Y7
  9197. VMOVDQA 32(BP), Y14
  9198. VMOVDQA Y14, Y9
  9199. VMOVDQA Y14, Y10
  9200. VMOVDQA Y14, Y11
  9201. VMOVDQA 64(BP), Y12
  9202. VMOVDQA Y12, Y13
  9203. VMOVDQA Y12, Y8
  9204. VMOVDQA Y12, Y15
  // Counter rows: start from the row saved at 192(BP) and step it by
  // avx2IncMask once per register set.
  9205. VMOVDQA 192(BP), Y4
  9206. VPADDD ·avx2IncMask<>+0(SB), Y4, Y4
  9207. VPADDD ·avx2IncMask<>+0(SB), Y4, Y1
  9208. VPADDD ·avx2IncMask<>+0(SB), Y1, Y2
  9209. VPADDD ·avx2IncMask<>+0(SB), Y2, Y3
  // All sixteen YMM registers are in use, so the initial counter rows are kept
  // in memory at 96..192(BP) for the feed-forward after the rounds. This also
  // overwrites 192(BP) with the newest counter row.
  9210. VMOVDQA Y4, 96(BP)
  9211. VMOVDQA Y1, 128(BP)
  9212. VMOVDQA Y2, 160(BP)
  9213. VMOVDQA Y3, 192(BP)
  // sealAVX2Tail512LoopA: hash one extra 16-byte block at (DI), advance DI by
  // 16, then fall through into sealAVX2Tail512LoopB.
  9214. sealAVX2Tail512LoopA:
  // Accumulator (R10,R11,R12) += 16 bytes at (DI), plus 1 in the top limb
  // (the 2^128 bit of the block).
  9215. ADDQ (DI), R10
  9216. ADCQ 8(DI), R11
  9217. ADCQ $0x01, R12
  // Multiply by the 128-bit value at 0(BP)/8(BP) (presumably the Poly1305 key
  // r, stored outside this view); product limbs land in R13, R14, R15, R8.
  9218. MOVQ (BP), AX
  9219. MOVQ AX, R15
  9220. MULQ R10
  9221. MOVQ AX, R13
  9222. MOVQ DX, R14
  9223. MOVQ (BP), AX
  9224. MULQ R11
  9225. IMULQ R12, R15
  9226. ADDQ AX, R14
  9227. ADCQ DX, R15
  9228. MOVQ 8(BP), AX
  9229. MOVQ AX, R8
  9230. MULQ R10
  9231. ADDQ AX, R14
  9232. ADCQ $0x00, DX
  9233. MOVQ DX, R10
  9234. MOVQ 8(BP), AX
  9235. MULQ R11
  9236. ADDQ AX, R15
  9237. ADCQ $0x00, DX
  9238. IMULQ R12, R8
  9239. ADDQ R10, R15
  9240. ADCQ DX, R8
  // Reduce: keep the low 130 bits and fold the bits above 2^130 back in
  // times 5 (added once masked with ~3, once shifted right by 2).
  9241. MOVQ R13, R10
  9242. MOVQ R14, R11
  9243. MOVQ R15, R12
  9244. ANDQ $0x03, R12
  9245. MOVQ R15, R13
  9246. ANDQ $-4, R13
  9247. MOVQ R8, R14
  9248. SHRQ $0x02, R8, R15
  9249. SHRQ $0x02, R8
  9250. ADDQ R13, R10
  9251. ADCQ R14, R11
  9252. ADCQ $0x00, R12
  9253. ADDQ R15, R10
  9254. ADCQ R8, R11
  9255. ADCQ $0x00, R12
  9256. LEAQ 16(DI), DI
  // sealAVX2Tail512LoopB: one double round (column + diagonal) over four
  // ChaCha register sets processed in lock-step -- (Y0,Y14,Y12,Y4),
  // (Y5,Y9,Y13,Y1), (Y6,Y10,Y8,Y2), (Y7,Y11,Y15,Y3) -- interleaved with two
  // 16-byte hash steps over the data at (DI). No YMM register is free, so Y15
  // is spilled to 224(BP) whenever it is needed as rotate scratch. After the
  // loop, 384 bytes are encrypted and stored, and the last 128 bytes of
  // keystream are handed to sealAVX2SealHash in Y0, Y14, Y12, Y4.
  9257. sealAVX2Tail512LoopB:
  // Column round, first half: a += b; d ^= a; d = rotl(d, 16); c += d; b ^= c.
  9258. VPADDD Y14, Y0, Y0
  9259. VPADDD Y9, Y5, Y5
  9260. VPADDD Y10, Y6, Y6
  9261. VPADDD Y11, Y7, Y7
  9262. VPXOR Y0, Y4, Y4
  9263. VPXOR Y5, Y1, Y1
  9264. VPXOR Y6, Y2, Y2
  9265. VPXOR Y7, Y3, Y3
  9266. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  9267. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  9268. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  9269. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  9270. VPADDD Y4, Y12, Y12
  9271. VPADDD Y1, Y13, Y13
  9272. VPADDD Y2, Y8, Y8
  9273. VPADDD Y3, Y15, Y15
  9274. VPXOR Y12, Y14, Y14
  9275. VPXOR Y13, Y9, Y9
  9276. VPXOR Y8, Y10, Y10
  9277. VPXOR Y15, Y11, Y11
  // b = rotl(b, 12), with Y15 spilled and used as scratch.
  9278. VMOVDQA Y15, 224(BP)
  9279. VPSLLD $0x0c, Y14, Y15
  9280. VPSRLD $0x14, Y14, Y14
  9281. VPXOR Y15, Y14, Y14
  9282. VPSLLD $0x0c, Y9, Y15
  9283. VPSRLD $0x14, Y9, Y9
  9284. VPXOR Y15, Y9, Y9
  9285. VPSLLD $0x0c, Y10, Y15
  9286. VPSRLD $0x14, Y10, Y10
  9287. VPXOR Y15, Y10, Y10
  9288. VPSLLD $0x0c, Y11, Y15
  9289. VPSRLD $0x14, Y11, Y11
  9290. VPXOR Y15, Y11, Y11
  9291. VMOVDQA 224(BP), Y15
  // Hash step 1: accumulator (R10,R11,R12) += 16 bytes at (DI) plus the 2^128
  // bit, then multiply by the value at 0(BP)/8(BP) (presumably the Poly1305
  // key r). This variant uses MULXQ, which takes its multiplicand from DX.
  9292. ADDQ (DI), R10
  9293. ADCQ 8(DI), R11
  9294. ADCQ $0x01, R12
  9295. MOVQ (BP), DX
  9296. MOVQ DX, R15
  9297. MULXQ R10, R13, R14
  9298. IMULQ R12, R15
  9299. MULXQ R11, AX, DX
  9300. ADDQ AX, R14
  9301. ADCQ DX, R15
  9302. MOVQ 8(BP), DX
  9303. MULXQ R10, R10, AX
  9304. ADDQ R10, R14
  9305. MULXQ R11, R11, R8
  9306. ADCQ R11, R15
  9307. ADCQ $0x00, R8
  9308. IMULQ R12, DX
  9309. ADDQ AX, R15
  9310. ADCQ DX, R8
  // Reduce: keep the low 130 bits and fold the bits above 2^130 back in
  // times 5 (added once masked with ~3, once shifted right by 2).
  9311. MOVQ R13, R10
  9312. MOVQ R14, R11
  9313. MOVQ R15, R12
  9314. ANDQ $0x03, R12
  9315. MOVQ R15, R13
  9316. ANDQ $-4, R13
  9317. MOVQ R8, R14
  9318. SHRQ $0x02, R8, R15
  9319. SHRQ $0x02, R8
  9320. ADDQ R13, R10
  9321. ADCQ R14, R11
  9322. ADCQ $0x00, R12
  9323. ADDQ R15, R10
  9324. ADCQ R8, R11
  9325. ADCQ $0x00, R12
  // Column round, second half: a += b; d ^= a; d = rotl(d, 8); c += d; b ^= c.
  9326. VPADDD Y14, Y0, Y0
  9327. VPADDD Y9, Y5, Y5
  9328. VPADDD Y10, Y6, Y6
  9329. VPADDD Y11, Y7, Y7
  9330. VPXOR Y0, Y4, Y4
  9331. VPXOR Y5, Y1, Y1
  9332. VPXOR Y6, Y2, Y2
  9333. VPXOR Y7, Y3, Y3
  9334. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  9335. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  9336. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  9337. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  9338. VPADDD Y4, Y12, Y12
  9339. VPADDD Y1, Y13, Y13
  9340. VPADDD Y2, Y8, Y8
  9341. VPADDD Y3, Y15, Y15
  9342. VPXOR Y12, Y14, Y14
  9343. VPXOR Y13, Y9, Y9
  9344. VPXOR Y8, Y10, Y10
  9345. VPXOR Y15, Y11, Y11
  // b = rotl(b, 7), with Y15 spilled and used as scratch.
  9346. VMOVDQA Y15, 224(BP)
  9347. VPSLLD $0x07, Y14, Y15
  9348. VPSRLD $0x19, Y14, Y14
  9349. VPXOR Y15, Y14, Y14
  9350. VPSLLD $0x07, Y9, Y15
  9351. VPSRLD $0x19, Y9, Y9
  9352. VPXOR Y15, Y9, Y9
  9353. VPSLLD $0x07, Y10, Y15
  9354. VPSRLD $0x19, Y10, Y10
  9355. VPXOR Y15, Y10, Y10
  9356. VPSLLD $0x07, Y11, Y15
  9357. VPSRLD $0x19, Y11, Y11
  9358. VPXOR Y15, Y11, Y11
  9359. VMOVDQA 224(BP), Y15
  // Rotate rows 1, 2, 3 by 4, 8, 12 bytes within each 128-bit lane so the
  // next quarter-rounds operate on the diagonals.
  9360. VPALIGNR $0x04, Y14, Y14, Y14
  9361. VPALIGNR $0x04, Y9, Y9, Y9
  9362. VPALIGNR $0x04, Y10, Y10, Y10
  9363. VPALIGNR $0x04, Y11, Y11, Y11
  9364. VPALIGNR $0x08, Y12, Y12, Y12
  9365. VPALIGNR $0x08, Y13, Y13, Y13
  9366. VPALIGNR $0x08, Y8, Y8, Y8
  9367. VPALIGNR $0x08, Y15, Y15, Y15
  9368. VPALIGNR $0x0c, Y4, Y4, Y4
  9369. VPALIGNR $0x0c, Y1, Y1, Y1
  9370. VPALIGNR $0x0c, Y2, Y2, Y2
  9371. VPALIGNR $0x0c, Y3, Y3, Y3
  // Diagonal round, first half (rotate by 16).
  9372. VPADDD Y14, Y0, Y0
  9373. VPADDD Y9, Y5, Y5
  9374. VPADDD Y10, Y6, Y6
  9375. VPADDD Y11, Y7, Y7
  9376. VPXOR Y0, Y4, Y4
  9377. VPXOR Y5, Y1, Y1
  9378. VPXOR Y6, Y2, Y2
  9379. VPXOR Y7, Y3, Y3
  9380. VPSHUFB ·rol16<>+0(SB), Y4, Y4
  9381. VPSHUFB ·rol16<>+0(SB), Y1, Y1
  9382. VPSHUFB ·rol16<>+0(SB), Y2, Y2
  9383. VPSHUFB ·rol16<>+0(SB), Y3, Y3
  9384. VPADDD Y4, Y12, Y12
  9385. VPADDD Y1, Y13, Y13
  9386. VPADDD Y2, Y8, Y8
  9387. VPADDD Y3, Y15, Y15
  9388. VPXOR Y12, Y14, Y14
  9389. VPXOR Y13, Y9, Y9
  9390. VPXOR Y8, Y10, Y10
  9391. VPXOR Y15, Y11, Y11
  // Hash step 2: same add / multiply / reduce sequence on the 16 bytes at 16(DI).
  9392. ADDQ 16(DI), R10
  9393. ADCQ 24(DI), R11
  9394. ADCQ $0x01, R12
  9395. MOVQ (BP), DX
  9396. MOVQ DX, R15
  9397. MULXQ R10, R13, R14
  9398. IMULQ R12, R15
  9399. MULXQ R11, AX, DX
  9400. ADDQ AX, R14
  9401. ADCQ DX, R15
  9402. MOVQ 8(BP), DX
  9403. MULXQ R10, R10, AX
  9404. ADDQ R10, R14
  9405. MULXQ R11, R11, R8
  9406. ADCQ R11, R15
  9407. ADCQ $0x00, R8
  9408. IMULQ R12, DX
  9409. ADDQ AX, R15
  9410. ADCQ DX, R8
  9411. MOVQ R13, R10
  9412. MOVQ R14, R11
  9413. MOVQ R15, R12
  9414. ANDQ $0x03, R12
  9415. MOVQ R15, R13
  9416. ANDQ $-4, R13
  9417. MOVQ R8, R14
  9418. SHRQ $0x02, R8, R15
  9419. SHRQ $0x02, R8
  9420. ADDQ R13, R10
  9421. ADCQ R14, R11
  9422. ADCQ $0x00, R12
  9423. ADDQ R15, R10
  9424. ADCQ R8, R11
  9425. ADCQ $0x00, R12
  // 32 bytes were hashed in this pass; advance the data pointer.
  9426. LEAQ 32(DI), DI
  // b = rotl(b, 12), with Y15 spilled and used as scratch.
  9427. VMOVDQA Y15, 224(BP)
  9428. VPSLLD $0x0c, Y14, Y15
  9429. VPSRLD $0x14, Y14, Y14
  9430. VPXOR Y15, Y14, Y14
  9431. VPSLLD $0x0c, Y9, Y15
  9432. VPSRLD $0x14, Y9, Y9
  9433. VPXOR Y15, Y9, Y9
  9434. VPSLLD $0x0c, Y10, Y15
  9435. VPSRLD $0x14, Y10, Y10
  9436. VPXOR Y15, Y10, Y10
  9437. VPSLLD $0x0c, Y11, Y15
  9438. VPSRLD $0x14, Y11, Y11
  9439. VPXOR Y15, Y11, Y11
  9440. VMOVDQA 224(BP), Y15
  // Diagonal round, second half (rotate by 8, then 7).
  9441. VPADDD Y14, Y0, Y0
  9442. VPADDD Y9, Y5, Y5
  9443. VPADDD Y10, Y6, Y6
  9444. VPADDD Y11, Y7, Y7
  9445. VPXOR Y0, Y4, Y4
  9446. VPXOR Y5, Y1, Y1
  9447. VPXOR Y6, Y2, Y2
  9448. VPXOR Y7, Y3, Y3
  9449. VPSHUFB ·rol8<>+0(SB), Y4, Y4
  9450. VPSHUFB ·rol8<>+0(SB), Y1, Y1
  9451. VPSHUFB ·rol8<>+0(SB), Y2, Y2
  9452. VPSHUFB ·rol8<>+0(SB), Y3, Y3
  9453. VPADDD Y4, Y12, Y12
  9454. VPADDD Y1, Y13, Y13
  9455. VPADDD Y2, Y8, Y8
  9456. VPADDD Y3, Y15, Y15
  9457. VPXOR Y12, Y14, Y14
  9458. VPXOR Y13, Y9, Y9
  9459. VPXOR Y8, Y10, Y10
  9460. VPXOR Y15, Y11, Y11
  9461. VMOVDQA Y15, 224(BP)
  9462. VPSLLD $0x07, Y14, Y15
  9463. VPSRLD $0x19, Y14, Y14
  9464. VPXOR Y15, Y14, Y14
  9465. VPSLLD $0x07, Y9, Y15
  9466. VPSRLD $0x19, Y9, Y9
  9467. VPXOR Y15, Y9, Y9
  9468. VPSLLD $0x07, Y10, Y15
  9469. VPSRLD $0x19, Y10, Y10
  9470. VPXOR Y15, Y10, Y10
  9471. VPSLLD $0x07, Y11, Y15
  9472. VPSRLD $0x19, Y11, Y11
  9473. VPXOR Y15, Y11, Y11
  9474. VMOVDQA 224(BP), Y15
  // Undo the diagonal rotation (12, 8, 4 bytes) to restore column order.
  9475. VPALIGNR $0x0c, Y14, Y14, Y14
  9476. VPALIGNR $0x0c, Y9, Y9, Y9
  9477. VPALIGNR $0x0c, Y10, Y10, Y10
  9478. VPALIGNR $0x0c, Y11, Y11, Y11
  9479. VPALIGNR $0x08, Y12, Y12, Y12
  9480. VPALIGNR $0x08, Y13, Y13, Y13
  9481. VPALIGNR $0x08, Y8, Y8, Y8
  9482. VPALIGNR $0x08, Y15, Y15, Y15
  9483. VPALIGNR $0x04, Y4, Y4, Y4
  9484. VPALIGNR $0x04, Y1, Y1, Y1
  9485. VPALIGNR $0x04, Y2, Y2, Y2
  9486. VPALIGNR $0x04, Y3, Y3, Y3
  // Loop control: while CX stays positive after the decrement, re-enter via
  // LoopA (extra 16-byte hash step); afterwards keep re-entering LoopB until
  // R9 goes negative. CX and R9 are set outside this view -- TODO confirm.
  9487. DECQ CX
  9488. JG sealAVX2Tail512LoopA
  9489. DECQ R9
  9490. JGE sealAVX2Tail512LoopB
  // Feed-forward: add the input state back in -- constants, the rows saved at
  // 32(BP) and 64(BP), and the per-set counter rows stored at 96..192(BP).
  9491. VPADDD ·chacha20Constants<>+0(SB), Y0, Y0
  9492. VPADDD ·chacha20Constants<>+0(SB), Y5, Y5
  9493. VPADDD ·chacha20Constants<>+0(SB), Y6, Y6
  9494. VPADDD ·chacha20Constants<>+0(SB), Y7, Y7
  9495. VPADDD 32(BP), Y14, Y14
  9496. VPADDD 32(BP), Y9, Y9
  9497. VPADDD 32(BP), Y10, Y10
  9498. VPADDD 32(BP), Y11, Y11
  9499. VPADDD 64(BP), Y12, Y12
  9500. VPADDD 64(BP), Y13, Y13
  9501. VPADDD 64(BP), Y8, Y8
  9502. VPADDD 64(BP), Y15, Y15
  9503. VPADDD 96(BP), Y4, Y4
  9504. VPADDD 128(BP), Y1, Y1
  9505. VPADDD 160(BP), Y2, Y2
  9506. VPADDD 192(BP), Y3, Y3
  // Park set 4's row 2 at 224(BP) so Y15 can serve as the output temporary.
  9507. VMOVDQA Y15, 224(BP)
  // Set 1 -> bytes 0..127, one 32-byte chunk at a time through Y15. Selector
  // 0x02 gathers the low 128-bit lanes of the two sources, 0x13 the high lanes.
  9508. VPERM2I128 $0x02, Y0, Y14, Y15
  9509. VPXOR (SI), Y15, Y15
  9510. VMOVDQU Y15, (DI)
  9511. VPERM2I128 $0x02, Y12, Y4, Y15
  9512. VPXOR 32(SI), Y15, Y15
  9513. VMOVDQU Y15, 32(DI)
  9514. VPERM2I128 $0x13, Y0, Y14, Y15
  9515. VPXOR 64(SI), Y15, Y15
  9516. VMOVDQU Y15, 64(DI)
  9517. VPERM2I128 $0x13, Y12, Y4, Y15
  9518. VPXOR 96(SI), Y15, Y15
  9519. VMOVDQU Y15, 96(DI)
  // Set 2 -> bytes 128..255 (Y0, Y14, Y12, Y4 are free to reuse now).
  9520. VPERM2I128 $0x02, Y5, Y9, Y0
  9521. VPERM2I128 $0x02, Y13, Y1, Y14
  9522. VPERM2I128 $0x13, Y5, Y9, Y12
  9523. VPERM2I128 $0x13, Y13, Y1, Y4
  9524. VPXOR 128(SI), Y0, Y0
  9525. VPXOR 160(SI), Y14, Y14
  9526. VPXOR 192(SI), Y12, Y12
  9527. VPXOR 224(SI), Y4, Y4
  9528. VMOVDQU Y0, 128(DI)
  9529. VMOVDQU Y14, 160(DI)
  9530. VMOVDQU Y12, 192(DI)
  9531. VMOVDQU Y4, 224(DI)
  // Set 3 -> bytes 256..383.
  9532. VPERM2I128 $0x02, Y6, Y10, Y0
  9533. VPERM2I128 $0x02, Y8, Y2, Y14
  9534. VPERM2I128 $0x13, Y6, Y10, Y12
  9535. VPERM2I128 $0x13, Y8, Y2, Y4
  9536. VPXOR 256(SI), Y0, Y0
  9537. VPXOR 288(SI), Y14, Y14
  9538. VPXOR 320(SI), Y12, Y12
  9539. VPXOR 352(SI), Y4, Y4
  9540. VMOVDQU Y0, 256(DI)
  9541. VMOVDQU Y14, 288(DI)
  9542. VMOVDQU Y12, 320(DI)
  9543. VMOVDQU Y4, 352(DI)
  // CX = 384 (bytes just written; presumably the amount sealAVX2SealHash still
  // has to hash -- confirm at that label). Advance SI and shrink BX by 384.
  9544. MOVQ $0x00000180, CX
  9545. LEAQ 384(SI), SI
  9546. SUBQ $0x00000180, BX
  // Leave set 4's 128 bytes of keystream in Y0, Y14, Y12, Y4 for the code at
  // sealAVX2SealHash; its row 2 is read back from the 224(BP) spill slot.
  9547. VPERM2I128 $0x02, Y7, Y11, Y0
  9548. VPERM2I128 $0x02, 224(BP), Y3, Y14
  9549. VPERM2I128 $0x13, Y7, Y11, Y12
  9550. VPERM2I128 $0x13, 224(BP), Y3, Y4
  9551. JMP sealAVX2SealHash