blake2s_amd64.s 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173
  1. // Code generated by command: go run blake2s_amd64_asm.go -out ../blake2s_amd64.s -pkg blake2s. DO NOT EDIT.
  2. //go:build amd64 && gc && !purego
  3. #include "textflag.h"
  4. // func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
  5. // Requires: SSE2
  //
  // Mixes the 64-byte blocks in `blocks` into the hash state h, one block per
  // loop iteration, and writes the advanced 64-bit counter back to c.
  //
  // Frame layout (BP = SP rounded up to a 16-byte boundary):
  //   0(BP)         8-byte counter copied from *c
  //   8(BP)         flag, zero-extended to 8 bytes
  //   16..655(BP)   40 vectors of 4 message words each, one vector per
  //                 PADDL-from-memory step (4 steps x 10 rounds)
  // 656 bytes of scratch plus at most 15 bytes of alignment slack fit in the
  // 672-byte frame.
  //
  // Register use:
  //   AX = h, BX = c, CX = flag, SI = current block, DX = bytes remaining
  //   X0, X1   = h[0:4], h[4:8]
  //   X2, X3   = iv0, iv1
  //   X4..X7   = working state for the current block, X8 = rotate scratch
  //   X12      = per-block counter increment (64 in the low qword)
  //   X13, X14 = rol16/rol8 masks; loaded but not referenced again in this routine
  //   X15      = counter in the low qword, flag in the high qword
  //
  // The loop body runs before the length is tested and exits only when DX
  // reaches exactly zero, so blocks_len must be a non-zero multiple of 64;
  // nothing here checks that.
  6. TEXT ·hashBlocksSSE2(SB), $672-48
  // Load the arguments.
  7. MOVQ h+0(FP), AX
  8. MOVQ c+8(FP), BX
  9. MOVL flag+16(FP), CX
  10. MOVQ blocks_base+24(FP), SI
  11. MOVQ blocks_len+32(FP), DX
  // BP = SP rounded up to a multiple of 16, so the MOVO and PADDL memory
  // operands below are 16-byte aligned.
  12. MOVQ SP, BP
  13. ADDQ $0x0f, BP
  14. ANDQ $-16, BP
  // Build the {counter, flag} vector in memory: *c in bytes 0..7, flag
  // (zero-extended by the MOVL load above) in bytes 8..15.
  15. MOVQ (BX), R9
  16. MOVQ R9, (BP)
  17. MOVQ CX, 8(BP)
  // Load the hash state and the constant tables.
  18. MOVOU (AX), X0
  19. MOVOU 16(AX), X1
  20. MOVOU iv0<>+0(SB), X2
  21. MOVOU iv1<>+0(SB), X3
  22. MOVOU counter<>+0(SB), X12
  23. MOVOU rol16<>+0(SB), X13
  24. MOVOU rol8<>+0(SB), X14
  25. MOVO (BP), X15
  26. loop:
  // Working state for this block: rows X4, X5 = h; rows X6, X7 = iv0, iv1.
  27. MOVO X0, X4
  28. MOVO X1, X5
  29. MOVO X2, X6
  30. MOVO X3, X7
  // Advance the counter by 64 (low qword of X15), then fold counter and flag
  // into the last row.
  31. PADDQ X12, X15
  32. PXOR X15, X7
  // Read the 64-byte block as eight qwords; each register holds two 32-bit
  // message words.
  33. MOVQ (SI), R8
  34. MOVQ 8(SI), R9
  35. MOVQ 16(SI), R10
  36. MOVQ 24(SI), R11
  37. MOVQ 32(SI), R12
  38. MOVQ 40(SI), R13
  39. MOVQ 48(SI), R14
  40. MOVQ 56(SI), R15
  // Scatter the message words into the scratch area. For each register the
  // low word is stored to ten slots, then SHRQ $0x20 brings the high word down
  // and it is stored to ten more. The slot offsets lay the words out in the
  // order the rounds consume them, so each round step can add four words with
  // a single aligned PADDL.
  41. MOVL R8, 16(BP)
  42. MOVL R8, 116(BP)
  43. MOVL R8, 164(BP)
  44. MOVL R8, 264(BP)
  45. MOVL R8, 288(BP)
  46. MOVL R8, 344(BP)
  47. MOVL R8, 432(BP)
  48. MOVL R8, 512(BP)
  49. MOVL R8, 540(BP)
  50. MOVL R8, 652(BP)
  51. SHRQ $0x20, R8
  52. MOVL R8, 32(BP)
  53. MOVL R8, 112(BP)
  54. MOVL R8, 200(BP)
  55. MOVL R8, 228(BP)
  56. MOVL R8, 320(BP)
  57. MOVL R8, 380(BP)
  58. MOVL R8, 404(BP)
  59. MOVL R8, 488(BP)
  60. MOVL R8, 568(BP)
  61. MOVL R8, 604(BP)
  62. MOVL R9, 20(BP)
  63. MOVL R9, 132(BP)
  64. MOVL R9, 168(BP)
  65. MOVL R9, 240(BP)
  66. MOVL R9, 280(BP)
  67. MOVL R9, 336(BP)
  68. MOVL R9, 456(BP)
  69. MOVL R9, 508(BP)
  70. MOVL R9, 576(BP)
  71. MOVL R9, 608(BP)
  72. SHRQ $0x20, R9
  73. MOVL R9, 36(BP)
  74. MOVL R9, 140(BP)
  75. MOVL R9, 180(BP)
  76. MOVL R9, 212(BP)
  77. MOVL R9, 316(BP)
  78. MOVL R9, 364(BP)
  79. MOVL R9, 452(BP)
  80. MOVL R9, 476(BP)
  81. MOVL R9, 552(BP)
  82. MOVL R9, 632(BP)
  83. MOVL R10, 24(BP)
  84. MOVL R10, 84(BP)
  85. MOVL R10, 204(BP)
  86. MOVL R10, 248(BP)
  87. MOVL R10, 296(BP)
  88. MOVL R10, 368(BP)
  89. MOVL R10, 412(BP)
  90. MOVL R10, 516(BP)
  91. MOVL R10, 584(BP)
  92. MOVL R10, 612(BP)
  93. SHRQ $0x20, R10
  94. MOVL R10, 40(BP)
  95. MOVL R10, 124(BP)
  96. MOVL R10, 152(BP)
  97. MOVL R10, 244(BP)
  98. MOVL R10, 276(BP)
  99. MOVL R10, 388(BP)
  100. MOVL R10, 416(BP)
  101. MOVL R10, 496(BP)
  102. MOVL R10, 588(BP)
  103. MOVL R10, 620(BP)
  104. MOVL R11, 28(BP)
  105. MOVL R11, 108(BP)
  106. MOVL R11, 196(BP)
  107. MOVL R11, 256(BP)
  108. MOVL R11, 312(BP)
  109. MOVL R11, 340(BP)
  110. MOVL R11, 436(BP)
  111. MOVL R11, 520(BP)
  112. MOVL R11, 528(BP)
  113. MOVL R11, 616(BP)
  114. SHRQ $0x20, R11
  115. MOVL R11, 44(BP)
  116. MOVL R11, 136(BP)
  117. MOVL R11, 184(BP)
  118. MOVL R11, 208(BP)
  119. MOVL R11, 292(BP)
  120. MOVL R11, 372(BP)
  121. MOVL R11, 448(BP)
  122. MOVL R11, 468(BP)
  123. MOVL R11, 580(BP)
  124. MOVL R11, 600(BP)
  125. MOVL R12, 48(BP)
  126. MOVL R12, 100(BP)
  127. MOVL R12, 160(BP)
  128. MOVL R12, 268(BP)
  129. MOVL R12, 328(BP)
  130. MOVL R12, 348(BP)
  131. MOVL R12, 444(BP)
  132. MOVL R12, 504(BP)
  133. MOVL R12, 556(BP)
  134. MOVL R12, 596(BP)
  135. SHRQ $0x20, R12
  136. MOVL R12, 64(BP)
  137. MOVL R12, 88(BP)
  138. MOVL R12, 188(BP)
  139. MOVL R12, 224(BP)
  140. MOVL R12, 272(BP)
  141. MOVL R12, 396(BP)
  142. MOVL R12, 440(BP)
  143. MOVL R12, 492(BP)
  144. MOVL R12, 548(BP)
  145. MOVL R12, 628(BP)
  146. MOVL R13, 52(BP)
  147. MOVL R13, 96(BP)
  148. MOVL R13, 176(BP)
  149. MOVL R13, 260(BP)
  150. MOVL R13, 284(BP)
  151. MOVL R13, 356(BP)
  152. MOVL R13, 428(BP)
  153. MOVL R13, 524(BP)
  154. MOVL R13, 572(BP)
  155. MOVL R13, 592(BP)
  156. SHRQ $0x20, R13
  157. MOVL R13, 68(BP)
  158. MOVL R13, 120(BP)
  159. MOVL R13, 144(BP)
  160. MOVL R13, 220(BP)
  161. MOVL R13, 308(BP)
  162. MOVL R13, 360(BP)
  163. MOVL R13, 460(BP)
  164. MOVL R13, 480(BP)
  165. MOVL R13, 536(BP)
  166. MOVL R13, 640(BP)
  167. MOVL R14, 56(BP)
  168. MOVL R14, 128(BP)
  169. MOVL R14, 148(BP)
  170. MOVL R14, 232(BP)
  171. MOVL R14, 324(BP)
  172. MOVL R14, 352(BP)
  173. MOVL R14, 400(BP)
  174. MOVL R14, 472(BP)
  175. MOVL R14, 560(BP)
  176. MOVL R14, 648(BP)
  177. SHRQ $0x20, R14
  178. MOVL R14, 72(BP)
  179. MOVL R14, 92(BP)
  180. MOVL R14, 172(BP)
  181. MOVL R14, 216(BP)
  182. MOVL R14, 332(BP)
  183. MOVL R14, 384(BP)
  184. MOVL R14, 424(BP)
  185. MOVL R14, 464(BP)
  186. MOVL R14, 564(BP)
  187. MOVL R14, 636(BP)
  188. MOVL R15, 60(BP)
  189. MOVL R15, 80(BP)
  190. MOVL R15, 192(BP)
  191. MOVL R15, 236(BP)
  192. MOVL R15, 304(BP)
  193. MOVL R15, 392(BP)
  194. MOVL R15, 408(BP)
  195. MOVL R15, 484(BP)
  196. MOVL R15, 532(BP)
  197. MOVL R15, 644(BP)
  198. SHRQ $0x20, R15
  199. MOVL R15, 76(BP)
  200. MOVL R15, 104(BP)
  201. MOVL R15, 156(BP)
  202. MOVL R15, 252(BP)
  203. MOVL R15, 300(BP)
  204. MOVL R15, 376(BP)
  205. MOVL R15, 420(BP)
  206. MOVL R15, 500(BP)
  207. MOVL R15, 544(BP)
  208. MOVL R15, 624(BP)
  // Round 1 of 10 (message vectors at 16..79(BP)).
  // With a=X4, b=X5, c=X6, d=X7, the first step is:
  //   a += m; a += b; d ^= a; d = rotr32(d, 16)
  //   c += d; b ^= c; b = rotr32(b, 12)
  // Each rotate is built from a copy in X8, a left shift, a right shift and a
  // PXOR (the shifted halves have no overlapping bits).
  209. PADDL 16(BP), X4
  210. PADDL X5, X4
  211. PXOR X4, X7
  212. MOVO X7, X8
  213. PSLLL $0x10, X8
  214. PSRLL $0x10, X7
  215. PXOR X8, X7
  216. PADDL X7, X6
  217. PXOR X6, X5
  218. MOVO X5, X8
  219. PSLLL $0x14, X8
  220. PSRLL $0x0c, X5
  221. PXOR X8, X5
  // Second step: same shape with the next message vector and rotates of 8 and 7.
  222. PADDL 32(BP), X4
  223. PADDL X5, X4
  224. PXOR X4, X7
  225. MOVO X7, X8
  226. PSLLL $0x18, X8
  227. PSRLL $0x08, X7
  228. PXOR X8, X7
  229. PADDL X7, X6
  230. PXOR X6, X5
  231. MOVO X5, X8
  232. PSLLL $0x19, X8
  233. PSRLL $0x07, X5
  234. PXOR X8, X5
  // Rotate the lanes of X5, X6 and X7 by one, two and three positions so the
  // next two steps combine a different grouping of the state words.
  235. PSHUFL $0x39, X5, X5
  236. PSHUFL $0x4e, X6, X6
  237. PSHUFL $0x93, X7, X7
  // Third and fourth steps: identical arithmetic on the re-grouped state.
  238. PADDL 48(BP), X4
  239. PADDL X5, X4
  240. PXOR X4, X7
  241. MOVO X7, X8
  242. PSLLL $0x10, X8
  243. PSRLL $0x10, X7
  244. PXOR X8, X7
  245. PADDL X7, X6
  246. PXOR X6, X5
  247. MOVO X5, X8
  248. PSLLL $0x14, X8
  249. PSRLL $0x0c, X5
  250. PXOR X8, X5
  251. PADDL 64(BP), X4
  252. PADDL X5, X4
  253. PXOR X4, X7
  254. MOVO X7, X8
  255. PSLLL $0x18, X8
  256. PSRLL $0x08, X7
  257. PXOR X8, X7
  258. PADDL X7, X6
  259. PXOR X6, X5
  260. MOVO X5, X8
  261. PSLLL $0x19, X8
  262. PSRLL $0x07, X5
  263. PXOR X8, X5
  // Undo the lane rotation (the 0x39 and 0x93 shuffles swap targets).
  264. PSHUFL $0x39, X7, X7
  265. PSHUFL $0x4e, X6, X6
  266. PSHUFL $0x93, X5, X5
  // Round 2 of 10 (message vectors at 80..143(BP)); same structure as round 1.
  267. PADDL 80(BP), X4
  268. PADDL X5, X4
  269. PXOR X4, X7
  270. MOVO X7, X8
  271. PSLLL $0x10, X8
  272. PSRLL $0x10, X7
  273. PXOR X8, X7
  274. PADDL X7, X6
  275. PXOR X6, X5
  276. MOVO X5, X8
  277. PSLLL $0x14, X8
  278. PSRLL $0x0c, X5
  279. PXOR X8, X5
  280. PADDL 96(BP), X4
  281. PADDL X5, X4
  282. PXOR X4, X7
  283. MOVO X7, X8
  284. PSLLL $0x18, X8
  285. PSRLL $0x08, X7
  286. PXOR X8, X7
  287. PADDL X7, X6
  288. PXOR X6, X5
  289. MOVO X5, X8
  290. PSLLL $0x19, X8
  291. PSRLL $0x07, X5
  292. PXOR X8, X5
  293. PSHUFL $0x39, X5, X5
  294. PSHUFL $0x4e, X6, X6
  295. PSHUFL $0x93, X7, X7
  296. PADDL 112(BP), X4
  297. PADDL X5, X4
  298. PXOR X4, X7
  299. MOVO X7, X8
  300. PSLLL $0x10, X8
  301. PSRLL $0x10, X7
  302. PXOR X8, X7
  303. PADDL X7, X6
  304. PXOR X6, X5
  305. MOVO X5, X8
  306. PSLLL $0x14, X8
  307. PSRLL $0x0c, X5
  308. PXOR X8, X5
  309. PADDL 128(BP), X4
  310. PADDL X5, X4
  311. PXOR X4, X7
  312. MOVO X7, X8
  313. PSLLL $0x18, X8
  314. PSRLL $0x08, X7
  315. PXOR X8, X7
  316. PADDL X7, X6
  317. PXOR X6, X5
  318. MOVO X5, X8
  319. PSLLL $0x19, X8
  320. PSRLL $0x07, X5
  321. PXOR X8, X5
  322. PSHUFL $0x39, X7, X7
  323. PSHUFL $0x4e, X6, X6
  324. PSHUFL $0x93, X5, X5
  // Round 3 of 10 (message vectors at 144..207(BP)).
  325. PADDL 144(BP), X4
  326. PADDL X5, X4
  327. PXOR X4, X7
  328. MOVO X7, X8
  329. PSLLL $0x10, X8
  330. PSRLL $0x10, X7
  331. PXOR X8, X7
  332. PADDL X7, X6
  333. PXOR X6, X5
  334. MOVO X5, X8
  335. PSLLL $0x14, X8
  336. PSRLL $0x0c, X5
  337. PXOR X8, X5
  338. PADDL 160(BP), X4
  339. PADDL X5, X4
  340. PXOR X4, X7
  341. MOVO X7, X8
  342. PSLLL $0x18, X8
  343. PSRLL $0x08, X7
  344. PXOR X8, X7
  345. PADDL X7, X6
  346. PXOR X6, X5
  347. MOVO X5, X8
  348. PSLLL $0x19, X8
  349. PSRLL $0x07, X5
  350. PXOR X8, X5
  351. PSHUFL $0x39, X5, X5
  352. PSHUFL $0x4e, X6, X6
  353. PSHUFL $0x93, X7, X7
  354. PADDL 176(BP), X4
  355. PADDL X5, X4
  356. PXOR X4, X7
  357. MOVO X7, X8
  358. PSLLL $0x10, X8
  359. PSRLL $0x10, X7
  360. PXOR X8, X7
  361. PADDL X7, X6
  362. PXOR X6, X5
  363. MOVO X5, X8
  364. PSLLL $0x14, X8
  365. PSRLL $0x0c, X5
  366. PXOR X8, X5
  367. PADDL 192(BP), X4
  368. PADDL X5, X4
  369. PXOR X4, X7
  370. MOVO X7, X8
  371. PSLLL $0x18, X8
  372. PSRLL $0x08, X7
  373. PXOR X8, X7
  374. PADDL X7, X6
  375. PXOR X6, X5
  376. MOVO X5, X8
  377. PSLLL $0x19, X8
  378. PSRLL $0x07, X5
  379. PXOR X8, X5
  380. PSHUFL $0x39, X7, X7
  381. PSHUFL $0x4e, X6, X6
  382. PSHUFL $0x93, X5, X5
  // Round 4 of 10 (message vectors at 208..271(BP)).
  383. PADDL 208(BP), X4
  384. PADDL X5, X4
  385. PXOR X4, X7
  386. MOVO X7, X8
  387. PSLLL $0x10, X8
  388. PSRLL $0x10, X7
  389. PXOR X8, X7
  390. PADDL X7, X6
  391. PXOR X6, X5
  392. MOVO X5, X8
  393. PSLLL $0x14, X8
  394. PSRLL $0x0c, X5
  395. PXOR X8, X5
  396. PADDL 224(BP), X4
  397. PADDL X5, X4
  398. PXOR X4, X7
  399. MOVO X7, X8
  400. PSLLL $0x18, X8
  401. PSRLL $0x08, X7
  402. PXOR X8, X7
  403. PADDL X7, X6
  404. PXOR X6, X5
  405. MOVO X5, X8
  406. PSLLL $0x19, X8
  407. PSRLL $0x07, X5
  408. PXOR X8, X5
  409. PSHUFL $0x39, X5, X5
  410. PSHUFL $0x4e, X6, X6
  411. PSHUFL $0x93, X7, X7
  412. PADDL 240(BP), X4
  413. PADDL X5, X4
  414. PXOR X4, X7
  415. MOVO X7, X8
  416. PSLLL $0x10, X8
  417. PSRLL $0x10, X7
  418. PXOR X8, X7
  419. PADDL X7, X6
  420. PXOR X6, X5
  421. MOVO X5, X8
  422. PSLLL $0x14, X8
  423. PSRLL $0x0c, X5
  424. PXOR X8, X5
  425. PADDL 256(BP), X4
  426. PADDL X5, X4
  427. PXOR X4, X7
  428. MOVO X7, X8
  429. PSLLL $0x18, X8
  430. PSRLL $0x08, X7
  431. PXOR X8, X7
  432. PADDL X7, X6
  433. PXOR X6, X5
  434. MOVO X5, X8
  435. PSLLL $0x19, X8
  436. PSRLL $0x07, X5
  437. PXOR X8, X5
  438. PSHUFL $0x39, X7, X7
  439. PSHUFL $0x4e, X6, X6
  440. PSHUFL $0x93, X5, X5
  // Round 5 of 10 (message vectors at 272..335(BP)).
  441. PADDL 272(BP), X4
  442. PADDL X5, X4
  443. PXOR X4, X7
  444. MOVO X7, X8
  445. PSLLL $0x10, X8
  446. PSRLL $0x10, X7
  447. PXOR X8, X7
  448. PADDL X7, X6
  449. PXOR X6, X5
  450. MOVO X5, X8
  451. PSLLL $0x14, X8
  452. PSRLL $0x0c, X5
  453. PXOR X8, X5
  454. PADDL 288(BP), X4
  455. PADDL X5, X4
  456. PXOR X4, X7
  457. MOVO X7, X8
  458. PSLLL $0x18, X8
  459. PSRLL $0x08, X7
  460. PXOR X8, X7
  461. PADDL X7, X6
  462. PXOR X6, X5
  463. MOVO X5, X8
  464. PSLLL $0x19, X8
  465. PSRLL $0x07, X5
  466. PXOR X8, X5
  467. PSHUFL $0x39, X5, X5
  468. PSHUFL $0x4e, X6, X6
  469. PSHUFL $0x93, X7, X7
  470. PADDL 304(BP), X4
  471. PADDL X5, X4
  472. PXOR X4, X7
  473. MOVO X7, X8
  474. PSLLL $0x10, X8
  475. PSRLL $0x10, X7
  476. PXOR X8, X7
  477. PADDL X7, X6
  478. PXOR X6, X5
  479. MOVO X5, X8
  480. PSLLL $0x14, X8
  481. PSRLL $0x0c, X5
  482. PXOR X8, X5
  483. PADDL 320(BP), X4
  484. PADDL X5, X4
  485. PXOR X4, X7
  486. MOVO X7, X8
  487. PSLLL $0x18, X8
  488. PSRLL $0x08, X7
  489. PXOR X8, X7
  490. PADDL X7, X6
  491. PXOR X6, X5
  492. MOVO X5, X8
  493. PSLLL $0x19, X8
  494. PSRLL $0x07, X5
  495. PXOR X8, X5
  496. PSHUFL $0x39, X7, X7
  497. PSHUFL $0x4e, X6, X6
  498. PSHUFL $0x93, X5, X5
  // Round 6 of 10 (message vectors at 336..399(BP)).
  499. PADDL 336(BP), X4
  500. PADDL X5, X4
  501. PXOR X4, X7
  502. MOVO X7, X8
  503. PSLLL $0x10, X8
  504. PSRLL $0x10, X7
  505. PXOR X8, X7
  506. PADDL X7, X6
  507. PXOR X6, X5
  508. MOVO X5, X8
  509. PSLLL $0x14, X8
  510. PSRLL $0x0c, X5
  511. PXOR X8, X5
  512. PADDL 352(BP), X4
  513. PADDL X5, X4
  514. PXOR X4, X7
  515. MOVO X7, X8
  516. PSLLL $0x18, X8
  517. PSRLL $0x08, X7
  518. PXOR X8, X7
  519. PADDL X7, X6
  520. PXOR X6, X5
  521. MOVO X5, X8
  522. PSLLL $0x19, X8
  523. PSRLL $0x07, X5
  524. PXOR X8, X5
  525. PSHUFL $0x39, X5, X5
  526. PSHUFL $0x4e, X6, X6
  527. PSHUFL $0x93, X7, X7
  528. PADDL 368(BP), X4
  529. PADDL X5, X4
  530. PXOR X4, X7
  531. MOVO X7, X8
  532. PSLLL $0x10, X8
  533. PSRLL $0x10, X7
  534. PXOR X8, X7
  535. PADDL X7, X6
  536. PXOR X6, X5
  537. MOVO X5, X8
  538. PSLLL $0x14, X8
  539. PSRLL $0x0c, X5
  540. PXOR X8, X5
  541. PADDL 384(BP), X4
  542. PADDL X5, X4
  543. PXOR X4, X7
  544. MOVO X7, X8
  545. PSLLL $0x18, X8
  546. PSRLL $0x08, X7
  547. PXOR X8, X7
  548. PADDL X7, X6
  549. PXOR X6, X5
  550. MOVO X5, X8
  551. PSLLL $0x19, X8
  552. PSRLL $0x07, X5
  553. PXOR X8, X5
  554. PSHUFL $0x39, X7, X7
  555. PSHUFL $0x4e, X6, X6
  556. PSHUFL $0x93, X5, X5
  // Round 7 of 10 (message vectors at 400..463(BP)).
  557. PADDL 400(BP), X4
  558. PADDL X5, X4
  559. PXOR X4, X7
  560. MOVO X7, X8
  561. PSLLL $0x10, X8
  562. PSRLL $0x10, X7
  563. PXOR X8, X7
  564. PADDL X7, X6
  565. PXOR X6, X5
  566. MOVO X5, X8
  567. PSLLL $0x14, X8
  568. PSRLL $0x0c, X5
  569. PXOR X8, X5
  570. PADDL 416(BP), X4
  571. PADDL X5, X4
  572. PXOR X4, X7
  573. MOVO X7, X8
  574. PSLLL $0x18, X8
  575. PSRLL $0x08, X7
  576. PXOR X8, X7
  577. PADDL X7, X6
  578. PXOR X6, X5
  579. MOVO X5, X8
  580. PSLLL $0x19, X8
  581. PSRLL $0x07, X5
  582. PXOR X8, X5
  583. PSHUFL $0x39, X5, X5
  584. PSHUFL $0x4e, X6, X6
  585. PSHUFL $0x93, X7, X7
  586. PADDL 432(BP), X4
  587. PADDL X5, X4
  588. PXOR X4, X7
  589. MOVO X7, X8
  590. PSLLL $0x10, X8
  591. PSRLL $0x10, X7
  592. PXOR X8, X7
  593. PADDL X7, X6
  594. PXOR X6, X5
  595. MOVO X5, X8
  596. PSLLL $0x14, X8
  597. PSRLL $0x0c, X5
  598. PXOR X8, X5
  599. PADDL 448(BP), X4
  600. PADDL X5, X4
  601. PXOR X4, X7
  602. MOVO X7, X8
  603. PSLLL $0x18, X8
  604. PSRLL $0x08, X7
  605. PXOR X8, X7
  606. PADDL X7, X6
  607. PXOR X6, X5
  608. MOVO X5, X8
  609. PSLLL $0x19, X8
  610. PSRLL $0x07, X5
  611. PXOR X8, X5
  612. PSHUFL $0x39, X7, X7
  613. PSHUFL $0x4e, X6, X6
  614. PSHUFL $0x93, X5, X5
  // Round 8 of 10 (message vectors at 464..527(BP)).
  615. PADDL 464(BP), X4
  616. PADDL X5, X4
  617. PXOR X4, X7
  618. MOVO X7, X8
  619. PSLLL $0x10, X8
  620. PSRLL $0x10, X7
  621. PXOR X8, X7
  622. PADDL X7, X6
  623. PXOR X6, X5
  624. MOVO X5, X8
  625. PSLLL $0x14, X8
  626. PSRLL $0x0c, X5
  627. PXOR X8, X5
  628. PADDL 480(BP), X4
  629. PADDL X5, X4
  630. PXOR X4, X7
  631. MOVO X7, X8
  632. PSLLL $0x18, X8
  633. PSRLL $0x08, X7
  634. PXOR X8, X7
  635. PADDL X7, X6
  636. PXOR X6, X5
  637. MOVO X5, X8
  638. PSLLL $0x19, X8
  639. PSRLL $0x07, X5
  640. PXOR X8, X5
  641. PSHUFL $0x39, X5, X5
  642. PSHUFL $0x4e, X6, X6
  643. PSHUFL $0x93, X7, X7
  644. PADDL 496(BP), X4
  645. PADDL X5, X4
  646. PXOR X4, X7
  647. MOVO X7, X8
  648. PSLLL $0x10, X8
  649. PSRLL $0x10, X7
  650. PXOR X8, X7
  651. PADDL X7, X6
  652. PXOR X6, X5
  653. MOVO X5, X8
  654. PSLLL $0x14, X8
  655. PSRLL $0x0c, X5
  656. PXOR X8, X5
  657. PADDL 512(BP), X4
  658. PADDL X5, X4
  659. PXOR X4, X7
  660. MOVO X7, X8
  661. PSLLL $0x18, X8
  662. PSRLL $0x08, X7
  663. PXOR X8, X7
  664. PADDL X7, X6
  665. PXOR X6, X5
  666. MOVO X5, X8
  667. PSLLL $0x19, X8
  668. PSRLL $0x07, X5
  669. PXOR X8, X5
  670. PSHUFL $0x39, X7, X7
  671. PSHUFL $0x4e, X6, X6
  672. PSHUFL $0x93, X5, X5
  // Round 9 of 10 (message vectors at 528..591(BP)).
  673. PADDL 528(BP), X4
  674. PADDL X5, X4
  675. PXOR X4, X7
  676. MOVO X7, X8
  677. PSLLL $0x10, X8
  678. PSRLL $0x10, X7
  679. PXOR X8, X7
  680. PADDL X7, X6
  681. PXOR X6, X5
  682. MOVO X5, X8
  683. PSLLL $0x14, X8
  684. PSRLL $0x0c, X5
  685. PXOR X8, X5
  686. PADDL 544(BP), X4
  687. PADDL X5, X4
  688. PXOR X4, X7
  689. MOVO X7, X8
  690. PSLLL $0x18, X8
  691. PSRLL $0x08, X7
  692. PXOR X8, X7
  693. PADDL X7, X6
  694. PXOR X6, X5
  695. MOVO X5, X8
  696. PSLLL $0x19, X8
  697. PSRLL $0x07, X5
  698. PXOR X8, X5
  699. PSHUFL $0x39, X5, X5
  700. PSHUFL $0x4e, X6, X6
  701. PSHUFL $0x93, X7, X7
  702. PADDL 560(BP), X4
  703. PADDL X5, X4
  704. PXOR X4, X7
  705. MOVO X7, X8
  706. PSLLL $0x10, X8
  707. PSRLL $0x10, X7
  708. PXOR X8, X7
  709. PADDL X7, X6
  710. PXOR X6, X5
  711. MOVO X5, X8
  712. PSLLL $0x14, X8
  713. PSRLL $0x0c, X5
  714. PXOR X8, X5
  715. PADDL 576(BP), X4
  716. PADDL X5, X4
  717. PXOR X4, X7
  718. MOVO X7, X8
  719. PSLLL $0x18, X8
  720. PSRLL $0x08, X7
  721. PXOR X8, X7
  722. PADDL X7, X6
  723. PXOR X6, X5
  724. MOVO X5, X8
  725. PSLLL $0x19, X8
  726. PSRLL $0x07, X5
  727. PXOR X8, X5
  728. PSHUFL $0x39, X7, X7
  729. PSHUFL $0x4e, X6, X6
  730. PSHUFL $0x93, X5, X5
  // Round 10 of 10 (message vectors at 592..655(BP)).
  731. PADDL 592(BP), X4
  732. PADDL X5, X4
  733. PXOR X4, X7
  734. MOVO X7, X8
  735. PSLLL $0x10, X8
  736. PSRLL $0x10, X7
  737. PXOR X8, X7
  738. PADDL X7, X6
  739. PXOR X6, X5
  740. MOVO X5, X8
  741. PSLLL $0x14, X8
  742. PSRLL $0x0c, X5
  743. PXOR X8, X5
  744. PADDL 608(BP), X4
  745. PADDL X5, X4
  746. PXOR X4, X7
  747. MOVO X7, X8
  748. PSLLL $0x18, X8
  749. PSRLL $0x08, X7
  750. PXOR X8, X7
  751. PADDL X7, X6
  752. PXOR X6, X5
  753. MOVO X5, X8
  754. PSLLL $0x19, X8
  755. PSRLL $0x07, X5
  756. PXOR X8, X5
  757. PSHUFL $0x39, X5, X5
  758. PSHUFL $0x4e, X6, X6
  759. PSHUFL $0x93, X7, X7
  760. PADDL 624(BP), X4
  761. PADDL X5, X4
  762. PXOR X4, X7
  763. MOVO X7, X8
  764. PSLLL $0x10, X8
  765. PSRLL $0x10, X7
  766. PXOR X8, X7
  767. PADDL X7, X6
  768. PXOR X6, X5
  769. MOVO X5, X8
  770. PSLLL $0x14, X8
  771. PSRLL $0x0c, X5
  772. PXOR X8, X5
  773. PADDL 640(BP), X4
  774. PADDL X5, X4
  775. PXOR X4, X7
  776. MOVO X7, X8
  777. PSLLL $0x18, X8
  778. PSRLL $0x08, X7
  779. PXOR X8, X7
  780. PADDL X7, X6
  781. PXOR X6, X5
  782. MOVO X5, X8
  783. PSLLL $0x19, X8
  784. PSRLL $0x07, X5
  785. PXOR X8, X5
  786. PSHUFL $0x39, X7, X7
  787. PSHUFL $0x4e, X6, X6
  788. PSHUFL $0x93, X5, X5
  // Fold the working state back into the hash state:
  //   h[0:4] ^= X4 ^ X6,  h[4:8] ^= X5 ^ X7.
  789. PXOR X4, X0
  790. PXOR X5, X1
  791. PXOR X6, X0
  792. PXOR X7, X1
  // Advance to the next 64-byte block; repeat until the remaining length is
  // exactly zero.
  793. LEAQ 64(SI), SI
  794. SUBQ $0x40, DX
  795. JNE loop
  // Write back the counter (low 8 bytes of X15, via the scratch slot) to *c
  // and the updated state to *h.
  796. MOVO X15, (BP)
  797. MOVQ (BP), R9
  798. MOVQ R9, (BX)
  799. MOVOU X0, (AX)
  800. MOVOU X1, 16(AX)
  801. RET
  // iv0: four 32-bit constants in a 16-byte read-only, pointer-free symbol.
  // The hashBlocks routines load it into X2 and copy it into the working state
  // (X6) at the top of every block. The values are the first four BLAKE2s IV
  // words listed in RFC 7693.
  802. DATA iv0<>+0(SB)/4, $0x6a09e667
  803. DATA iv0<>+4(SB)/4, $0xbb67ae85
  804. DATA iv0<>+8(SB)/4, $0x3c6ef372
  805. DATA iv0<>+12(SB)/4, $0xa54ff53a
  806. GLOBL iv0<>(SB), RODATA|NOPTR, $16
  // iv1: four 32-bit constants in a 16-byte read-only, pointer-free symbol.
  // The hashBlocks routines load it into X3, copy it into the working state
  // (X7) at the top of every block, and XOR the counter/flag vector into that
  // copy. The values are the last four BLAKE2s IV words listed in RFC 7693.
  807. DATA iv1<>+0(SB)/4, $0x510e527f
  808. DATA iv1<>+4(SB)/4, $0x9b05688c
  809. DATA iv1<>+8(SB)/4, $0x1f83d9ab
  810. DATA iv1<>+12(SB)/4, $0x5be0cd19
  811. GLOBL iv1<>(SB), RODATA|NOPTR, $16
  // counter: per-block counter increment. The low qword is 0x40 (64, the block
  // size in bytes) and the high qword is zero, so the PADDQ in the block loop
  // advances only the 64-bit counter lane and leaves the flag lane unchanged.
  812. DATA counter<>+0(SB)/8, $0x0000000000000040
  813. DATA counter<>+8(SB)/8, $0x0000000000000000
  814. GLOBL counter<>(SB), RODATA|NOPTR, $16
  // rol16: PSHUFB byte-selection mask, loaded into X13. Within each 32-bit lane
  // it selects source bytes 2,3,0,1, i.e. rotates the lane by 16 bits. The
  // SSSE3 routine uses it in place of the shift/shift/XOR sequence that the
  // SSE2 routine uses for the 16-bit rotate.
  815. DATA rol16<>+0(SB)/8, $0x0504070601000302
  816. DATA rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a
  817. GLOBL rol16<>(SB), RODATA|NOPTR, $16
  // rol8: PSHUFB byte-selection mask, loaded into X14. Within each 32-bit lane
  // it selects source bytes 1,2,3,0, i.e. rotates the lane right by 8 bits
  // (equivalently left by 24). The SSSE3 routine uses it in place of the
  // PSLLL $0x18 / PSRLL $0x08 / PXOR sequence found in the SSE2 routine.
  818. DATA rol8<>+0(SB)/8, $0x0407060500030201
  819. DATA rol8<>+8(SB)/8, $0x0c0f0e0d080b0a09
  820. GLOBL rol8<>(SB), RODATA|NOPTR, $16
  821. // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
  822. // Requires: SSE2, SSSE3
  823. TEXT ·hashBlocksSSSE3(SB), $672-48
  824. MOVQ h+0(FP), AX
  825. MOVQ c+8(FP), BX
  826. MOVL flag+16(FP), CX
  827. MOVQ blocks_base+24(FP), SI
  828. MOVQ blocks_len+32(FP), DX
  829. MOVQ SP, BP
  830. ADDQ $0x0f, BP
  831. ANDQ $-16, BP
  832. MOVQ (BX), R9
  833. MOVQ R9, (BP)
  834. MOVQ CX, 8(BP)
  835. MOVOU (AX), X0
  836. MOVOU 16(AX), X1
  837. MOVOU iv0<>+0(SB), X2
  838. MOVOU iv1<>+0(SB), X3
  839. MOVOU counter<>+0(SB), X12
  840. MOVOU rol16<>+0(SB), X13
  841. MOVOU rol8<>+0(SB), X14
  842. MOVO (BP), X15
  843. loop:
  844. MOVO X0, X4
  845. MOVO X1, X5
  846. MOVO X2, X6
  847. MOVO X3, X7
  848. PADDQ X12, X15
  849. PXOR X15, X7
  850. MOVQ (SI), R8
  851. MOVQ 8(SI), R9
  852. MOVQ 16(SI), R10
  853. MOVQ 24(SI), R11
  854. MOVQ 32(SI), R12
  855. MOVQ 40(SI), R13
  856. MOVQ 48(SI), R14
  857. MOVQ 56(SI), R15
  858. MOVL R8, 16(BP)
  859. MOVL R8, 116(BP)
  860. MOVL R8, 164(BP)
  861. MOVL R8, 264(BP)
  862. MOVL R8, 288(BP)
  863. MOVL R8, 344(BP)
  864. MOVL R8, 432(BP)
  865. MOVL R8, 512(BP)
  866. MOVL R8, 540(BP)
  867. MOVL R8, 652(BP)
  868. SHRQ $0x20, R8
  869. MOVL R8, 32(BP)
  870. MOVL R8, 112(BP)
  871. MOVL R8, 200(BP)
  872. MOVL R8, 228(BP)
  873. MOVL R8, 320(BP)
  874. MOVL R8, 380(BP)
  875. MOVL R8, 404(BP)
  876. MOVL R8, 488(BP)
  877. MOVL R8, 568(BP)
  878. MOVL R8, 604(BP)
  879. MOVL R9, 20(BP)
  880. MOVL R9, 132(BP)
  881. MOVL R9, 168(BP)
  882. MOVL R9, 240(BP)
  883. MOVL R9, 280(BP)
  884. MOVL R9, 336(BP)
  885. MOVL R9, 456(BP)
  886. MOVL R9, 508(BP)
  887. MOVL R9, 576(BP)
  888. MOVL R9, 608(BP)
  889. SHRQ $0x20, R9
  890. MOVL R9, 36(BP)
  891. MOVL R9, 140(BP)
  892. MOVL R9, 180(BP)
  893. MOVL R9, 212(BP)
  894. MOVL R9, 316(BP)
  895. MOVL R9, 364(BP)
  896. MOVL R9, 452(BP)
  897. MOVL R9, 476(BP)
  898. MOVL R9, 552(BP)
  899. MOVL R9, 632(BP)
  900. MOVL R10, 24(BP)
  901. MOVL R10, 84(BP)
  902. MOVL R10, 204(BP)
  903. MOVL R10, 248(BP)
  904. MOVL R10, 296(BP)
  905. MOVL R10, 368(BP)
  906. MOVL R10, 412(BP)
  907. MOVL R10, 516(BP)
  908. MOVL R10, 584(BP)
  909. MOVL R10, 612(BP)
  910. SHRQ $0x20, R10
  911. MOVL R10, 40(BP)
  912. MOVL R10, 124(BP)
  913. MOVL R10, 152(BP)
  914. MOVL R10, 244(BP)
  915. MOVL R10, 276(BP)
  916. MOVL R10, 388(BP)
  917. MOVL R10, 416(BP)
  918. MOVL R10, 496(BP)
  919. MOVL R10, 588(BP)
  920. MOVL R10, 620(BP)
  921. MOVL R11, 28(BP)
  922. MOVL R11, 108(BP)
  923. MOVL R11, 196(BP)
  924. MOVL R11, 256(BP)
  925. MOVL R11, 312(BP)
  926. MOVL R11, 340(BP)
  927. MOVL R11, 436(BP)
  928. MOVL R11, 520(BP)
  929. MOVL R11, 528(BP)
  930. MOVL R11, 616(BP)
  931. SHRQ $0x20, R11
  932. MOVL R11, 44(BP)
  933. MOVL R11, 136(BP)
  934. MOVL R11, 184(BP)
  935. MOVL R11, 208(BP)
  936. MOVL R11, 292(BP)
  937. MOVL R11, 372(BP)
  938. MOVL R11, 448(BP)
  939. MOVL R11, 468(BP)
  940. MOVL R11, 580(BP)
  941. MOVL R11, 600(BP)
  942. MOVL R12, 48(BP)
  943. MOVL R12, 100(BP)
  944. MOVL R12, 160(BP)
  945. MOVL R12, 268(BP)
  946. MOVL R12, 328(BP)
  947. MOVL R12, 348(BP)
  948. MOVL R12, 444(BP)
  949. MOVL R12, 504(BP)
  950. MOVL R12, 556(BP)
  951. MOVL R12, 596(BP)
  952. SHRQ $0x20, R12
  953. MOVL R12, 64(BP)
  954. MOVL R12, 88(BP)
  955. MOVL R12, 188(BP)
  956. MOVL R12, 224(BP)
  957. MOVL R12, 272(BP)
  958. MOVL R12, 396(BP)
  959. MOVL R12, 440(BP)
  960. MOVL R12, 492(BP)
  961. MOVL R12, 548(BP)
  962. MOVL R12, 628(BP)
  963. MOVL R13, 52(BP)
  964. MOVL R13, 96(BP)
  965. MOVL R13, 176(BP)
  966. MOVL R13, 260(BP)
  967. MOVL R13, 284(BP)
  968. MOVL R13, 356(BP)
  969. MOVL R13, 428(BP)
  970. MOVL R13, 524(BP)
  971. MOVL R13, 572(BP)
  972. MOVL R13, 592(BP)
  973. SHRQ $0x20, R13
  974. MOVL R13, 68(BP)
  975. MOVL R13, 120(BP)
  976. MOVL R13, 144(BP)
  977. MOVL R13, 220(BP)
  978. MOVL R13, 308(BP)
  979. MOVL R13, 360(BP)
  980. MOVL R13, 460(BP)
  981. MOVL R13, 480(BP)
  982. MOVL R13, 536(BP)
  983. MOVL R13, 640(BP)
  984. MOVL R14, 56(BP)
  985. MOVL R14, 128(BP)
  986. MOVL R14, 148(BP)
  987. MOVL R14, 232(BP)
  988. MOVL R14, 324(BP)
  989. MOVL R14, 352(BP)
  990. MOVL R14, 400(BP)
  991. MOVL R14, 472(BP)
  992. MOVL R14, 560(BP)
  993. MOVL R14, 648(BP)
  994. SHRQ $0x20, R14
  995. MOVL R14, 72(BP)
  996. MOVL R14, 92(BP)
  997. MOVL R14, 172(BP)
  998. MOVL R14, 216(BP)
  999. MOVL R14, 332(BP)
  1000. MOVL R14, 384(BP)
  1001. MOVL R14, 424(BP)
  1002. MOVL R14, 464(BP)
  1003. MOVL R14, 564(BP)
  1004. MOVL R14, 636(BP)
  1005. MOVL R15, 60(BP)
  1006. MOVL R15, 80(BP)
  1007. MOVL R15, 192(BP)
  1008. MOVL R15, 236(BP)
  1009. MOVL R15, 304(BP)
  1010. MOVL R15, 392(BP)
  1011. MOVL R15, 408(BP)
  1012. MOVL R15, 484(BP)
  1013. MOVL R15, 532(BP)
  1014. MOVL R15, 644(BP)
  1015. SHRQ $0x20, R15
  1016. MOVL R15, 76(BP)
  1017. MOVL R15, 104(BP)
  1018. MOVL R15, 156(BP)
  1019. MOVL R15, 252(BP)
  1020. MOVL R15, 300(BP)
  1021. MOVL R15, 376(BP)
  1022. MOVL R15, 420(BP)
  1023. MOVL R15, 500(BP)
  1024. MOVL R15, 544(BP)
  1025. MOVL R15, 624(BP)
  1026. PADDL 16(BP), X4
  1027. PADDL X5, X4
  1028. PXOR X4, X7
  1029. PSHUFB X13, X7
  1030. PADDL X7, X6
  1031. PXOR X6, X5
  1032. MOVO X5, X8
  1033. PSLLL $0x14, X8
  1034. PSRLL $0x0c, X5
  1035. PXOR X8, X5
  1036. PADDL 32(BP), X4
  1037. PADDL X5, X4
  1038. PXOR X4, X7
  1039. PSHUFB X14, X7
  1040. PADDL X7, X6
  1041. PXOR X6, X5
  1042. MOVO X5, X8
  1043. PSLLL $0x19, X8
  1044. PSRLL $0x07, X5
  1045. PXOR X8, X5
  1046. PSHUFL $0x39, X5, X5
  1047. PSHUFL $0x4e, X6, X6
  1048. PSHUFL $0x93, X7, X7
  1049. PADDL 48(BP), X4
  1050. PADDL X5, X4
  1051. PXOR X4, X7
  1052. PSHUFB X13, X7
  1053. PADDL X7, X6
  1054. PXOR X6, X5
  1055. MOVO X5, X8
  1056. PSLLL $0x14, X8
  1057. PSRLL $0x0c, X5
  1058. PXOR X8, X5
  1059. PADDL 64(BP), X4
  1060. PADDL X5, X4
  1061. PXOR X4, X7
  1062. PSHUFB X14, X7
  1063. PADDL X7, X6
  1064. PXOR X6, X5
  1065. MOVO X5, X8
  1066. PSLLL $0x19, X8
  1067. PSRLL $0x07, X5
  1068. PXOR X8, X5
  1069. PSHUFL $0x39, X7, X7
  1070. PSHUFL $0x4e, X6, X6
  1071. PSHUFL $0x93, X5, X5
  1072. PADDL 80(BP), X4
  1073. PADDL X5, X4
  1074. PXOR X4, X7
  1075. PSHUFB X13, X7
  1076. PADDL X7, X6
  1077. PXOR X6, X5
  1078. MOVO X5, X8
  1079. PSLLL $0x14, X8
  1080. PSRLL $0x0c, X5
  1081. PXOR X8, X5
  1082. PADDL 96(BP), X4
  1083. PADDL X5, X4
  1084. PXOR X4, X7
  1085. PSHUFB X14, X7
  1086. PADDL X7, X6
  1087. PXOR X6, X5
  1088. MOVO X5, X8
  1089. PSLLL $0x19, X8
  1090. PSRLL $0x07, X5
  1091. PXOR X8, X5
  1092. PSHUFL $0x39, X5, X5
  1093. PSHUFL $0x4e, X6, X6
  1094. PSHUFL $0x93, X7, X7
  1095. PADDL 112(BP), X4
  1096. PADDL X5, X4
  1097. PXOR X4, X7
  1098. PSHUFB X13, X7
  1099. PADDL X7, X6
  1100. PXOR X6, X5
  1101. MOVO X5, X8
  1102. PSLLL $0x14, X8
  1103. PSRLL $0x0c, X5
  1104. PXOR X8, X5
  1105. PADDL 128(BP), X4
  1106. PADDL X5, X4
  1107. PXOR X4, X7
  1108. PSHUFB X14, X7
  1109. PADDL X7, X6
  1110. PXOR X6, X5
  1111. MOVO X5, X8
  1112. PSLLL $0x19, X8
  1113. PSRLL $0x07, X5
  1114. PXOR X8, X5
  1115. PSHUFL $0x39, X7, X7
  1116. PSHUFL $0x4e, X6, X6
  1117. PSHUFL $0x93, X5, X5
  1118. PADDL 144(BP), X4
  1119. PADDL X5, X4
  1120. PXOR X4, X7
  1121. PSHUFB X13, X7
  1122. PADDL X7, X6
  1123. PXOR X6, X5
  1124. MOVO X5, X8
  1125. PSLLL $0x14, X8
  1126. PSRLL $0x0c, X5
  1127. PXOR X8, X5
  1128. PADDL 160(BP), X4
  1129. PADDL X5, X4
  1130. PXOR X4, X7
  1131. PSHUFB X14, X7
  1132. PADDL X7, X6
  1133. PXOR X6, X5
  1134. MOVO X5, X8
  1135. PSLLL $0x19, X8
  1136. PSRLL $0x07, X5
  1137. PXOR X8, X5
  1138. PSHUFL $0x39, X5, X5
  1139. PSHUFL $0x4e, X6, X6
  1140. PSHUFL $0x93, X7, X7
  1141. PADDL 176(BP), X4
  1142. PADDL X5, X4
  1143. PXOR X4, X7
  1144. PSHUFB X13, X7
  1145. PADDL X7, X6
  1146. PXOR X6, X5
  1147. MOVO X5, X8
  1148. PSLLL $0x14, X8
  1149. PSRLL $0x0c, X5
  1150. PXOR X8, X5
  1151. PADDL 192(BP), X4
  1152. PADDL X5, X4
  1153. PXOR X4, X7
  1154. PSHUFB X14, X7
  1155. PADDL X7, X6
  1156. PXOR X6, X5
  1157. MOVO X5, X8
  1158. PSLLL $0x19, X8
  1159. PSRLL $0x07, X5
  1160. PXOR X8, X5
  1161. PSHUFL $0x39, X7, X7
  1162. PSHUFL $0x4e, X6, X6
  1163. PSHUFL $0x93, X5, X5
  1164. PADDL 208(BP), X4
  1165. PADDL X5, X4
  1166. PXOR X4, X7
  1167. PSHUFB X13, X7
  1168. PADDL X7, X6
  1169. PXOR X6, X5
  1170. MOVO X5, X8
  1171. PSLLL $0x14, X8
  1172. PSRLL $0x0c, X5
  1173. PXOR X8, X5
  1174. PADDL 224(BP), X4
  1175. PADDL X5, X4
  1176. PXOR X4, X7
  1177. PSHUFB X14, X7
  1178. PADDL X7, X6
  1179. PXOR X6, X5
  1180. MOVO X5, X8
  1181. PSLLL $0x19, X8
  1182. PSRLL $0x07, X5
  1183. PXOR X8, X5
  1184. PSHUFL $0x39, X5, X5
  1185. PSHUFL $0x4e, X6, X6
  1186. PSHUFL $0x93, X7, X7
  1187. PADDL 240(BP), X4
  1188. PADDL X5, X4
  1189. PXOR X4, X7
  1190. PSHUFB X13, X7
  1191. PADDL X7, X6
  1192. PXOR X6, X5
  1193. MOVO X5, X8
  1194. PSLLL $0x14, X8
  1195. PSRLL $0x0c, X5
  1196. PXOR X8, X5
  1197. PADDL 256(BP), X4
  1198. PADDL X5, X4
  1199. PXOR X4, X7
  1200. PSHUFB X14, X7
  1201. PADDL X7, X6
  1202. PXOR X6, X5
  1203. MOVO X5, X8
  1204. PSLLL $0x19, X8
  1205. PSRLL $0x07, X5
  1206. PXOR X8, X5
  1207. PSHUFL $0x39, X7, X7
  1208. PSHUFL $0x4e, X6, X6
  1209. PSHUFL $0x93, X5, X5
  1210. PADDL 272(BP), X4
  1211. PADDL X5, X4
  1212. PXOR X4, X7
  1213. PSHUFB X13, X7
  1214. PADDL X7, X6
  1215. PXOR X6, X5
  1216. MOVO X5, X8
  1217. PSLLL $0x14, X8
  1218. PSRLL $0x0c, X5
  1219. PXOR X8, X5
  1220. PADDL 288(BP), X4
  1221. PADDL X5, X4
  1222. PXOR X4, X7
  1223. PSHUFB X14, X7
  1224. PADDL X7, X6
  1225. PXOR X6, X5
  1226. MOVO X5, X8
  1227. PSLLL $0x19, X8
  1228. PSRLL $0x07, X5
  1229. PXOR X8, X5
  1230. PSHUFL $0x39, X5, X5
  1231. PSHUFL $0x4e, X6, X6
  1232. PSHUFL $0x93, X7, X7
  1233. PADDL 304(BP), X4
  1234. PADDL X5, X4
  1235. PXOR X4, X7
  1236. PSHUFB X13, X7
  1237. PADDL X7, X6
  1238. PXOR X6, X5
  1239. MOVO X5, X8
  1240. PSLLL $0x14, X8
  1241. PSRLL $0x0c, X5
  1242. PXOR X8, X5
  1243. PADDL 320(BP), X4
  1244. PADDL X5, X4
  1245. PXOR X4, X7
  1246. PSHUFB X14, X7
  1247. PADDL X7, X6
  1248. PXOR X6, X5
  1249. MOVO X5, X8
  1250. PSLLL $0x19, X8
  1251. PSRLL $0x07, X5
  1252. PXOR X8, X5
  1253. PSHUFL $0x39, X7, X7
  1254. PSHUFL $0x4e, X6, X6
  1255. PSHUFL $0x93, X5, X5
  1256. PADDL 336(BP), X4
  1257. PADDL X5, X4
  1258. PXOR X4, X7
  1259. PSHUFB X13, X7
  1260. PADDL X7, X6
  1261. PXOR X6, X5
  1262. MOVO X5, X8
  1263. PSLLL $0x14, X8
  1264. PSRLL $0x0c, X5
  1265. PXOR X8, X5
  1266. PADDL 352(BP), X4
  1267. PADDL X5, X4
  1268. PXOR X4, X7
  1269. PSHUFB X14, X7
  1270. PADDL X7, X6
  1271. PXOR X6, X5
  1272. MOVO X5, X8
  1273. PSLLL $0x19, X8
  1274. PSRLL $0x07, X5
  1275. PXOR X8, X5
  1276. PSHUFL $0x39, X5, X5
  1277. PSHUFL $0x4e, X6, X6
  1278. PSHUFL $0x93, X7, X7
  1279. PADDL 368(BP), X4
  1280. PADDL X5, X4
  1281. PXOR X4, X7
  1282. PSHUFB X13, X7
  1283. PADDL X7, X6
  1284. PXOR X6, X5
  1285. MOVO X5, X8
  1286. PSLLL $0x14, X8
  1287. PSRLL $0x0c, X5
  1288. PXOR X8, X5
  1289. PADDL 384(BP), X4
  1290. PADDL X5, X4
  1291. PXOR X4, X7
  1292. PSHUFB X14, X7
  1293. PADDL X7, X6
  1294. PXOR X6, X5
  1295. MOVO X5, X8
  1296. PSLLL $0x19, X8
  1297. PSRLL $0x07, X5
  1298. PXOR X8, X5
  1299. PSHUFL $0x39, X7, X7
  1300. PSHUFL $0x4e, X6, X6
  1301. PSHUFL $0x93, X5, X5
  1302. PADDL 400(BP), X4
  1303. PADDL X5, X4
  1304. PXOR X4, X7
  1305. PSHUFB X13, X7
  1306. PADDL X7, X6
  1307. PXOR X6, X5
  1308. MOVO X5, X8
  1309. PSLLL $0x14, X8
  1310. PSRLL $0x0c, X5
  1311. PXOR X8, X5
  1312. PADDL 416(BP), X4
  1313. PADDL X5, X4
  1314. PXOR X4, X7
  1315. PSHUFB X14, X7
  1316. PADDL X7, X6
  1317. PXOR X6, X5
  1318. MOVO X5, X8
  1319. PSLLL $0x19, X8
  1320. PSRLL $0x07, X5
  1321. PXOR X8, X5
  1322. PSHUFL $0x39, X5, X5
  1323. PSHUFL $0x4e, X6, X6
  1324. PSHUFL $0x93, X7, X7
  1325. PADDL 432(BP), X4
  1326. PADDL X5, X4
  1327. PXOR X4, X7
  1328. PSHUFB X13, X7
  1329. PADDL X7, X6
  1330. PXOR X6, X5
  1331. MOVO X5, X8
  1332. PSLLL $0x14, X8
  1333. PSRLL $0x0c, X5
  1334. PXOR X8, X5
  1335. PADDL 448(BP), X4
  1336. PADDL X5, X4
  1337. PXOR X4, X7
  1338. PSHUFB X14, X7
  1339. PADDL X7, X6
  1340. PXOR X6, X5
  1341. MOVO X5, X8
  1342. PSLLL $0x19, X8
  1343. PSRLL $0x07, X5
  1344. PXOR X8, X5
  1345. PSHUFL $0x39, X7, X7
  1346. PSHUFL $0x4e, X6, X6
  1347. PSHUFL $0x93, X5, X5
  1348. PADDL 464(BP), X4
  1349. PADDL X5, X4
  1350. PXOR X4, X7
  1351. PSHUFB X13, X7
  1352. PADDL X7, X6
  1353. PXOR X6, X5
  1354. MOVO X5, X8
  1355. PSLLL $0x14, X8
  1356. PSRLL $0x0c, X5
  1357. PXOR X8, X5
  1358. PADDL 480(BP), X4
  1359. PADDL X5, X4
  1360. PXOR X4, X7
  1361. PSHUFB X14, X7
  1362. PADDL X7, X6
  1363. PXOR X6, X5
  1364. MOVO X5, X8
  1365. PSLLL $0x19, X8
  1366. PSRLL $0x07, X5
  1367. PXOR X8, X5
  1368. PSHUFL $0x39, X5, X5
  1369. PSHUFL $0x4e, X6, X6
  1370. PSHUFL $0x93, X7, X7
  1371. PADDL 496(BP), X4
  1372. PADDL X5, X4
  1373. PXOR X4, X7
  1374. PSHUFB X13, X7
  1375. PADDL X7, X6
  1376. PXOR X6, X5
  1377. MOVO X5, X8
  1378. PSLLL $0x14, X8
  1379. PSRLL $0x0c, X5
  1380. PXOR X8, X5
  1381. PADDL 512(BP), X4
  1382. PADDL X5, X4
  1383. PXOR X4, X7
  1384. PSHUFB X14, X7
  1385. PADDL X7, X6
  1386. PXOR X6, X5
  1387. MOVO X5, X8
  1388. PSLLL $0x19, X8
  1389. PSRLL $0x07, X5
  1390. PXOR X8, X5
  1391. PSHUFL $0x39, X7, X7
  1392. PSHUFL $0x4e, X6, X6
  1393. PSHUFL $0x93, X5, X5
  1394. PADDL 528(BP), X4
  1395. PADDL X5, X4
  1396. PXOR X4, X7
  1397. PSHUFB X13, X7
  1398. PADDL X7, X6
  1399. PXOR X6, X5
  1400. MOVO X5, X8
  1401. PSLLL $0x14, X8
  1402. PSRLL $0x0c, X5
  1403. PXOR X8, X5
  1404. PADDL 544(BP), X4
  1405. PADDL X5, X4
  1406. PXOR X4, X7
  1407. PSHUFB X14, X7
  1408. PADDL X7, X6
  1409. PXOR X6, X5
  1410. MOVO X5, X8
  1411. PSLLL $0x19, X8
  1412. PSRLL $0x07, X5
  1413. PXOR X8, X5
  1414. PSHUFL $0x39, X5, X5
  1415. PSHUFL $0x4e, X6, X6
  1416. PSHUFL $0x93, X7, X7
  1417. PADDL 560(BP), X4
  1418. PADDL X5, X4
  1419. PXOR X4, X7
  1420. PSHUFB X13, X7
  1421. PADDL X7, X6
  1422. PXOR X6, X5
  1423. MOVO X5, X8
  1424. PSLLL $0x14, X8
  1425. PSRLL $0x0c, X5
  1426. PXOR X8, X5
  1427. PADDL 576(BP), X4
  1428. PADDL X5, X4
  1429. PXOR X4, X7
  1430. PSHUFB X14, X7
  1431. PADDL X7, X6
  1432. PXOR X6, X5
  1433. MOVO X5, X8
  1434. PSLLL $0x19, X8
  1435. PSRLL $0x07, X5
  1436. PXOR X8, X5
  1437. PSHUFL $0x39, X7, X7
  1438. PSHUFL $0x4e, X6, X6
  1439. PSHUFL $0x93, X5, X5
  1440. PADDL 592(BP), X4
  1441. PADDL X5, X4
  1442. PXOR X4, X7
  1443. PSHUFB X13, X7
  1444. PADDL X7, X6
  1445. PXOR X6, X5
  1446. MOVO X5, X8
  1447. PSLLL $0x14, X8
  1448. PSRLL $0x0c, X5
  1449. PXOR X8, X5
  1450. PADDL 608(BP), X4
  1451. PADDL X5, X4
  1452. PXOR X4, X7
  1453. PSHUFB X14, X7
  1454. PADDL X7, X6
  1455. PXOR X6, X5
  1456. MOVO X5, X8
  1457. PSLLL $0x19, X8
  1458. PSRLL $0x07, X5
  1459. PXOR X8, X5
  1460. PSHUFL $0x39, X5, X5
  1461. PSHUFL $0x4e, X6, X6
  1462. PSHUFL $0x93, X7, X7
  1463. PADDL 624(BP), X4
  1464. PADDL X5, X4
  1465. PXOR X4, X7
  1466. PSHUFB X13, X7
  1467. PADDL X7, X6
  1468. PXOR X6, X5
  1469. MOVO X5, X8
  1470. PSLLL $0x14, X8
  1471. PSRLL $0x0c, X5
  1472. PXOR X8, X5
  1473. PADDL 640(BP), X4
  1474. PADDL X5, X4
  1475. PXOR X4, X7
  1476. PSHUFB X14, X7
  1477. PADDL X7, X6
  1478. PXOR X6, X5
  1479. MOVO X5, X8
  1480. PSLLL $0x19, X8
  1481. PSRLL $0x07, X5
  1482. PXOR X8, X5
  1483. PSHUFL $0x39, X7, X7
  1484. PSHUFL $0x4e, X6, X6
  1485. PSHUFL $0x93, X5, X5
  1486. PXOR X4, X0
  1487. PXOR X5, X1
  1488. PXOR X6, X0
  1489. PXOR X7, X1
  1490. LEAQ 64(SI), SI
  1491. SUBQ $0x40, DX
  1492. JNE loop
  1493. MOVO X15, (BP)
  1494. MOVQ (BP), R9
  1495. MOVQ R9, (BX)
  1496. MOVOU X0, (AX)
  1497. MOVOU X1, 16(AX)
  1498. RET
  1499. // func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
  1500. // Requires: SSE2, SSE4.1, SSSE3
  //
  // Folds each 64-byte block of `blocks` into the 8-word state at h (BLAKE2s
  // compression, per the file name) and writes the advanced 64-bit counter back
  // to c. Each loop iteration runs 10 rounds; a round is a "column" half and a
  // "diagonal" half, each made of two add/xor/rotate quarter-steps.
  //
  // Frame: 32 bytes of locals, 48 bytes of arguments. Only 16 bytes of the
  // locals are used, at BP = SP rounded up to a 16-byte boundary, so the
  // aligned MOVO accesses to (BP) are legal.
  //
  // Register roles:
  //   AX = h, BX = c, CX = flag, SI = current block, DX = bytes remaining
  //   X0, X1   = chaining value h[0..3], h[4..7]
  //   X2, X3   = constants loaded from iv0<>, iv1<> (defined outside this view)
  //   X12      = constant from counter<>, added to X15 once per block
  //   X13, X14 = PSHUFB masks from rol16<>, rol8<> (presumably 32-bit rotates
  //              by 16 and 8 bits -- confirm against the DATA definitions)
  //   X15      = {counter low, counter high, flag, 0}
  //   X4..X7   = working rows a, b, c, d
  //   X8..X11  = message words gathered for the current round; X8 is also
  //              reused as the temporary for the shift-based rotates
  //   m[i] below means the 32-bit word at byte offset 4*i of the block at SI.
  //
  // NOTE(review): the loop ends only when DX reaches exactly zero after
  // subtracting 64, so blocks_len is presumably always a non-zero multiple of
  // 64 -- verify against the Go caller.
  1501. TEXT ·hashBlocksSSE4(SB), $32-48
  1502. MOVQ h+0(FP), AX
  1503. MOVQ c+8(FP), BX
  1504. MOVL flag+16(FP), CX // 32-bit load: upper half of CX is zero
  1505. MOVQ blocks_base+24(FP), SI
  1506. MOVQ blocks_len+32(FP), DX
  1507. MOVQ SP, BP
  1508. ADDQ $0x0f, BP
  1509. ANDQ $-16, BP // BP = SP rounded up to a multiple of 16
  1510. MOVQ (BX), R9 // R9 = both counter words c[0], c[1]
  1511. MOVQ R9, (BP) // scratch bytes 0..7 = counter
  1512. MOVQ CX, 8(BP) // scratch bytes 8..11 = flag, bytes 12..15 = 0
  1513. MOVOU (AX), X0 // X0 = h[0..3]
  1514. MOVOU 16(AX), X1 // X1 = h[4..7]
  1515. MOVOU iv0<>+0(SB), X2
  1516. MOVOU iv1<>+0(SB), X3
  1517. MOVOU counter<>+0(SB), X12
  1518. MOVOU rol16<>+0(SB), X13
  1519. MOVOU rol8<>+0(SB), X14
  1520. MOVO (BP), X15 // X15 = {counter, flag, 0}
  1521. loop:
  1522. MOVO X0, X4 // a = h[0..3]
  1523. MOVO X1, X5 // b = h[4..7]
  1524. MOVO X2, X6 // c = iv0
  1525. MOVO X3, X7 // d = iv1
  1526. PADDQ X12, X15 // advance the 64-bit counter (presumably by 64; counter<> is defined elsewhere)
  1527. PXOR X15, X7 // d ^= {counter low, counter high, flag, 0}
  1528. MOVL (SI), X8 // round 1: X8 = m[0], m[2], m[4], m[6]
  1529. PINSRD $0x01, 8(SI), X8
  1530. PINSRD $0x02, 16(SI), X8
  1531. PINSRD $0x03, 24(SI), X8
  1532. MOVL 4(SI), X9 // X9 = m[1], m[3], m[5], m[7]
  1533. PINSRD $0x01, 12(SI), X9
  1534. PINSRD $0x02, 20(SI), X9
  1535. PINSRD $0x03, 28(SI), X9
  1536. MOVL 32(SI), X10 // X10 = m[8], m[10], m[12], m[14]
  1537. PINSRD $0x01, 40(SI), X10
  1538. PINSRD $0x02, 48(SI), X10
  1539. PINSRD $0x03, 56(SI), X10
  1540. MOVL 36(SI), X11 // X11 = m[9], m[11], m[13], m[15]
  1541. PINSRD $0x01, 44(SI), X11
  1542. PINSRD $0x02, 52(SI), X11
  1543. PINSRD $0x03, 60(SI), X11
  1544. PADDL X8, X4 // column step: a += first message vector
  1545. PADDL X5, X4 // a += b
  1546. PXOR X4, X7 // d ^= a
  1547. PSHUFB X13, X7 // d = byte shuffle by the rol16 mask
  1548. PADDL X7, X6 // c += d
  1549. PXOR X6, X5 // b ^= c
  1550. MOVO X5, X8 // X8 is free again: use it as the rotate temporary
  1551. PSLLL $0x14, X8
  1552. PSRLL $0x0c, X5
  1553. PXOR X8, X5 // b = b rotated right by 12 in each 32-bit lane
  1554. PADDL X9, X4 // a += second message vector
  1555. PADDL X5, X4
  1556. PXOR X4, X7
  1557. PSHUFB X14, X7 // d = byte shuffle by the rol8 mask
  1558. PADDL X7, X6
  1559. PXOR X6, X5
  1560. MOVO X5, X8
  1561. PSLLL $0x19, X8
  1562. PSRLL $0x07, X5
  1563. PXOR X8, X5 // b = b rotated right by 7 in each 32-bit lane
  1564. PSHUFL $0x39, X5, X5 // rotate lanes of b by one position (lane i <- lane i+1)
  1565. PSHUFL $0x4e, X6, X6 // swap the 64-bit halves of c (rotate by two lanes)
  1566. PSHUFL $0x93, X7, X7 // rotate lanes of d by three positions (lane i <- lane i-1)
  1567. PADDL X10, X4 // diagonal step: same sequence on the rotated rows
  1568. PADDL X5, X4
  1569. PXOR X4, X7
  1570. PSHUFB X13, X7
  1571. PADDL X7, X6
  1572. PXOR X6, X5
  1573. MOVO X5, X8
  1574. PSLLL $0x14, X8
  1575. PSRLL $0x0c, X5
  1576. PXOR X8, X5
  1577. PADDL X11, X4
  1578. PADDL X5, X4
  1579. PXOR X4, X7
  1580. PSHUFB X14, X7
  1581. PADDL X7, X6
  1582. PXOR X6, X5
  1583. MOVO X5, X8
  1584. PSLLL $0x19, X8
  1585. PSRLL $0x07, X5
  1586. PXOR X8, X5
  1587. PSHUFL $0x39, X7, X7 // undo the lane rotations: d back by one,
  1588. PSHUFL $0x4e, X6, X6 // c back by two,
  1589. PSHUFL $0x93, X5, X5 // b back by three
  1590. MOVL 56(SI), X8 // round 2: X8 = m[14], m[4], m[9], m[13]
  1591. PINSRD $0x01, 16(SI), X8
  1592. PINSRD $0x02, 36(SI), X8
  1593. PINSRD $0x03, 52(SI), X8
  1594. MOVL 40(SI), X9 // X9 = m[10], m[8], m[15], m[6]
  1595. PINSRD $0x01, 32(SI), X9
  1596. PINSRD $0x02, 60(SI), X9
  1597. PINSRD $0x03, 24(SI), X9
  1598. MOVL 4(SI), X10 // X10 = m[1], m[0], m[11], m[5]
  1599. PINSRD $0x01, (SI), X10
  1600. PINSRD $0x02, 44(SI), X10
  1601. PINSRD $0x03, 20(SI), X10
  1602. MOVL 48(SI), X11 // X11 = m[12], m[2], m[7], m[3]
  1603. PINSRD $0x01, 8(SI), X11
  1604. PINSRD $0x02, 28(SI), X11
  1605. PINSRD $0x03, 12(SI), X11
  1606. PADDL X8, X4 // column step
  1607. PADDL X5, X4
  1608. PXOR X4, X7
  1609. PSHUFB X13, X7
  1610. PADDL X7, X6
  1611. PXOR X6, X5
  1612. MOVO X5, X8
  1613. PSLLL $0x14, X8
  1614. PSRLL $0x0c, X5
  1615. PXOR X8, X5
  1616. PADDL X9, X4
  1617. PADDL X5, X4
  1618. PXOR X4, X7
  1619. PSHUFB X14, X7
  1620. PADDL X7, X6
  1621. PXOR X6, X5
  1622. MOVO X5, X8
  1623. PSLLL $0x19, X8
  1624. PSRLL $0x07, X5
  1625. PXOR X8, X5
  1626. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1627. PSHUFL $0x4e, X6, X6
  1628. PSHUFL $0x93, X7, X7
  1629. PADDL X10, X4 // diagonal step
  1630. PADDL X5, X4
  1631. PXOR X4, X7
  1632. PSHUFB X13, X7
  1633. PADDL X7, X6
  1634. PXOR X6, X5
  1635. MOVO X5, X8
  1636. PSLLL $0x14, X8
  1637. PSRLL $0x0c, X5
  1638. PXOR X8, X5
  1639. PADDL X11, X4
  1640. PADDL X5, X4
  1641. PXOR X4, X7
  1642. PSHUFB X14, X7
  1643. PADDL X7, X6
  1644. PXOR X6, X5
  1645. MOVO X5, X8
  1646. PSLLL $0x19, X8
  1647. PSRLL $0x07, X5
  1648. PXOR X8, X5
  1649. PSHUFL $0x39, X7, X7 // restore the row alignment
  1650. PSHUFL $0x4e, X6, X6
  1651. PSHUFL $0x93, X5, X5
  1652. MOVL 44(SI), X8 // round 3: X8 = m[11], m[12], m[5], m[15]
  1653. PINSRD $0x01, 48(SI), X8
  1654. PINSRD $0x02, 20(SI), X8
  1655. PINSRD $0x03, 60(SI), X8
  1656. MOVL 32(SI), X9 // X9 = m[8], m[0], m[2], m[13]
  1657. PINSRD $0x01, (SI), X9
  1658. PINSRD $0x02, 8(SI), X9
  1659. PINSRD $0x03, 52(SI), X9
  1660. MOVL 40(SI), X10 // X10 = m[10], m[3], m[7], m[9]
  1661. PINSRD $0x01, 12(SI), X10
  1662. PINSRD $0x02, 28(SI), X10
  1663. PINSRD $0x03, 36(SI), X10
  1664. MOVL 56(SI), X11 // X11 = m[14], m[6], m[1], m[4]
  1665. PINSRD $0x01, 24(SI), X11
  1666. PINSRD $0x02, 4(SI), X11
  1667. PINSRD $0x03, 16(SI), X11
  1668. PADDL X8, X4 // column step
  1669. PADDL X5, X4
  1670. PXOR X4, X7
  1671. PSHUFB X13, X7
  1672. PADDL X7, X6
  1673. PXOR X6, X5
  1674. MOVO X5, X8
  1675. PSLLL $0x14, X8
  1676. PSRLL $0x0c, X5
  1677. PXOR X8, X5
  1678. PADDL X9, X4
  1679. PADDL X5, X4
  1680. PXOR X4, X7
  1681. PSHUFB X14, X7
  1682. PADDL X7, X6
  1683. PXOR X6, X5
  1684. MOVO X5, X8
  1685. PSLLL $0x19, X8
  1686. PSRLL $0x07, X5
  1687. PXOR X8, X5
  1688. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1689. PSHUFL $0x4e, X6, X6
  1690. PSHUFL $0x93, X7, X7
  1691. PADDL X10, X4 // diagonal step
  1692. PADDL X5, X4
  1693. PXOR X4, X7
  1694. PSHUFB X13, X7
  1695. PADDL X7, X6
  1696. PXOR X6, X5
  1697. MOVO X5, X8
  1698. PSLLL $0x14, X8
  1699. PSRLL $0x0c, X5
  1700. PXOR X8, X5
  1701. PADDL X11, X4
  1702. PADDL X5, X4
  1703. PXOR X4, X7
  1704. PSHUFB X14, X7
  1705. PADDL X7, X6
  1706. PXOR X6, X5
  1707. MOVO X5, X8
  1708. PSLLL $0x19, X8
  1709. PSRLL $0x07, X5
  1710. PXOR X8, X5
  1711. PSHUFL $0x39, X7, X7 // restore the row alignment
  1712. PSHUFL $0x4e, X6, X6
  1713. PSHUFL $0x93, X5, X5
  1714. MOVL 28(SI), X8 // round 4: X8 = m[7], m[3], m[13], m[11]
  1715. PINSRD $0x01, 12(SI), X8
  1716. PINSRD $0x02, 52(SI), X8
  1717. PINSRD $0x03, 44(SI), X8
  1718. MOVL 36(SI), X9 // X9 = m[9], m[1], m[12], m[14]
  1719. PINSRD $0x01, 4(SI), X9
  1720. PINSRD $0x02, 48(SI), X9
  1721. PINSRD $0x03, 56(SI), X9
  1722. MOVL 8(SI), X10 // X10 = m[2], m[5], m[4], m[15]
  1723. PINSRD $0x01, 20(SI), X10
  1724. PINSRD $0x02, 16(SI), X10
  1725. PINSRD $0x03, 60(SI), X10
  1726. MOVL 24(SI), X11 // X11 = m[6], m[10], m[0], m[8]
  1727. PINSRD $0x01, 40(SI), X11
  1728. PINSRD $0x02, (SI), X11
  1729. PINSRD $0x03, 32(SI), X11
  1730. PADDL X8, X4 // column step
  1731. PADDL X5, X4
  1732. PXOR X4, X7
  1733. PSHUFB X13, X7
  1734. PADDL X7, X6
  1735. PXOR X6, X5
  1736. MOVO X5, X8
  1737. PSLLL $0x14, X8
  1738. PSRLL $0x0c, X5
  1739. PXOR X8, X5
  1740. PADDL X9, X4
  1741. PADDL X5, X4
  1742. PXOR X4, X7
  1743. PSHUFB X14, X7
  1744. PADDL X7, X6
  1745. PXOR X6, X5
  1746. MOVO X5, X8
  1747. PSLLL $0x19, X8
  1748. PSRLL $0x07, X5
  1749. PXOR X8, X5
  1750. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1751. PSHUFL $0x4e, X6, X6
  1752. PSHUFL $0x93, X7, X7
  1753. PADDL X10, X4 // diagonal step
  1754. PADDL X5, X4
  1755. PXOR X4, X7
  1756. PSHUFB X13, X7
  1757. PADDL X7, X6
  1758. PXOR X6, X5
  1759. MOVO X5, X8
  1760. PSLLL $0x14, X8
  1761. PSRLL $0x0c, X5
  1762. PXOR X8, X5
  1763. PADDL X11, X4
  1764. PADDL X5, X4
  1765. PXOR X4, X7
  1766. PSHUFB X14, X7
  1767. PADDL X7, X6
  1768. PXOR X6, X5
  1769. MOVO X5, X8
  1770. PSLLL $0x19, X8
  1771. PSRLL $0x07, X5
  1772. PXOR X8, X5
  1773. PSHUFL $0x39, X7, X7 // restore the row alignment
  1774. PSHUFL $0x4e, X6, X6
  1775. PSHUFL $0x93, X5, X5
  1776. MOVL 36(SI), X8 // round 5: X8 = m[9], m[5], m[2], m[10]
  1777. PINSRD $0x01, 20(SI), X8
  1778. PINSRD $0x02, 8(SI), X8
  1779. PINSRD $0x03, 40(SI), X8
  1780. MOVL (SI), X9 // X9 = m[0], m[7], m[4], m[15]
  1781. PINSRD $0x01, 28(SI), X9
  1782. PINSRD $0x02, 16(SI), X9
  1783. PINSRD $0x03, 60(SI), X9
  1784. MOVL 56(SI), X10 // X10 = m[14], m[11], m[6], m[3]
  1785. PINSRD $0x01, 44(SI), X10
  1786. PINSRD $0x02, 24(SI), X10
  1787. PINSRD $0x03, 12(SI), X10
  1788. MOVL 4(SI), X11 // X11 = m[1], m[12], m[8], m[13]
  1789. PINSRD $0x01, 48(SI), X11
  1790. PINSRD $0x02, 32(SI), X11
  1791. PINSRD $0x03, 52(SI), X11
  1792. PADDL X8, X4 // column step
  1793. PADDL X5, X4
  1794. PXOR X4, X7
  1795. PSHUFB X13, X7
  1796. PADDL X7, X6
  1797. PXOR X6, X5
  1798. MOVO X5, X8
  1799. PSLLL $0x14, X8
  1800. PSRLL $0x0c, X5
  1801. PXOR X8, X5
  1802. PADDL X9, X4
  1803. PADDL X5, X4
  1804. PXOR X4, X7
  1805. PSHUFB X14, X7
  1806. PADDL X7, X6
  1807. PXOR X6, X5
  1808. MOVO X5, X8
  1809. PSLLL $0x19, X8
  1810. PSRLL $0x07, X5
  1811. PXOR X8, X5
  1812. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1813. PSHUFL $0x4e, X6, X6
  1814. PSHUFL $0x93, X7, X7
  1815. PADDL X10, X4 // diagonal step
  1816. PADDL X5, X4
  1817. PXOR X4, X7
  1818. PSHUFB X13, X7
  1819. PADDL X7, X6
  1820. PXOR X6, X5
  1821. MOVO X5, X8
  1822. PSLLL $0x14, X8
  1823. PSRLL $0x0c, X5
  1824. PXOR X8, X5
  1825. PADDL X11, X4
  1826. PADDL X5, X4
  1827. PXOR X4, X7
  1828. PSHUFB X14, X7
  1829. PADDL X7, X6
  1830. PXOR X6, X5
  1831. MOVO X5, X8
  1832. PSLLL $0x19, X8
  1833. PSRLL $0x07, X5
  1834. PXOR X8, X5
  1835. PSHUFL $0x39, X7, X7 // restore the row alignment
  1836. PSHUFL $0x4e, X6, X6
  1837. PSHUFL $0x93, X5, X5
  1838. MOVL 8(SI), X8 // round 6: X8 = m[2], m[6], m[0], m[8]
  1839. PINSRD $0x01, 24(SI), X8
  1840. PINSRD $0x02, (SI), X8
  1841. PINSRD $0x03, 32(SI), X8
  1842. MOVL 48(SI), X9 // X9 = m[12], m[10], m[11], m[3]
  1843. PINSRD $0x01, 40(SI), X9
  1844. PINSRD $0x02, 44(SI), X9
  1845. PINSRD $0x03, 12(SI), X9
  1846. MOVL 16(SI), X10 // X10 = m[4], m[7], m[15], m[1]
  1847. PINSRD $0x01, 28(SI), X10
  1848. PINSRD $0x02, 60(SI), X10
  1849. PINSRD $0x03, 4(SI), X10
  1850. MOVL 52(SI), X11 // X11 = m[13], m[5], m[14], m[9]
  1851. PINSRD $0x01, 20(SI), X11
  1852. PINSRD $0x02, 56(SI), X11
  1853. PINSRD $0x03, 36(SI), X11
  1854. PADDL X8, X4 // column step
  1855. PADDL X5, X4
  1856. PXOR X4, X7
  1857. PSHUFB X13, X7
  1858. PADDL X7, X6
  1859. PXOR X6, X5
  1860. MOVO X5, X8
  1861. PSLLL $0x14, X8
  1862. PSRLL $0x0c, X5
  1863. PXOR X8, X5
  1864. PADDL X9, X4
  1865. PADDL X5, X4
  1866. PXOR X4, X7
  1867. PSHUFB X14, X7
  1868. PADDL X7, X6
  1869. PXOR X6, X5
  1870. MOVO X5, X8
  1871. PSLLL $0x19, X8
  1872. PSRLL $0x07, X5
  1873. PXOR X8, X5
  1874. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1875. PSHUFL $0x4e, X6, X6
  1876. PSHUFL $0x93, X7, X7
  1877. PADDL X10, X4 // diagonal step
  1878. PADDL X5, X4
  1879. PXOR X4, X7
  1880. PSHUFB X13, X7
  1881. PADDL X7, X6
  1882. PXOR X6, X5
  1883. MOVO X5, X8
  1884. PSLLL $0x14, X8
  1885. PSRLL $0x0c, X5
  1886. PXOR X8, X5
  1887. PADDL X11, X4
  1888. PADDL X5, X4
  1889. PXOR X4, X7
  1890. PSHUFB X14, X7
  1891. PADDL X7, X6
  1892. PXOR X6, X5
  1893. MOVO X5, X8
  1894. PSLLL $0x19, X8
  1895. PSRLL $0x07, X5
  1896. PXOR X8, X5
  1897. PSHUFL $0x39, X7, X7 // restore the row alignment
  1898. PSHUFL $0x4e, X6, X6
  1899. PSHUFL $0x93, X5, X5
  1900. MOVL 48(SI), X8 // round 7: X8 = m[12], m[1], m[14], m[4]
  1901. PINSRD $0x01, 4(SI), X8
  1902. PINSRD $0x02, 56(SI), X8
  1903. PINSRD $0x03, 16(SI), X8
  1904. MOVL 20(SI), X9 // X9 = m[5], m[15], m[13], m[10]
  1905. PINSRD $0x01, 60(SI), X9
  1906. PINSRD $0x02, 52(SI), X9
  1907. PINSRD $0x03, 40(SI), X9
  1908. MOVL (SI), X10 // X10 = m[0], m[6], m[9], m[8]
  1909. PINSRD $0x01, 24(SI), X10
  1910. PINSRD $0x02, 36(SI), X10
  1911. PINSRD $0x03, 32(SI), X10
  1912. MOVL 28(SI), X11 // X11 = m[7], m[3], m[2], m[11]
  1913. PINSRD $0x01, 12(SI), X11
  1914. PINSRD $0x02, 8(SI), X11
  1915. PINSRD $0x03, 44(SI), X11
  1916. PADDL X8, X4 // column step
  1917. PADDL X5, X4
  1918. PXOR X4, X7
  1919. PSHUFB X13, X7
  1920. PADDL X7, X6
  1921. PXOR X6, X5
  1922. MOVO X5, X8
  1923. PSLLL $0x14, X8
  1924. PSRLL $0x0c, X5
  1925. PXOR X8, X5
  1926. PADDL X9, X4
  1927. PADDL X5, X4
  1928. PXOR X4, X7
  1929. PSHUFB X14, X7
  1930. PADDL X7, X6
  1931. PXOR X6, X5
  1932. MOVO X5, X8
  1933. PSLLL $0x19, X8
  1934. PSRLL $0x07, X5
  1935. PXOR X8, X5
  1936. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1937. PSHUFL $0x4e, X6, X6
  1938. PSHUFL $0x93, X7, X7
  1939. PADDL X10, X4 // diagonal step
  1940. PADDL X5, X4
  1941. PXOR X4, X7
  1942. PSHUFB X13, X7
  1943. PADDL X7, X6
  1944. PXOR X6, X5
  1945. MOVO X5, X8
  1946. PSLLL $0x14, X8
  1947. PSRLL $0x0c, X5
  1948. PXOR X8, X5
  1949. PADDL X11, X4
  1950. PADDL X5, X4
  1951. PXOR X4, X7
  1952. PSHUFB X14, X7
  1953. PADDL X7, X6
  1954. PXOR X6, X5
  1955. MOVO X5, X8
  1956. PSLLL $0x19, X8
  1957. PSRLL $0x07, X5
  1958. PXOR X8, X5
  1959. PSHUFL $0x39, X7, X7 // restore the row alignment
  1960. PSHUFL $0x4e, X6, X6
  1961. PSHUFL $0x93, X5, X5
  1962. MOVL 52(SI), X8 // round 8: X8 = m[13], m[7], m[12], m[3]
  1963. PINSRD $0x01, 28(SI), X8
  1964. PINSRD $0x02, 48(SI), X8
  1965. PINSRD $0x03, 12(SI), X8
  1966. MOVL 44(SI), X9 // X9 = m[11], m[14], m[1], m[9]
  1967. PINSRD $0x01, 56(SI), X9
  1968. PINSRD $0x02, 4(SI), X9
  1969. PINSRD $0x03, 36(SI), X9
  1970. MOVL 20(SI), X10 // X10 = m[5], m[15], m[8], m[2]
  1971. PINSRD $0x01, 60(SI), X10
  1972. PINSRD $0x02, 32(SI), X10
  1973. PINSRD $0x03, 8(SI), X10
  1974. MOVL (SI), X11 // X11 = m[0], m[4], m[6], m[10]
  1975. PINSRD $0x01, 16(SI), X11
  1976. PINSRD $0x02, 24(SI), X11
  1977. PINSRD $0x03, 40(SI), X11
  1978. PADDL X8, X4 // column step
  1979. PADDL X5, X4
  1980. PXOR X4, X7
  1981. PSHUFB X13, X7
  1982. PADDL X7, X6
  1983. PXOR X6, X5
  1984. MOVO X5, X8
  1985. PSLLL $0x14, X8
  1986. PSRLL $0x0c, X5
  1987. PXOR X8, X5
  1988. PADDL X9, X4
  1989. PADDL X5, X4
  1990. PXOR X4, X7
  1991. PSHUFB X14, X7
  1992. PADDL X7, X6
  1993. PXOR X6, X5
  1994. MOVO X5, X8
  1995. PSLLL $0x19, X8
  1996. PSRLL $0x07, X5
  1997. PXOR X8, X5
  1998. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  1999. PSHUFL $0x4e, X6, X6
  2000. PSHUFL $0x93, X7, X7
  2001. PADDL X10, X4 // diagonal step
  2002. PADDL X5, X4
  2003. PXOR X4, X7
  2004. PSHUFB X13, X7
  2005. PADDL X7, X6
  2006. PXOR X6, X5
  2007. MOVO X5, X8
  2008. PSLLL $0x14, X8
  2009. PSRLL $0x0c, X5
  2010. PXOR X8, X5
  2011. PADDL X11, X4
  2012. PADDL X5, X4
  2013. PXOR X4, X7
  2014. PSHUFB X14, X7
  2015. PADDL X7, X6
  2016. PXOR X6, X5
  2017. MOVO X5, X8
  2018. PSLLL $0x19, X8
  2019. PSRLL $0x07, X5
  2020. PXOR X8, X5
  2021. PSHUFL $0x39, X7, X7 // restore the row alignment
  2022. PSHUFL $0x4e, X6, X6
  2023. PSHUFL $0x93, X5, X5
  2024. MOVL 24(SI), X8 // round 9: X8 = m[6], m[14], m[11], m[0]
  2025. PINSRD $0x01, 56(SI), X8
  2026. PINSRD $0x02, 44(SI), X8
  2027. PINSRD $0x03, (SI), X8
  2028. MOVL 60(SI), X9 // X9 = m[15], m[9], m[3], m[8]
  2029. PINSRD $0x01, 36(SI), X9
  2030. PINSRD $0x02, 12(SI), X9
  2031. PINSRD $0x03, 32(SI), X9
  2032. MOVL 48(SI), X10 // X10 = m[12], m[13], m[1], m[10]
  2033. PINSRD $0x01, 52(SI), X10
  2034. PINSRD $0x02, 4(SI), X10
  2035. PINSRD $0x03, 40(SI), X10
  2036. MOVL 8(SI), X11 // X11 = m[2], m[7], m[4], m[5]
  2037. PINSRD $0x01, 28(SI), X11
  2038. PINSRD $0x02, 16(SI), X11
  2039. PINSRD $0x03, 20(SI), X11
  2040. PADDL X8, X4 // column step
  2041. PADDL X5, X4
  2042. PXOR X4, X7
  2043. PSHUFB X13, X7
  2044. PADDL X7, X6
  2045. PXOR X6, X5
  2046. MOVO X5, X8
  2047. PSLLL $0x14, X8
  2048. PSRLL $0x0c, X5
  2049. PXOR X8, X5
  2050. PADDL X9, X4
  2051. PADDL X5, X4
  2052. PXOR X4, X7
  2053. PSHUFB X14, X7
  2054. PADDL X7, X6
  2055. PXOR X6, X5
  2056. MOVO X5, X8
  2057. PSLLL $0x19, X8
  2058. PSRLL $0x07, X5
  2059. PXOR X8, X5
  2060. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  2061. PSHUFL $0x4e, X6, X6
  2062. PSHUFL $0x93, X7, X7
  2063. PADDL X10, X4 // diagonal step
  2064. PADDL X5, X4
  2065. PXOR X4, X7
  2066. PSHUFB X13, X7
  2067. PADDL X7, X6
  2068. PXOR X6, X5
  2069. MOVO X5, X8
  2070. PSLLL $0x14, X8
  2071. PSRLL $0x0c, X5
  2072. PXOR X8, X5
  2073. PADDL X11, X4
  2074. PADDL X5, X4
  2075. PXOR X4, X7
  2076. PSHUFB X14, X7
  2077. PADDL X7, X6
  2078. PXOR X6, X5
  2079. MOVO X5, X8
  2080. PSLLL $0x19, X8
  2081. PSRLL $0x07, X5
  2082. PXOR X8, X5
  2083. PSHUFL $0x39, X7, X7 // restore the row alignment
  2084. PSHUFL $0x4e, X6, X6
  2085. PSHUFL $0x93, X5, X5
  2086. MOVL 40(SI), X8 // round 10: X8 = m[10], m[8], m[7], m[1]
  2087. PINSRD $0x01, 32(SI), X8
  2088. PINSRD $0x02, 28(SI), X8
  2089. PINSRD $0x03, 4(SI), X8
  2090. MOVL 8(SI), X9 // X9 = m[2], m[4], m[6], m[5]
  2091. PINSRD $0x01, 16(SI), X9
  2092. PINSRD $0x02, 24(SI), X9
  2093. PINSRD $0x03, 20(SI), X9
  2094. MOVL 60(SI), X10 // X10 = m[15], m[9], m[3], m[13]
  2095. PINSRD $0x01, 36(SI), X10
  2096. PINSRD $0x02, 12(SI), X10
  2097. PINSRD $0x03, 52(SI), X10
  2098. MOVL 44(SI), X11 // X11 = m[11], m[14], m[12], m[0]
  2099. PINSRD $0x01, 56(SI), X11
  2100. PINSRD $0x02, 48(SI), X11
  2101. PINSRD $0x03, (SI), X11
  2102. PADDL X8, X4 // column step
  2103. PADDL X5, X4
  2104. PXOR X4, X7
  2105. PSHUFB X13, X7
  2106. PADDL X7, X6
  2107. PXOR X6, X5
  2108. MOVO X5, X8
  2109. PSLLL $0x14, X8
  2110. PSRLL $0x0c, X5
  2111. PXOR X8, X5
  2112. PADDL X9, X4
  2113. PADDL X5, X4
  2114. PXOR X4, X7
  2115. PSHUFB X14, X7
  2116. PADDL X7, X6
  2117. PXOR X6, X5
  2118. MOVO X5, X8
  2119. PSLLL $0x19, X8
  2120. PSRLL $0x07, X5
  2121. PXOR X8, X5
  2122. PSHUFL $0x39, X5, X5 // rotate rows b, c, d for the diagonal step
  2123. PSHUFL $0x4e, X6, X6
  2124. PSHUFL $0x93, X7, X7
  2125. PADDL X10, X4 // diagonal step
  2126. PADDL X5, X4
  2127. PXOR X4, X7
  2128. PSHUFB X13, X7
  2129. PADDL X7, X6
  2130. PXOR X6, X5
  2131. MOVO X5, X8
  2132. PSLLL $0x14, X8
  2133. PSRLL $0x0c, X5
  2134. PXOR X8, X5
  2135. PADDL X11, X4
  2136. PADDL X5, X4
  2137. PXOR X4, X7
  2138. PSHUFB X14, X7
  2139. PADDL X7, X6
  2140. PXOR X6, X5
  2141. MOVO X5, X8
  2142. PSLLL $0x19, X8
  2143. PSRLL $0x07, X5
  2144. PXOR X8, X5
  2145. PSHUFL $0x39, X7, X7 // restore the row alignment
  2146. PSHUFL $0x4e, X6, X6
  2147. PSHUFL $0x93, X5, X5
  2148. PXOR X4, X0 // feed-forward: h[0..3] ^= a ^ c
  2149. PXOR X5, X1 // h[4..7] ^= b ^ d
  2150. PXOR X6, X0
  2151. PXOR X7, X1
  2152. LEAQ 64(SI), SI // advance to the next 64-byte block
  2153. SUBQ $0x40, DX // 64 fewer bytes remaining; sets ZF for the branch below
  2154. JNE loop // repeat until the remaining length is exactly zero
  2155. MOVO X15, (BP) // spill {counter, flag, 0} to the aligned scratch
  2156. MOVQ (BP), R9 // R9 = updated 64-bit counter
  2157. MOVQ R9, (BX) // write both counter words back to c
  2158. MOVOU X0, (AX) // store the new chaining value into h[0..3]
  2159. MOVOU X1, 16(AX) // and h[4..7]
  2160. RET