// goc25519sm_amd64.s
// Copyright © 2021 Jeffrey H. Johnson <trnsz@pobox.com>.
// Copyright © 2021 Filippo Valsorda.
// Copyright © 2012 The Go Authors.
//
// All rights reserved.
// Use of this source code is governed by the BSD-style
// license that can be found in the LICENSE file.

// This code was translated into a form compatible with
// Go's 6a from the public domain sources in SUPERCOP:
// https://bench.cr.yp.to/supercop.html

// +build amd64,gc,!purego

  #include "textflag.h"

  // REDMASK51 selects the low 51 bits of a 64-bit word (2^51 - 1).
  #define REDMASK51 0x0007FFFFFFFFFFFF

  // These constants cannot be encoded in non-MOVQ immediates, so they are
  // placed in read-only memory and used as memory operands instead.

  // 121666 << 13 (= 996687872); ladderstep multiplies limbs by this word.
  DATA ·_121666_213(SB)/8, $(121666<<13)
  GLOBL ·_121666_213(SB), RODATA, $8

  // 2 * (2^51 - 19): ladderstep adds this to limb 0 before a limb-wise subtraction.
  DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
  GLOBL ·_2P0(SB), RODATA, $8

  // 2 * (2^51 - 1): ladderstep adds this to limbs 1..4 before a limb-wise subtraction.
  DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
  GLOBL ·_2P1234(SB), RODATA, $8
  // func freeze(inout *[5]uint64)
  //
  // freeze rewrites, in place, the five 64-bit limbs that inout points to.
  // It first runs three carry passes in radix 2^51: the bits above bit 50 of
  // each limb are moved into the next limb, and the carry out of limb 4 is
  // multiplied by 19 and added to limb 0. It then subtracts p = 2^255 - 19,
  // whose limbs are (2^51-19, 2^51-1, 2^51-1, 2^51-1, 2^51-1), when the
  // carried value is >= p. The subtraction is selected with CMOV and an AND
  // mask, so the only branch is the fixed three-iteration loop counter.
  //
  // NOTE(review): the last step is a full reduction only if every limb fits
  // in 51 bits after the three passes; presumably callers pass loosely
  // reduced field elements - confirm against the Go-side callers.
  //
  // Registers: DI = inout, SI/DX/CX/R8/R9 = limbs 0..4, AX = REDMASK51,
  // R10 = 2^51-19, R11 = pass counter (0 once the loop exits), R12 = scratch.
  //
  // TEXT flag 7 is NOPROF|DUPOK|NOSPLIT as numbered in textflag.h; the frame
  // is $0-8: no locals, one 8-byte pointer argument.
  TEXT ·freeze(SB), 7, $0-8
      MOVQ inout+0(FP), DI

      // Load the five limbs.
      MOVQ 0(DI), SI
      MOVQ 8(DI), DX
      MOVQ 16(DI), CX
      MOVQ 24(DI), R8
      MOVQ 32(DI), R9

      MOVQ $REDMASK51, AX         // AX  = 2^51 - 1
      MOVQ AX, R10
      SUBQ $18, R10               // R10 = 2^51 - 19 (limb 0 of p)
      MOVQ $3, R11                // three carry passes

  REDUCELOOP:
      // limb0 -> limb1
      MOVQ SI, R12
      SHRQ $51, R12
      ANDQ AX, SI
      ADDQ R12, DX
      // limb1 -> limb2
      MOVQ DX, R12
      SHRQ $51, R12
      ANDQ AX, DX
      ADDQ R12, CX
      // limb2 -> limb3
      MOVQ CX, R12
      SHRQ $51, R12
      ANDQ AX, CX
      ADDQ R12, R8
      // limb3 -> limb4
      MOVQ R8, R12
      SHRQ $51, R12
      ANDQ AX, R8
      ADDQ R12, R9
      // limb4 -> limb0, scaled by 19 because 2^255 = 19 (mod p)
      MOVQ R9, R12
      SHRQ $51, R12
      ANDQ AX, R9
      IMUL3Q $19, R12, R12
      ADDQ R12, SI
      SUBQ $1, R11
      JA REDUCELOOP               // taken while the counter is still non-zero

      // R12 = 1 means "subtract p". It is cleared (R11 is 0 here) unless
      // limb 0 >= 2^51-19 and limbs 1..4 all equal 2^51-1.
      MOVQ $1, R12
      // Go's CMPQ a, b sets the flags for a - b, so the CMOVQLT below fires
      // when SI < R10, i.e. when limb 0 is below limb 0 of p. The operands
      // were previously reversed, which skipped the subtraction for p+1..p+18
      // and applied it to values below p whose upper limbs were all ones.
      CMPQ SI, R10
      CMOVQLT R11, R12
      CMPQ AX, DX
      CMOVQNE R11, R12
      CMPQ AX, CX
      CMOVQNE R11, R12
      CMPQ AX, R8
      CMOVQNE R11, R12
      CMPQ AX, R9
      CMOVQNE R11, R12

      // Turn the flag into an all-ones / all-zeros mask and subtract either
      // p or zero from the limbs.
      NEGQ R12
      ANDQ R12, AX
      ANDQ R12, R10
      SUBQ R10, SI
      SUBQ AX, DX
      SUBQ AX, CX
      SUBQ AX, R8
      SUBQ AX, R9

      // Store the result back over the input.
      MOVQ SI, 0(DI)
      MOVQ DX, 8(DI)
      MOVQ CX, 16(DI)
      MOVQ R8, 24(DI)
      MOVQ R9, 32(DI)
      RET
  83. // func ladderstep(inout *[5][5]uint64)
  84. TEXT ·ladderstep(SB), 0, $296-8
  85. MOVQ inout+0(FP), DI
  86. MOVQ 40(DI), SI
  87. MOVQ 48(DI), DX
  88. MOVQ 56(DI), CX
  89. MOVQ 64(DI), R8
  90. MOVQ 72(DI), R9
  91. MOVQ SI, AX
  92. MOVQ DX, R10
  93. MOVQ CX, R11
  94. MOVQ R8, R12
  95. MOVQ R9, R13
  96. ADDQ ·_2P0(SB), AX
  97. ADDQ ·_2P1234(SB), R10
  98. ADDQ ·_2P1234(SB), R11
  99. ADDQ ·_2P1234(SB), R12
  100. ADDQ ·_2P1234(SB), R13
  101. ADDQ 80(DI), SI
  102. ADDQ 88(DI), DX
  103. ADDQ 96(DI), CX
  104. ADDQ 104(DI), R8
  105. ADDQ 112(DI), R9
  106. SUBQ 80(DI), AX
  107. SUBQ 88(DI), R10
  108. SUBQ 96(DI), R11
  109. SUBQ 104(DI), R12
  110. SUBQ 112(DI), R13
  111. MOVQ SI, 0(SP)
  112. MOVQ DX, 8(SP)
  113. MOVQ CX, 16(SP)
  114. MOVQ R8, 24(SP)
  115. MOVQ R9, 32(SP)
  116. MOVQ AX, 40(SP)
  117. MOVQ R10, 48(SP)
  118. MOVQ R11, 56(SP)
  119. MOVQ R12, 64(SP)
  120. MOVQ R13, 72(SP)
  121. MOVQ 40(SP), AX
  122. MULQ 40(SP)
  123. MOVQ AX, SI
  124. MOVQ DX, CX
  125. MOVQ 40(SP), AX
  126. SHLQ $1, AX
  127. MULQ 48(SP)
  128. MOVQ AX, R8
  129. MOVQ DX, R9
  130. MOVQ 40(SP), AX
  131. SHLQ $1, AX
  132. MULQ 56(SP)
  133. MOVQ AX, R10
  134. MOVQ DX, R11
  135. MOVQ 40(SP), AX
  136. SHLQ $1, AX
  137. MULQ 64(SP)
  138. MOVQ AX, R12
  139. MOVQ DX, R13
  140. MOVQ 40(SP), AX
  141. SHLQ $1, AX
  142. MULQ 72(SP)
  143. MOVQ AX, R14
  144. MOVQ DX, R15
  145. MOVQ 48(SP), AX
  146. MULQ 48(SP)
  147. ADDQ AX, R10
  148. ADCQ DX, R11
  149. MOVQ 48(SP), AX
  150. SHLQ $1, AX
  151. MULQ 56(SP)
  152. ADDQ AX, R12
  153. ADCQ DX, R13
  154. MOVQ 48(SP), AX
  155. SHLQ $1, AX
  156. MULQ 64(SP)
  157. ADDQ AX, R14
  158. ADCQ DX, R15
  159. MOVQ 48(SP), DX
  160. IMUL3Q $38, DX, AX
  161. MULQ 72(SP)
  162. ADDQ AX, SI
  163. ADCQ DX, CX
  164. MOVQ 56(SP), AX
  165. MULQ 56(SP)
  166. ADDQ AX, R14
  167. ADCQ DX, R15
  168. MOVQ 56(SP), DX
  169. IMUL3Q $38, DX, AX
  170. MULQ 64(SP)
  171. ADDQ AX, SI
  172. ADCQ DX, CX
  173. MOVQ 56(SP), DX
  174. IMUL3Q $38, DX, AX
  175. MULQ 72(SP)
  176. ADDQ AX, R8
  177. ADCQ DX, R9
  178. MOVQ 64(SP), DX
  179. IMUL3Q $19, DX, AX
  180. MULQ 64(SP)
  181. ADDQ AX, R8
  182. ADCQ DX, R9
  183. MOVQ 64(SP), DX
  184. IMUL3Q $38, DX, AX
  185. MULQ 72(SP)
  186. ADDQ AX, R10
  187. ADCQ DX, R11
  188. MOVQ 72(SP), DX
  189. IMUL3Q $19, DX, AX
  190. MULQ 72(SP)
  191. ADDQ AX, R12
  192. ADCQ DX, R13
  193. MOVQ $REDMASK51, DX
  194. SHLQ $13, SI, CX
  195. ANDQ DX, SI
  196. SHLQ $13, R8, R9
  197. ANDQ DX, R8
  198. ADDQ CX, R8
  199. SHLQ $13, R10, R11
  200. ANDQ DX, R10
  201. ADDQ R9, R10
  202. SHLQ $13, R12, R13
  203. ANDQ DX, R12
  204. ADDQ R11, R12
  205. SHLQ $13, R14, R15
  206. ANDQ DX, R14
  207. ADDQ R13, R14
  208. IMUL3Q $19, R15, CX
  209. ADDQ CX, SI
  210. MOVQ SI, CX
  211. SHRQ $51, CX
  212. ADDQ R8, CX
  213. ANDQ DX, SI
  214. MOVQ CX, R8
  215. SHRQ $51, CX
  216. ADDQ R10, CX
  217. ANDQ DX, R8
  218. MOVQ CX, R9
  219. SHRQ $51, CX
  220. ADDQ R12, CX
  221. ANDQ DX, R9
  222. MOVQ CX, AX
  223. SHRQ $51, CX
  224. ADDQ R14, CX
  225. ANDQ DX, AX
  226. MOVQ CX, R10
  227. SHRQ $51, CX
  228. IMUL3Q $19, CX, CX
  229. ADDQ CX, SI
  230. ANDQ DX, R10
  231. MOVQ SI, 80(SP)
  232. MOVQ R8, 88(SP)
  233. MOVQ R9, 96(SP)
  234. MOVQ AX, 104(SP)
  235. MOVQ R10, 112(SP)
  236. MOVQ 0(SP), AX
  237. MULQ 0(SP)
  238. MOVQ AX, SI
  239. MOVQ DX, CX
  240. MOVQ 0(SP), AX
  241. SHLQ $1, AX
  242. MULQ 8(SP)
  243. MOVQ AX, R8
  244. MOVQ DX, R9
  245. MOVQ 0(SP), AX
  246. SHLQ $1, AX
  247. MULQ 16(SP)
  248. MOVQ AX, R10
  249. MOVQ DX, R11
  250. MOVQ 0(SP), AX
  251. SHLQ $1, AX
  252. MULQ 24(SP)
  253. MOVQ AX, R12
  254. MOVQ DX, R13
  255. MOVQ 0(SP), AX
  256. SHLQ $1, AX
  257. MULQ 32(SP)
  258. MOVQ AX, R14
  259. MOVQ DX, R15
  260. MOVQ 8(SP), AX
  261. MULQ 8(SP)
  262. ADDQ AX, R10
  263. ADCQ DX, R11
  264. MOVQ 8(SP), AX
  265. SHLQ $1, AX
  266. MULQ 16(SP)
  267. ADDQ AX, R12
  268. ADCQ DX, R13
  269. MOVQ 8(SP), AX
  270. SHLQ $1, AX
  271. MULQ 24(SP)
  272. ADDQ AX, R14
  273. ADCQ DX, R15
  274. MOVQ 8(SP), DX
  275. IMUL3Q $38, DX, AX
  276. MULQ 32(SP)
  277. ADDQ AX, SI
  278. ADCQ DX, CX
  279. MOVQ 16(SP), AX
  280. MULQ 16(SP)
  281. ADDQ AX, R14
  282. ADCQ DX, R15
  283. MOVQ 16(SP), DX
  284. IMUL3Q $38, DX, AX
  285. MULQ 24(SP)
  286. ADDQ AX, SI
  287. ADCQ DX, CX
  288. MOVQ 16(SP), DX
  289. IMUL3Q $38, DX, AX
  290. MULQ 32(SP)
  291. ADDQ AX, R8
  292. ADCQ DX, R9
  293. MOVQ 24(SP), DX
  294. IMUL3Q $19, DX, AX
  295. MULQ 24(SP)
  296. ADDQ AX, R8
  297. ADCQ DX, R9
  298. MOVQ 24(SP), DX
  299. IMUL3Q $38, DX, AX
  300. MULQ 32(SP)
  301. ADDQ AX, R10
  302. ADCQ DX, R11
  303. MOVQ 32(SP), DX
  304. IMUL3Q $19, DX, AX
  305. MULQ 32(SP)
  306. ADDQ AX, R12
  307. ADCQ DX, R13
  308. MOVQ $REDMASK51, DX
  309. SHLQ $13, SI, CX
  310. ANDQ DX, SI
  311. SHLQ $13, R8, R9
  312. ANDQ DX, R8
  313. ADDQ CX, R8
  314. SHLQ $13, R10, R11
  315. ANDQ DX, R10
  316. ADDQ R9, R10
  317. SHLQ $13, R12, R13
  318. ANDQ DX, R12
  319. ADDQ R11, R12
  320. SHLQ $13, R14, R15
  321. ANDQ DX, R14
  322. ADDQ R13, R14
  323. IMUL3Q $19, R15, CX
  324. ADDQ CX, SI
  325. MOVQ SI, CX
  326. SHRQ $51, CX
  327. ADDQ R8, CX
  328. ANDQ DX, SI
  329. MOVQ CX, R8
  330. SHRQ $51, CX
  331. ADDQ R10, CX
  332. ANDQ DX, R8
  333. MOVQ CX, R9
  334. SHRQ $51, CX
  335. ADDQ R12, CX
  336. ANDQ DX, R9
  337. MOVQ CX, AX
  338. SHRQ $51, CX
  339. ADDQ R14, CX
  340. ANDQ DX, AX
  341. MOVQ CX, R10
  342. SHRQ $51, CX
  343. IMUL3Q $19, CX, CX
  344. ADDQ CX, SI
  345. ANDQ DX, R10
  346. MOVQ SI, 120(SP)
  347. MOVQ R8, 128(SP)
  348. MOVQ R9, 136(SP)
  349. MOVQ AX, 144(SP)
  350. MOVQ R10, 152(SP)
  351. MOVQ SI, SI
  352. MOVQ R8, DX
  353. MOVQ R9, CX
  354. MOVQ AX, R8
  355. MOVQ R10, R9
  356. ADDQ ·_2P0(SB), SI
  357. ADDQ ·_2P1234(SB), DX
  358. ADDQ ·_2P1234(SB), CX
  359. ADDQ ·_2P1234(SB), R8
  360. ADDQ ·_2P1234(SB), R9
  361. SUBQ 80(SP), SI
  362. SUBQ 88(SP), DX
  363. SUBQ 96(SP), CX
  364. SUBQ 104(SP), R8
  365. SUBQ 112(SP), R9
  366. MOVQ SI, 160(SP)
  367. MOVQ DX, 168(SP)
  368. MOVQ CX, 176(SP)
  369. MOVQ R8, 184(SP)
  370. MOVQ R9, 192(SP)
  371. MOVQ 120(DI), SI
  372. MOVQ 128(DI), DX
  373. MOVQ 136(DI), CX
  374. MOVQ 144(DI), R8
  375. MOVQ 152(DI), R9
  376. MOVQ SI, AX
  377. MOVQ DX, R10
  378. MOVQ CX, R11
  379. MOVQ R8, R12
  380. MOVQ R9, R13
  381. ADDQ ·_2P0(SB), AX
  382. ADDQ ·_2P1234(SB), R10
  383. ADDQ ·_2P1234(SB), R11
  384. ADDQ ·_2P1234(SB), R12
  385. ADDQ ·_2P1234(SB), R13
  386. ADDQ 160(DI), SI
  387. ADDQ 168(DI), DX
  388. ADDQ 176(DI), CX
  389. ADDQ 184(DI), R8
  390. ADDQ 192(DI), R9
  391. SUBQ 160(DI), AX
  392. SUBQ 168(DI), R10
  393. SUBQ 176(DI), R11
  394. SUBQ 184(DI), R12
  395. SUBQ 192(DI), R13
  396. MOVQ SI, 200(SP)
  397. MOVQ DX, 208(SP)
  398. MOVQ CX, 216(SP)
  399. MOVQ R8, 224(SP)
  400. MOVQ R9, 232(SP)
  401. MOVQ AX, 240(SP)
  402. MOVQ R10, 248(SP)
  403. MOVQ R11, 256(SP)
  404. MOVQ R12, 264(SP)
  405. MOVQ R13, 272(SP)
  406. MOVQ 224(SP), SI
  407. IMUL3Q $19, SI, AX
  408. MOVQ AX, 280(SP)
  409. MULQ 56(SP)
  410. MOVQ AX, SI
  411. MOVQ DX, CX
  412. MOVQ 232(SP), DX
  413. IMUL3Q $19, DX, AX
  414. MOVQ AX, 288(SP)
  415. MULQ 48(SP)
  416. ADDQ AX, SI
  417. ADCQ DX, CX
  418. MOVQ 200(SP), AX
  419. MULQ 40(SP)
  420. ADDQ AX, SI
  421. ADCQ DX, CX
  422. MOVQ 200(SP), AX
  423. MULQ 48(SP)
  424. MOVQ AX, R8
  425. MOVQ DX, R9
  426. MOVQ 200(SP), AX
  427. MULQ 56(SP)
  428. MOVQ AX, R10
  429. MOVQ DX, R11
  430. MOVQ 200(SP), AX
  431. MULQ 64(SP)
  432. MOVQ AX, R12
  433. MOVQ DX, R13
  434. MOVQ 200(SP), AX
  435. MULQ 72(SP)
  436. MOVQ AX, R14
  437. MOVQ DX, R15
  438. MOVQ 208(SP), AX
  439. MULQ 40(SP)
  440. ADDQ AX, R8
  441. ADCQ DX, R9
  442. MOVQ 208(SP), AX
  443. MULQ 48(SP)
  444. ADDQ AX, R10
  445. ADCQ DX, R11
  446. MOVQ 208(SP), AX
  447. MULQ 56(SP)
  448. ADDQ AX, R12
  449. ADCQ DX, R13
  450. MOVQ 208(SP), AX
  451. MULQ 64(SP)
  452. ADDQ AX, R14
  453. ADCQ DX, R15
  454. MOVQ 208(SP), DX
  455. IMUL3Q $19, DX, AX
  456. MULQ 72(SP)
  457. ADDQ AX, SI
  458. ADCQ DX, CX
  459. MOVQ 216(SP), AX
  460. MULQ 40(SP)
  461. ADDQ AX, R10
  462. ADCQ DX, R11
  463. MOVQ 216(SP), AX
  464. MULQ 48(SP)
  465. ADDQ AX, R12
  466. ADCQ DX, R13
  467. MOVQ 216(SP), AX
  468. MULQ 56(SP)
  469. ADDQ AX, R14
  470. ADCQ DX, R15
  471. MOVQ 216(SP), DX
  472. IMUL3Q $19, DX, AX
  473. MULQ 64(SP)
  474. ADDQ AX, SI
  475. ADCQ DX, CX
  476. MOVQ 216(SP), DX
  477. IMUL3Q $19, DX, AX
  478. MULQ 72(SP)
  479. ADDQ AX, R8
  480. ADCQ DX, R9
  481. MOVQ 224(SP), AX
  482. MULQ 40(SP)
  483. ADDQ AX, R12
  484. ADCQ DX, R13
  485. MOVQ 224(SP), AX
  486. MULQ 48(SP)
  487. ADDQ AX, R14
  488. ADCQ DX, R15
  489. MOVQ 280(SP), AX
  490. MULQ 64(SP)
  491. ADDQ AX, R8
  492. ADCQ DX, R9
  493. MOVQ 280(SP), AX
  494. MULQ 72(SP)
  495. ADDQ AX, R10
  496. ADCQ DX, R11
  497. MOVQ 232(SP), AX
  498. MULQ 40(SP)
  499. ADDQ AX, R14
  500. ADCQ DX, R15
  501. MOVQ 288(SP), AX
  502. MULQ 56(SP)
  503. ADDQ AX, R8
  504. ADCQ DX, R9
  505. MOVQ 288(SP), AX
  506. MULQ 64(SP)
  507. ADDQ AX, R10
  508. ADCQ DX, R11
  509. MOVQ 288(SP), AX
  510. MULQ 72(SP)
  511. ADDQ AX, R12
  512. ADCQ DX, R13
  513. MOVQ $REDMASK51, DX
  514. SHLQ $13, SI, CX
  515. ANDQ DX, SI
  516. SHLQ $13, R8, R9
  517. ANDQ DX, R8
  518. ADDQ CX, R8
  519. SHLQ $13, R10, R11
  520. ANDQ DX, R10
  521. ADDQ R9, R10
  522. SHLQ $13, R12, R13
  523. ANDQ DX, R12
  524. ADDQ R11, R12
  525. SHLQ $13, R14, R15
  526. ANDQ DX, R14
  527. ADDQ R13, R14
  528. IMUL3Q $19, R15, CX
  529. ADDQ CX, SI
  530. MOVQ SI, CX
  531. SHRQ $51, CX
  532. ADDQ R8, CX
  533. MOVQ CX, R8
  534. SHRQ $51, CX
  535. ANDQ DX, SI
  536. ADDQ R10, CX
  537. MOVQ CX, R9
  538. SHRQ $51, CX
  539. ANDQ DX, R8
  540. ADDQ R12, CX
  541. MOVQ CX, AX
  542. SHRQ $51, CX
  543. ANDQ DX, R9
  544. ADDQ R14, CX
  545. MOVQ CX, R10
  546. SHRQ $51, CX
  547. ANDQ DX, AX
  548. IMUL3Q $19, CX, CX
  549. ADDQ CX, SI
  550. ANDQ DX, R10
  551. MOVQ SI, 40(SP)
  552. MOVQ R8, 48(SP)
  553. MOVQ R9, 56(SP)
  554. MOVQ AX, 64(SP)
  555. MOVQ R10, 72(SP)
  556. MOVQ 264(SP), SI
  557. IMUL3Q $19, SI, AX
  558. MOVQ AX, 200(SP)
  559. MULQ 16(SP)
  560. MOVQ AX, SI
  561. MOVQ DX, CX
  562. MOVQ 272(SP), DX
  563. IMUL3Q $19, DX, AX
  564. MOVQ AX, 208(SP)
  565. MULQ 8(SP)
  566. ADDQ AX, SI
  567. ADCQ DX, CX
  568. MOVQ 240(SP), AX
  569. MULQ 0(SP)
  570. ADDQ AX, SI
  571. ADCQ DX, CX
  572. MOVQ 240(SP), AX
  573. MULQ 8(SP)
  574. MOVQ AX, R8
  575. MOVQ DX, R9
  576. MOVQ 240(SP), AX
  577. MULQ 16(SP)
  578. MOVQ AX, R10
  579. MOVQ DX, R11
  580. MOVQ 240(SP), AX
  581. MULQ 24(SP)
  582. MOVQ AX, R12
  583. MOVQ DX, R13
  584. MOVQ 240(SP), AX
  585. MULQ 32(SP)
  586. MOVQ AX, R14
  587. MOVQ DX, R15
  588. MOVQ 248(SP), AX
  589. MULQ 0(SP)
  590. ADDQ AX, R8
  591. ADCQ DX, R9
  592. MOVQ 248(SP), AX
  593. MULQ 8(SP)
  594. ADDQ AX, R10
  595. ADCQ DX, R11
  596. MOVQ 248(SP), AX
  597. MULQ 16(SP)
  598. ADDQ AX, R12
  599. ADCQ DX, R13
  600. MOVQ 248(SP), AX
  601. MULQ 24(SP)
  602. ADDQ AX, R14
  603. ADCQ DX, R15
  604. MOVQ 248(SP), DX
  605. IMUL3Q $19, DX, AX
  606. MULQ 32(SP)
  607. ADDQ AX, SI
  608. ADCQ DX, CX
  609. MOVQ 256(SP), AX
  610. MULQ 0(SP)
  611. ADDQ AX, R10
  612. ADCQ DX, R11
  613. MOVQ 256(SP), AX
  614. MULQ 8(SP)
  615. ADDQ AX, R12
  616. ADCQ DX, R13
  617. MOVQ 256(SP), AX
  618. MULQ 16(SP)
  619. ADDQ AX, R14
  620. ADCQ DX, R15
  621. MOVQ 256(SP), DX
  622. IMUL3Q $19, DX, AX
  623. MULQ 24(SP)
  624. ADDQ AX, SI
  625. ADCQ DX, CX
  626. MOVQ 256(SP), DX
  627. IMUL3Q $19, DX, AX
  628. MULQ 32(SP)
  629. ADDQ AX, R8
  630. ADCQ DX, R9
  631. MOVQ 264(SP), AX
  632. MULQ 0(SP)
  633. ADDQ AX, R12
  634. ADCQ DX, R13
  635. MOVQ 264(SP), AX
  636. MULQ 8(SP)
  637. ADDQ AX, R14
  638. ADCQ DX, R15
  639. MOVQ 200(SP), AX
  640. MULQ 24(SP)
  641. ADDQ AX, R8
  642. ADCQ DX, R9
  643. MOVQ 200(SP), AX
  644. MULQ 32(SP)
  645. ADDQ AX, R10
  646. ADCQ DX, R11
  647. MOVQ 272(SP), AX
  648. MULQ 0(SP)
  649. ADDQ AX, R14
  650. ADCQ DX, R15
  651. MOVQ 208(SP), AX
  652. MULQ 16(SP)
  653. ADDQ AX, R8
  654. ADCQ DX, R9
  655. MOVQ 208(SP), AX
  656. MULQ 24(SP)
  657. ADDQ AX, R10
  658. ADCQ DX, R11
  659. MOVQ 208(SP), AX
  660. MULQ 32(SP)
  661. ADDQ AX, R12
  662. ADCQ DX, R13
  663. MOVQ $REDMASK51, DX
  664. SHLQ $13, SI, CX
  665. ANDQ DX, SI
  666. SHLQ $13, R8, R9
  667. ANDQ DX, R8
  668. ADDQ CX, R8
  669. SHLQ $13, R10, R11
  670. ANDQ DX, R10
  671. ADDQ R9, R10
  672. SHLQ $13, R12, R13
  673. ANDQ DX, R12
  674. ADDQ R11, R12
  675. SHLQ $13, R14, R15
  676. ANDQ DX, R14
  677. ADDQ R13, R14
  678. IMUL3Q $19, R15, CX
  679. ADDQ CX, SI
  680. MOVQ SI, CX
  681. SHRQ $51, CX
  682. ADDQ R8, CX
  683. MOVQ CX, R8
  684. SHRQ $51, CX
  685. ANDQ DX, SI
  686. ADDQ R10, CX
  687. MOVQ CX, R9
  688. SHRQ $51, CX
  689. ANDQ DX, R8
  690. ADDQ R12, CX
  691. MOVQ CX, AX
  692. SHRQ $51, CX
  693. ANDQ DX, R9
  694. ADDQ R14, CX
  695. MOVQ CX, R10
  696. SHRQ $51, CX
  697. ANDQ DX, AX
  698. IMUL3Q $19, CX, CX
  699. ADDQ CX, SI
  700. ANDQ DX, R10
  701. MOVQ SI, DX
  702. MOVQ R8, CX
  703. MOVQ R9, R11
  704. MOVQ AX, R12
  705. MOVQ R10, R13
  706. ADDQ ·_2P0(SB), DX
  707. ADDQ ·_2P1234(SB), CX
  708. ADDQ ·_2P1234(SB), R11
  709. ADDQ ·_2P1234(SB), R12
  710. ADDQ ·_2P1234(SB), R13
  711. ADDQ 40(SP), SI
  712. ADDQ 48(SP), R8
  713. ADDQ 56(SP), R9
  714. ADDQ 64(SP), AX
  715. ADDQ 72(SP), R10
  716. SUBQ 40(SP), DX
  717. SUBQ 48(SP), CX
  718. SUBQ 56(SP), R11
  719. SUBQ 64(SP), R12
  720. SUBQ 72(SP), R13
  721. MOVQ SI, 120(DI)
  722. MOVQ R8, 128(DI)
  723. MOVQ R9, 136(DI)
  724. MOVQ AX, 144(DI)
  725. MOVQ R10, 152(DI)
  726. MOVQ DX, 160(DI)
  727. MOVQ CX, 168(DI)
  728. MOVQ R11, 176(DI)
  729. MOVQ R12, 184(DI)
  730. MOVQ R13, 192(DI)
  731. MOVQ 120(DI), AX
  732. MULQ 120(DI)
  733. MOVQ AX, SI
  734. MOVQ DX, CX
  735. MOVQ 120(DI), AX
  736. SHLQ $1, AX
  737. MULQ 128(DI)
  738. MOVQ AX, R8
  739. MOVQ DX, R9
  740. MOVQ 120(DI), AX
  741. SHLQ $1, AX
  742. MULQ 136(DI)
  743. MOVQ AX, R10
  744. MOVQ DX, R11
  745. MOVQ 120(DI), AX
  746. SHLQ $1, AX
  747. MULQ 144(DI)
  748. MOVQ AX, R12
  749. MOVQ DX, R13
  750. MOVQ 120(DI), AX
  751. SHLQ $1, AX
  752. MULQ 152(DI)
  753. MOVQ AX, R14
  754. MOVQ DX, R15
  755. MOVQ 128(DI), AX
  756. MULQ 128(DI)
  757. ADDQ AX, R10
  758. ADCQ DX, R11
  759. MOVQ 128(DI), AX
  760. SHLQ $1, AX
  761. MULQ 136(DI)
  762. ADDQ AX, R12
  763. ADCQ DX, R13
  764. MOVQ 128(DI), AX
  765. SHLQ $1, AX
  766. MULQ 144(DI)
  767. ADDQ AX, R14
  768. ADCQ DX, R15
  769. MOVQ 128(DI), DX
  770. IMUL3Q $38, DX, AX
  771. MULQ 152(DI)
  772. ADDQ AX, SI
  773. ADCQ DX, CX
  774. MOVQ 136(DI), AX
  775. MULQ 136(DI)
  776. ADDQ AX, R14
  777. ADCQ DX, R15
  778. MOVQ 136(DI), DX
  779. IMUL3Q $38, DX, AX
  780. MULQ 144(DI)
  781. ADDQ AX, SI
  782. ADCQ DX, CX
  783. MOVQ 136(DI), DX
  784. IMUL3Q $38, DX, AX
  785. MULQ 152(DI)
  786. ADDQ AX, R8
  787. ADCQ DX, R9
  788. MOVQ 144(DI), DX
  789. IMUL3Q $19, DX, AX
  790. MULQ 144(DI)
  791. ADDQ AX, R8
  792. ADCQ DX, R9
  793. MOVQ 144(DI), DX
  794. IMUL3Q $38, DX, AX
  795. MULQ 152(DI)
  796. ADDQ AX, R10
  797. ADCQ DX, R11
  798. MOVQ 152(DI), DX
  799. IMUL3Q $19, DX, AX
  800. MULQ 152(DI)
  801. ADDQ AX, R12
  802. ADCQ DX, R13
  803. MOVQ $REDMASK51, DX
  804. SHLQ $13, SI, CX
  805. ANDQ DX, SI
  806. SHLQ $13, R8, R9
  807. ANDQ DX, R8
  808. ADDQ CX, R8
  809. SHLQ $13, R10, R11
  810. ANDQ DX, R10
  811. ADDQ R9, R10
  812. SHLQ $13, R12, R13
  813. ANDQ DX, R12
  814. ADDQ R11, R12
  815. SHLQ $13, R14, R15
  816. ANDQ DX, R14
  817. ADDQ R13, R14
  818. IMUL3Q $19, R15, CX
  819. ADDQ CX, SI
  820. MOVQ SI, CX
  821. SHRQ $51, CX
  822. ADDQ R8, CX
  823. ANDQ DX, SI
  824. MOVQ CX, R8
  825. SHRQ $51, CX
  826. ADDQ R10, CX
  827. ANDQ DX, R8
  828. MOVQ CX, R9
  829. SHRQ $51, CX
  830. ADDQ R12, CX
  831. ANDQ DX, R9
  832. MOVQ CX, AX
  833. SHRQ $51, CX
  834. ADDQ R14, CX
  835. ANDQ DX, AX
  836. MOVQ CX, R10
  837. SHRQ $51, CX
  838. IMUL3Q $19, CX, CX
  839. ADDQ CX, SI
  840. ANDQ DX, R10
  841. MOVQ SI, 120(DI)
  842. MOVQ R8, 128(DI)
  843. MOVQ R9, 136(DI)
  844. MOVQ AX, 144(DI)
  845. MOVQ R10, 152(DI)
  846. MOVQ 160(DI), AX
  847. MULQ 160(DI)
  848. MOVQ AX, SI
  849. MOVQ DX, CX
  850. MOVQ 160(DI), AX
  851. SHLQ $1, AX
  852. MULQ 168(DI)
  853. MOVQ AX, R8
  854. MOVQ DX, R9
  855. MOVQ 160(DI), AX
  856. SHLQ $1, AX
  857. MULQ 176(DI)
  858. MOVQ AX, R10
  859. MOVQ DX, R11
  860. MOVQ 160(DI), AX
  861. SHLQ $1, AX
  862. MULQ 184(DI)
  863. MOVQ AX, R12
  864. MOVQ DX, R13
  865. MOVQ 160(DI), AX
  866. SHLQ $1, AX
  867. MULQ 192(DI)
  868. MOVQ AX, R14
  869. MOVQ DX, R15
  870. MOVQ 168(DI), AX
  871. MULQ 168(DI)
  872. ADDQ AX, R10
  873. ADCQ DX, R11
  874. MOVQ 168(DI), AX
  875. SHLQ $1, AX
  876. MULQ 176(DI)
  877. ADDQ AX, R12
  878. ADCQ DX, R13
  879. MOVQ 168(DI), AX
  880. SHLQ $1, AX
  881. MULQ 184(DI)
  882. ADDQ AX, R14
  883. ADCQ DX, R15
  884. MOVQ 168(DI), DX
  885. IMUL3Q $38, DX, AX
  886. MULQ 192(DI)
  887. ADDQ AX, SI
  888. ADCQ DX, CX
  889. MOVQ 176(DI), AX
  890. MULQ 176(DI)
  891. ADDQ AX, R14
  892. ADCQ DX, R15
  893. MOVQ 176(DI), DX
  894. IMUL3Q $38, DX, AX
  895. MULQ 184(DI)
  896. ADDQ AX, SI
  897. ADCQ DX, CX
  898. MOVQ 176(DI), DX
  899. IMUL3Q $38, DX, AX
  900. MULQ 192(DI)
  901. ADDQ AX, R8
  902. ADCQ DX, R9
  903. MOVQ 184(DI), DX
  904. IMUL3Q $19, DX, AX
  905. MULQ 184(DI)
  906. ADDQ AX, R8
  907. ADCQ DX, R9
  908. MOVQ 184(DI), DX
  909. IMUL3Q $38, DX, AX
  910. MULQ 192(DI)
  911. ADDQ AX, R10
  912. ADCQ DX, R11
  913. MOVQ 192(DI), DX
  914. IMUL3Q $19, DX, AX
  915. MULQ 192(DI)
  916. ADDQ AX, R12
  917. ADCQ DX, R13
  918. MOVQ $REDMASK51, DX
  919. SHLQ $13, SI, CX
  920. ANDQ DX, SI
  921. SHLQ $13, R8, R9
  922. ANDQ DX, R8
  923. ADDQ CX, R8
  924. SHLQ $13, R10, R11
  925. ANDQ DX, R10
  926. ADDQ R9, R10
  927. SHLQ $13, R12, R13
  928. ANDQ DX, R12
  929. ADDQ R11, R12
  930. SHLQ $13, R14, R15
  931. ANDQ DX, R14
  932. ADDQ R13, R14
  933. IMUL3Q $19, R15, CX
  934. ADDQ CX, SI
  935. MOVQ SI, CX
  936. SHRQ $51, CX
  937. ADDQ R8, CX
  938. ANDQ DX, SI
  939. MOVQ CX, R8
  940. SHRQ $51, CX
  941. ADDQ R10, CX
  942. ANDQ DX, R8
  943. MOVQ CX, R9
  944. SHRQ $51, CX
  945. ADDQ R12, CX
  946. ANDQ DX, R9
  947. MOVQ CX, AX
  948. SHRQ $51, CX
  949. ADDQ R14, CX
  950. ANDQ DX, AX
  951. MOVQ CX, R10
  952. SHRQ $51, CX
  953. IMUL3Q $19, CX, CX
  954. ADDQ CX, SI
  955. ANDQ DX, R10
  956. MOVQ SI, 160(DI)
  957. MOVQ R8, 168(DI)
  958. MOVQ R9, 176(DI)
  959. MOVQ AX, 184(DI)
  960. MOVQ R10, 192(DI)
  961. MOVQ 184(DI), SI
  962. IMUL3Q $19, SI, AX
  963. MOVQ AX, 0(SP)
  964. MULQ 16(DI)
  965. MOVQ AX, SI
  966. MOVQ DX, CX
  967. MOVQ 192(DI), DX
  968. IMUL3Q $19, DX, AX
  969. MOVQ AX, 8(SP)
  970. MULQ 8(DI)
  971. ADDQ AX, SI
  972. ADCQ DX, CX
  973. MOVQ 160(DI), AX
  974. MULQ 0(DI)
  975. ADDQ AX, SI
  976. ADCQ DX, CX
  977. MOVQ 160(DI), AX
  978. MULQ 8(DI)
  979. MOVQ AX, R8
  980. MOVQ DX, R9
  981. MOVQ 160(DI), AX
  982. MULQ 16(DI)
  983. MOVQ AX, R10
  984. MOVQ DX, R11
  985. MOVQ 160(DI), AX
  986. MULQ 24(DI)
  987. MOVQ AX, R12
  988. MOVQ DX, R13
  989. MOVQ 160(DI), AX
  990. MULQ 32(DI)
  991. MOVQ AX, R14
  992. MOVQ DX, R15
  993. MOVQ 168(DI), AX
  994. MULQ 0(DI)
  995. ADDQ AX, R8
  996. ADCQ DX, R9
  997. MOVQ 168(DI), AX
  998. MULQ 8(DI)
  999. ADDQ AX, R10
  1000. ADCQ DX, R11
  1001. MOVQ 168(DI), AX
  1002. MULQ 16(DI)
  1003. ADDQ AX, R12
  1004. ADCQ DX, R13
  1005. MOVQ 168(DI), AX
  1006. MULQ 24(DI)
  1007. ADDQ AX, R14
  1008. ADCQ DX, R15
  1009. MOVQ 168(DI), DX
  1010. IMUL3Q $19, DX, AX
  1011. MULQ 32(DI)
  1012. ADDQ AX, SI
  1013. ADCQ DX, CX
  1014. MOVQ 176(DI), AX
  1015. MULQ 0(DI)
  1016. ADDQ AX, R10
  1017. ADCQ DX, R11
  1018. MOVQ 176(DI), AX
  1019. MULQ 8(DI)
  1020. ADDQ AX, R12
  1021. ADCQ DX, R13
  1022. MOVQ 176(DI), AX
  1023. MULQ 16(DI)
  1024. ADDQ AX, R14
  1025. ADCQ DX, R15
  1026. MOVQ 176(DI), DX
  1027. IMUL3Q $19, DX, AX
  1028. MULQ 24(DI)
  1029. ADDQ AX, SI
  1030. ADCQ DX, CX
  1031. MOVQ 176(DI), DX
  1032. IMUL3Q $19, DX, AX
  1033. MULQ 32(DI)
  1034. ADDQ AX, R8
  1035. ADCQ DX, R9
  1036. MOVQ 184(DI), AX
  1037. MULQ 0(DI)
  1038. ADDQ AX, R12
  1039. ADCQ DX, R13
  1040. MOVQ 184(DI), AX
  1041. MULQ 8(DI)
  1042. ADDQ AX, R14
  1043. ADCQ DX, R15
  1044. MOVQ 0(SP), AX
  1045. MULQ 24(DI)
  1046. ADDQ AX, R8
  1047. ADCQ DX, R9
  1048. MOVQ 0(SP), AX
  1049. MULQ 32(DI)
  1050. ADDQ AX, R10
  1051. ADCQ DX, R11
  1052. MOVQ 192(DI), AX
  1053. MULQ 0(DI)
  1054. ADDQ AX, R14
  1055. ADCQ DX, R15
  1056. MOVQ 8(SP), AX
  1057. MULQ 16(DI)
  1058. ADDQ AX, R8
  1059. ADCQ DX, R9
  1060. MOVQ 8(SP), AX
  1061. MULQ 24(DI)
  1062. ADDQ AX, R10
  1063. ADCQ DX, R11
  1064. MOVQ 8(SP), AX
  1065. MULQ 32(DI)
  1066. ADDQ AX, R12
  1067. ADCQ DX, R13
  1068. MOVQ $REDMASK51, DX
  1069. SHLQ $13, SI, CX
  1070. ANDQ DX, SI
  1071. SHLQ $13, R8, R9
  1072. ANDQ DX, R8
  1073. ADDQ CX, R8
  1074. SHLQ $13, R10, R11
  1075. ANDQ DX, R10
  1076. ADDQ R9, R10
  1077. SHLQ $13, R12, R13
  1078. ANDQ DX, R12
  1079. ADDQ R11, R12
  1080. SHLQ $13, R14, R15
  1081. ANDQ DX, R14
  1082. ADDQ R13, R14
  1083. IMUL3Q $19, R15, CX
  1084. ADDQ CX, SI
  1085. MOVQ SI, CX
  1086. SHRQ $51, CX
  1087. ADDQ R8, CX
  1088. MOVQ CX, R8
  1089. SHRQ $51, CX
  1090. ANDQ DX, SI
  1091. ADDQ R10, CX
  1092. MOVQ CX, R9
  1093. SHRQ $51, CX
  1094. ANDQ DX, R8
  1095. ADDQ R12, CX
  1096. MOVQ CX, AX
  1097. SHRQ $51, CX
  1098. ANDQ DX, R9
  1099. ADDQ R14, CX
  1100. MOVQ CX, R10
  1101. SHRQ $51, CX
  1102. ANDQ DX, AX
  1103. IMUL3Q $19, CX, CX
  1104. ADDQ CX, SI
  1105. ANDQ DX, R10
  1106. MOVQ SI, 160(DI)
  1107. MOVQ R8, 168(DI)
  1108. MOVQ R9, 176(DI)
  1109. MOVQ AX, 184(DI)
  1110. MOVQ R10, 192(DI)
  1111. MOVQ 144(SP), SI
  1112. IMUL3Q $19, SI, AX
  1113. MOVQ AX, 0(SP)
  1114. MULQ 96(SP)
  1115. MOVQ AX, SI
  1116. MOVQ DX, CX
  1117. MOVQ 152(SP), DX
  1118. IMUL3Q $19, DX, AX
  1119. MOVQ AX, 8(SP)
  1120. MULQ 88(SP)
  1121. ADDQ AX, SI
  1122. ADCQ DX, CX
  1123. MOVQ 120(SP), AX
  1124. MULQ 80(SP)
  1125. ADDQ AX, SI
  1126. ADCQ DX, CX
  1127. MOVQ 120(SP), AX
  1128. MULQ 88(SP)
  1129. MOVQ AX, R8
  1130. MOVQ DX, R9
  1131. MOVQ 120(SP), AX
  1132. MULQ 96(SP)
  1133. MOVQ AX, R10
  1134. MOVQ DX, R11
  1135. MOVQ 120(SP), AX
  1136. MULQ 104(SP)
  1137. MOVQ AX, R12
  1138. MOVQ DX, R13
  1139. MOVQ 120(SP), AX
  1140. MULQ 112(SP)
  1141. MOVQ AX, R14
  1142. MOVQ DX, R15
  1143. MOVQ 128(SP), AX
  1144. MULQ 80(SP)
  1145. ADDQ AX, R8
  1146. ADCQ DX, R9
  1147. MOVQ 128(SP), AX
  1148. MULQ 88(SP)
  1149. ADDQ AX, R10
  1150. ADCQ DX, R11
  1151. MOVQ 128(SP), AX
  1152. MULQ 96(SP)
  1153. ADDQ AX, R12
  1154. ADCQ DX, R13
  1155. MOVQ 128(SP), AX
  1156. MULQ 104(SP)
  1157. ADDQ AX, R14
  1158. ADCQ DX, R15
  1159. MOVQ 128(SP), DX
  1160. IMUL3Q $19, DX, AX
  1161. MULQ 112(SP)
  1162. ADDQ AX, SI
  1163. ADCQ DX, CX
  1164. MOVQ 136(SP), AX
  1165. MULQ 80(SP)
  1166. ADDQ AX, R10
  1167. ADCQ DX, R11
  1168. MOVQ 136(SP), AX
  1169. MULQ 88(SP)
  1170. ADDQ AX, R12
  1171. ADCQ DX, R13
  1172. MOVQ 136(SP), AX
  1173. MULQ 96(SP)
  1174. ADDQ AX, R14
  1175. ADCQ DX, R15
  1176. MOVQ 136(SP), DX
  1177. IMUL3Q $19, DX, AX
  1178. MULQ 104(SP)
  1179. ADDQ AX, SI
  1180. ADCQ DX, CX
  1181. MOVQ 136(SP), DX
  1182. IMUL3Q $19, DX, AX
  1183. MULQ 112(SP)
  1184. ADDQ AX, R8
  1185. ADCQ DX, R9
  1186. MOVQ 144(SP), AX
  1187. MULQ 80(SP)
  1188. ADDQ AX, R12
  1189. ADCQ DX, R13
  1190. MOVQ 144(SP), AX
  1191. MULQ 88(SP)
  1192. ADDQ AX, R14
  1193. ADCQ DX, R15
  1194. MOVQ 0(SP), AX
  1195. MULQ 104(SP)
  1196. ADDQ AX, R8
  1197. ADCQ DX, R9
  1198. MOVQ 0(SP), AX
  1199. MULQ 112(SP)
  1200. ADDQ AX, R10
  1201. ADCQ DX, R11
  1202. MOVQ 152(SP), AX
  1203. MULQ 80(SP)
  1204. ADDQ AX, R14
  1205. ADCQ DX, R15
  1206. MOVQ 8(SP), AX
  1207. MULQ 96(SP)
  1208. ADDQ AX, R8
  1209. ADCQ DX, R9
  1210. MOVQ 8(SP), AX
  1211. MULQ 104(SP)
  1212. ADDQ AX, R10
  1213. ADCQ DX, R11
  1214. MOVQ 8(SP), AX
  1215. MULQ 112(SP)
  1216. ADDQ AX, R12
  1217. ADCQ DX, R13
  1218. MOVQ $REDMASK51, DX
  1219. SHLQ $13, SI, CX
  1220. ANDQ DX, SI
  1221. SHLQ $13, R8, R9
  1222. ANDQ DX, R8
  1223. ADDQ CX, R8
  1224. SHLQ $13, R10, R11
  1225. ANDQ DX, R10
  1226. ADDQ R9, R10
  1227. SHLQ $13, R12, R13
  1228. ANDQ DX, R12
  1229. ADDQ R11, R12
  1230. SHLQ $13, R14, R15
  1231. ANDQ DX, R14
  1232. ADDQ R13, R14
  1233. IMUL3Q $19, R15, CX
  1234. ADDQ CX, SI
  1235. MOVQ SI, CX
  1236. SHRQ $51, CX
  1237. ADDQ R8, CX
  1238. MOVQ CX, R8
  1239. SHRQ $51, CX
  1240. ANDQ DX, SI
  1241. ADDQ R10, CX
  1242. MOVQ CX, R9
  1243. SHRQ $51, CX
  1244. ANDQ DX, R8
  1245. ADDQ R12, CX
  1246. MOVQ CX, AX
  1247. SHRQ $51, CX
  1248. ANDQ DX, R9
  1249. ADDQ R14, CX
  1250. MOVQ CX, R10
  1251. SHRQ $51, CX
  1252. ANDQ DX, AX
  1253. IMUL3Q $19, CX, CX
  1254. ADDQ CX, SI
  1255. ANDQ DX, R10
  1256. MOVQ SI, 40(DI)
  1257. MOVQ R8, 48(DI)
  1258. MOVQ R9, 56(DI)
  1259. MOVQ AX, 64(DI)
  1260. MOVQ R10, 72(DI)
  1261. MOVQ 160(SP), AX
  1262. MULQ ·_121666_213(SB)
  1263. SHRQ $13, AX
  1264. MOVQ AX, SI
  1265. MOVQ DX, CX
  1266. MOVQ 168(SP), AX
  1267. MULQ ·_121666_213(SB)
  1268. SHRQ $13, AX
  1269. ADDQ AX, CX
  1270. MOVQ DX, R8
  1271. MOVQ 176(SP), AX
  1272. MULQ ·_121666_213(SB)
  1273. SHRQ $13, AX
  1274. ADDQ AX, R8
  1275. MOVQ DX, R9
  1276. MOVQ 184(SP), AX
  1277. MULQ ·_121666_213(SB)
  1278. SHRQ $13, AX
  1279. ADDQ AX, R9
  1280. MOVQ DX, R10
  1281. MOVQ 192(SP), AX
  1282. MULQ ·_121666_213(SB)
  1283. SHRQ $13, AX
  1284. ADDQ AX, R10
  1285. IMUL3Q $19, DX, DX
  1286. ADDQ DX, SI
  1287. ADDQ 80(SP), SI
  1288. ADDQ 88(SP), CX
  1289. ADDQ 96(SP), R8
  1290. ADDQ 104(SP), R9
  1291. ADDQ 112(SP), R10
  1292. MOVQ SI, 80(DI)
  1293. MOVQ CX, 88(DI)
  1294. MOVQ R8, 96(DI)
  1295. MOVQ R9, 104(DI)
  1296. MOVQ R10, 112(DI)
  1297. MOVQ 104(DI), SI
  1298. IMUL3Q $19, SI, AX
  1299. MOVQ AX, 0(SP)
  1300. MULQ 176(SP)
  1301. MOVQ AX, SI
  1302. MOVQ DX, CX
  1303. MOVQ 112(DI), DX
  1304. IMUL3Q $19, DX, AX
  1305. MOVQ AX, 8(SP)
  1306. MULQ 168(SP)
  1307. ADDQ AX, SI
  1308. ADCQ DX, CX
  1309. MOVQ 80(DI), AX
  1310. MULQ 160(SP)
  1311. ADDQ AX, SI
  1312. ADCQ DX, CX
  1313. MOVQ 80(DI), AX
  1314. MULQ 168(SP)
  1315. MOVQ AX, R8
  1316. MOVQ DX, R9
  1317. MOVQ 80(DI), AX
  1318. MULQ 176(SP)
  1319. MOVQ AX, R10
  1320. MOVQ DX, R11
  1321. MOVQ 80(DI), AX
  1322. MULQ 184(SP)
  1323. MOVQ AX, R12
  1324. MOVQ DX, R13
  1325. MOVQ 80(DI), AX
  1326. MULQ 192(SP)
  1327. MOVQ AX, R14
  1328. MOVQ DX, R15
  1329. MOVQ 88(DI), AX
  1330. MULQ 160(SP)
  1331. ADDQ AX, R8
  1332. ADCQ DX, R9
  1333. MOVQ 88(DI), AX
  1334. MULQ 168(SP)
  1335. ADDQ AX, R10
  1336. ADCQ DX, R11
  1337. MOVQ 88(DI), AX
  1338. MULQ 176(SP)
  1339. ADDQ AX, R12
  1340. ADCQ DX, R13
  1341. MOVQ 88(DI), AX
  1342. MULQ 184(SP)
  1343. ADDQ AX, R14
  1344. ADCQ DX, R15
  1345. MOVQ 88(DI), DX
  1346. IMUL3Q $19, DX, AX
  1347. MULQ 192(SP)
  1348. ADDQ AX, SI
  1349. ADCQ DX, CX
  1350. MOVQ 96(DI), AX
  1351. MULQ 160(SP)
  1352. ADDQ AX, R10
  1353. ADCQ DX, R11
  1354. MOVQ 96(DI), AX
  1355. MULQ 168(SP)
  1356. ADDQ AX, R12
  1357. ADCQ DX, R13
  1358. MOVQ 96(DI), AX
  1359. MULQ 176(SP)
  1360. ADDQ AX, R14
  1361. ADCQ DX, R15
  1362. MOVQ 96(DI), DX
  1363. IMUL3Q $19, DX, AX
  1364. MULQ 184(SP)
  1365. ADDQ AX, SI
  1366. ADCQ DX, CX
  1367. MOVQ 96(DI), DX
  1368. IMUL3Q $19, DX, AX
  1369. MULQ 192(SP)
  1370. ADDQ AX, R8
  1371. ADCQ DX, R9
  1372. MOVQ 104(DI), AX
  1373. MULQ 160(SP)
  1374. ADDQ AX, R12
  1375. ADCQ DX, R13
  1376. MOVQ 104(DI), AX
  1377. MULQ 168(SP)
  1378. ADDQ AX, R14
  1379. ADCQ DX, R15
  1380. MOVQ 0(SP), AX
  1381. MULQ 184(SP)
  1382. ADDQ AX, R8
  1383. ADCQ DX, R9
  1384. MOVQ 0(SP), AX
  1385. MULQ 192(SP)
  1386. ADDQ AX, R10
  1387. ADCQ DX, R11
  1388. MOVQ 112(DI), AX
  1389. MULQ 160(SP)
  1390. ADDQ AX, R14
  1391. ADCQ DX, R15
  1392. MOVQ 8(SP), AX
  1393. MULQ 176(SP)
  1394. ADDQ AX, R8
  1395. ADCQ DX, R9
  1396. MOVQ 8(SP), AX
  1397. MULQ 184(SP)
  1398. ADDQ AX, R10
  1399. ADCQ DX, R11
  1400. MOVQ 8(SP), AX
  1401. MULQ 192(SP)
  1402. ADDQ AX, R12
  1403. ADCQ DX, R13
  1404. MOVQ $REDMASK51, DX
  1405. SHLQ $13, SI, CX
  1406. ANDQ DX, SI
  1407. SHLQ $13, R8, R9
  1408. ANDQ DX, R8
  1409. ADDQ CX, R8
  1410. SHLQ $13, R10, R11
  1411. ANDQ DX, R10
  1412. ADDQ R9, R10
  1413. SHLQ $13, R12, R13
  1414. ANDQ DX, R12
  1415. ADDQ R11, R12
  1416. SHLQ $13, R14, R15
  1417. ANDQ DX, R14
  1418. ADDQ R13, R14
  1419. IMUL3Q $19, R15, CX
  1420. ADDQ CX, SI
  1421. MOVQ SI, CX
  1422. SHRQ $51, CX
  1423. ADDQ R8, CX
  1424. MOVQ CX, R8
  1425. SHRQ $51, CX
  1426. ANDQ DX, SI
  1427. ADDQ R10, CX
  1428. MOVQ CX, R9
  1429. SHRQ $51, CX
  1430. ANDQ DX, R8
  1431. ADDQ R12, CX
  1432. MOVQ CX, AX
  1433. SHRQ $51, CX
  1434. ANDQ DX, R9
  1435. ADDQ R14, CX
  1436. MOVQ CX, R10
  1437. SHRQ $51, CX
  1438. ANDQ DX, AX
  1439. IMUL3Q $19, CX, CX
  1440. ADDQ CX, SI
  1441. ANDQ DX, R10
  1442. MOVQ SI, 80(DI)
  1443. MOVQ R8, 88(DI)
  1444. MOVQ R9, 96(DI)
  1445. MOVQ AX, 104(DI)
  1446. MOVQ R10, 112(DI)
  1447. RET
  // func cswap(inout *[4][5]uint64, v uint64)
  //
  // cswap conditionally exchanges the first 80 bytes of *inout (inout[0] and
  // inout[1]) with the last 80 bytes (inout[2] and inout[3]).
  //
  // The selection is done with a bit mask instead of a branch:
  //   mask = -v  (identical to ^(v-1))
  // which is all zeros for v == 0 (nothing changes) and all ones for v == 1
  // (the halves are exchanged). Any other v yields a partial mask and
  // therefore a bitwise mix of the two halves.
  //
  // Register roles: BX = inout, X0 = 128-bit mask, X1/X2 = the two 16-byte
  // lanes being processed, X3 = masked difference of the lanes.
  TEXT ·cswap(SB), 7, $0
      MOVQ inout+0(FP), BX
      MOVQ v+8(FP), AX

      // Build the mask and copy it into both 64-bit halves of X0.
      NEGQ AX
      MOVQ AX, X0
      PUNPCKLQDQ X0, X0

      // For each 16-byte lane: t = (a ^ b) & mask; a ^= t; b ^= t.
      // The five lane pairs touch disjoint memory, so they are handled
      // one after another.

      // bytes 0..15 <-> bytes 80..95
      MOVOU 0(BX), X1
      MOVOU 80(BX), X2
      MOVO X1, X3
      PXOR X2, X3
      PAND X0, X3
      PXOR X3, X1
      PXOR X3, X2
      MOVOU X1, 0(BX)
      MOVOU X2, 80(BX)

      // bytes 16..31 <-> bytes 96..111
      MOVOU 16(BX), X1
      MOVOU 96(BX), X2
      MOVO X1, X3
      PXOR X2, X3
      PAND X0, X3
      PXOR X3, X1
      PXOR X3, X2
      MOVOU X1, 16(BX)
      MOVOU X2, 96(BX)

      // bytes 32..47 <-> bytes 112..127
      MOVOU 32(BX), X1
      MOVOU 112(BX), X2
      MOVO X1, X3
      PXOR X2, X3
      PAND X0, X3
      PXOR X3, X1
      PXOR X3, X2
      MOVOU X1, 32(BX)
      MOVOU X2, 112(BX)

      // bytes 48..63 <-> bytes 128..143
      MOVOU 48(BX), X1
      MOVOU 128(BX), X2
      MOVO X1, X3
      PXOR X2, X3
      PAND X0, X3
      PXOR X3, X1
      PXOR X3, X2
      MOVOU X1, 48(BX)
      MOVOU X2, 128(BX)

      // bytes 64..79 <-> bytes 144..159
      MOVOU 64(BX), X1
      MOVOU 144(BX), X2
      MOVO X1, X3
      PXOR X2, X3
      PAND X0, X3
      PXOR X3, X1
      PXOR X3, X2
      MOVOU X1, 64(BX)
      MOVOU X2, 144(BX)
      RET
  // func mul(outp *uint64, xp *uint64, yp *uint64)
  //
  // mul multiplies the five-limb values at xp and yp and writes the five-limb
  // result to outp. Limbs are 64-bit words in radix 2^51 (limb i has weight
  // 2^(51*i)); product terms that wrap past limb 4 are folded back with a
  // factor of 19, i.e. the arithmetic is modulo 2^255 - 19.
  //
  //   r0 = x0*y0 + 19*(x1*y4 + x2*y3 + x3*y2 + x4*y1)
  //   r1 = x0*y1 + x1*y0 + 19*(x2*y4 + x3*y3 + x4*y2)
  //   r2 = x0*y2 + x1*y1 + x2*y0 + 19*(x3*y4 + x4*y3)
  //   r3 = x0*y3 + x1*y2 + x2*y1 + x3*y0 + 19*(x4*y4)
  //   r4 = x0*y4 + x1*y3 + x2*y2 + x3*y1 + x4*y0
  //
  // The factor 19 is applied to the x limb with a 64-bit IMUL before the
  // 64x64->128 MULQ. Each r_i is accumulated in a 128-bit register pair and
  // then carried down to 51-bit limbs. Limb 0 receives the last 19*carry fold
  // after it has been masked and is not masked again.
  //
  // All inputs are read before any output limb is stored.
  //
  // Register roles:
  //   BX = xp, CX = yp
  //   DI:SI = r0, R9:R8 = r1, R11:R10 = r2, R13:R12 = r3, R15:R14 = r4
  //   AX, DX = multiply operands/results; AX holds the limb mask during
  //   reduction; DI is reloaded with outp just before the stores.
  //
  // BP is deliberately not used: with a zero-size NOSPLIT frame the assembler
  // emits no save/restore of BP, so overwriting it would corrupt the frame
  // pointer seen by frame-pointer based stack unwinding.
  TEXT ·mul(SB), NOSPLIT, $0
      MOVQ xp+8(FP), BX
      MOVQ yp+16(FP), CX

      // Calculate r0
      MOVQ 0(BX), AX          // x0
      MULQ 0(CX)              // x0*y0
      MOVQ AX, SI             // r0 low
      MOVQ DX, DI             // r0 high
      MOVQ 8(BX), DX
      IMUL3Q $19, DX, AX      // 19*x1
      MULQ 32(CX)             // (19*x1)*y4
      ADDQ AX, SI
      ADCQ DX, DI
      MOVQ 16(BX), DX
      IMUL3Q $19, DX, AX      // 19*x2
      MULQ 24(CX)             // (19*x2)*y3
      ADDQ AX, SI
      ADCQ DX, DI
      MOVQ 24(BX), DX
      IMUL3Q $19, DX, AX      // 19*x3
      MULQ 16(CX)             // (19*x3)*y2
      ADDQ AX, SI
      ADCQ DX, DI
      MOVQ 32(BX), DX
      IMUL3Q $19, DX, AX      // 19*x4
      MULQ 8(CX)              // (19*x4)*y1
      ADDQ AX, SI
      ADCQ DX, DI

      // Calculate r1
      MOVQ 0(BX), AX
      MULQ 8(CX)              // x0*y1
      MOVQ AX, R8             // r1 low
      MOVQ DX, R9             // r1 high
      MOVQ 8(BX), AX
      MULQ 0(CX)              // x1*y0
      ADDQ AX, R8
      ADCQ DX, R9
      MOVQ 16(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 32(CX)             // (19*x2)*y4
      ADDQ AX, R8
      ADCQ DX, R9
      MOVQ 24(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 24(CX)             // (19*x3)*y3
      ADDQ AX, R8
      ADCQ DX, R9
      MOVQ 32(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 16(CX)             // (19*x4)*y2
      ADDQ AX, R8
      ADCQ DX, R9

      // Calculate r2
      MOVQ 0(BX), AX
      MULQ 16(CX)             // x0*y2
      MOVQ AX, R10            // r2 low
      MOVQ DX, R11            // r2 high
      MOVQ 8(BX), AX
      MULQ 8(CX)              // x1*y1
      ADDQ AX, R10
      ADCQ DX, R11
      MOVQ 16(BX), AX
      MULQ 0(CX)              // x2*y0
      ADDQ AX, R10
      ADCQ DX, R11
      MOVQ 24(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 32(CX)             // (19*x3)*y4
      ADDQ AX, R10
      ADCQ DX, R11
      MOVQ 32(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 24(CX)             // (19*x4)*y3
      ADDQ AX, R10
      ADCQ DX, R11

      // Calculate r3
      MOVQ 0(BX), AX
      MULQ 24(CX)             // x0*y3
      MOVQ AX, R12            // r3 low
      MOVQ DX, R13            // r3 high
      MOVQ 8(BX), AX
      MULQ 16(CX)             // x1*y2
      ADDQ AX, R12
      ADCQ DX, R13
      MOVQ 16(BX), AX
      MULQ 8(CX)              // x2*y1
      ADDQ AX, R12
      ADCQ DX, R13
      MOVQ 24(BX), AX
      MULQ 0(CX)              // x3*y0
      ADDQ AX, R12
      ADCQ DX, R13
      MOVQ 32(BX), DX
      IMUL3Q $19, DX, AX
      MULQ 32(CX)             // (19*x4)*y4
      ADDQ AX, R12
      ADCQ DX, R13

      // Calculate r4
      MOVQ 0(BX), AX
      MULQ 32(CX)             // x0*y4
      MOVQ AX, R14            // r4 low
      MOVQ DX, R15            // r4 high
      MOVQ 8(BX), AX
      MULQ 24(CX)             // x1*y3
      ADDQ AX, R14
      ADCQ DX, R15
      MOVQ 16(BX), AX
      MULQ 16(CX)             // x2*y2
      ADDQ AX, R14
      ADCQ DX, R15
      MOVQ 24(BX), AX
      MULQ 8(CX)              // x3*y1
      ADDQ AX, R14
      ADCQ DX, R15
      MOVQ 32(BX), AX
      MULQ 0(CX)              // x4*y0
      ADDQ AX, R14
      ADCQ DX, R15

      // Reduce: split every 128-bit accumulator at bit 51. The double
      // shift leaves (high:low) >> 51 in the high register; the low
      // register keeps its bottom 51 bits. Each shifted-out part is added
      // to the next limb; the part shifted out of r4 wraps to r0 times 19.
      MOVQ $2251799813685247, AX  // (1<<51) - 1
      SHLQ $13, SI, DI        // DI = r0 >> 51
      ANDQ AX, SI             // r0 &= mask51
      SHLQ $13, R8, R9        // R9 = r1 >> 51
      ANDQ AX, R8             // r1 &= mask51
      ADDQ DI, R8             // r1 += r0 >> 51
      SHLQ $13, R10, R11      // R11 = r2 >> 51
      ANDQ AX, R10            // r2 &= mask51
      ADDQ R9, R10            // r2 += r1 >> 51
      SHLQ $13, R12, R13      // R13 = r3 >> 51
      ANDQ AX, R12            // r3 &= mask51
      ADDQ R11, R12           // r3 += r2 >> 51
      SHLQ $13, R14, R15      // R15 = r4 >> 51
      ANDQ AX, R14            // r4 &= mask51
      ADDQ R13, R14           // r4 += r3 >> 51
      IMUL3Q $19, R15, R15    // wrap-around factor
      ADDQ R15, SI            // r0 += 19 * (r4 >> 51)

      // Carry chain r0 -> r1 -> r2 -> r3 -> r4 -> (19x) r0.
      MOVQ SI, DX
      SHRQ $51, DX
      ADDQ DX, R8             // r1 += r0 >> 51
      MOVQ R8, DX
      SHRQ $51, DX
      ANDQ AX, SI             // r0 &= mask51
      ADDQ DX, R10            // r2 += r1 >> 51
      MOVQ R10, DX
      SHRQ $51, DX
      ANDQ AX, R8             // r1 &= mask51
      ADDQ DX, R12            // r3 += r2 >> 51
      MOVQ R12, DX
      SHRQ $51, DX
      ANDQ AX, R10            // r2 &= mask51
      ADDQ DX, R14            // r4 += r3 >> 51
      MOVQ R14, DX
      SHRQ $51, DX
      ANDQ AX, R12            // r3 &= mask51
      IMUL3Q $19, DX, DX
      ADDQ DX, SI             // r0 += 19 * (r4 >> 51); not masked again
      ANDQ AX, R14            // r4 &= mask51

      // DI is free again here; fetch the destination and store the limbs.
      MOVQ outp+0(FP), DI
      MOVQ SI, 0(DI)
      MOVQ R8, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R12, 24(DI)
      MOVQ R14, 32(DI)
      RET
  // func square(outp *uint64, xp *uint64)
  //
  // square multiplies the five-limb value at xp by itself and writes the
  // five-limb result to outp. Limbs are 64-bit words in radix 2^51; terms
  // that wrap past limb 4 are folded back with a factor of 19, and symmetric
  // cross terms are computed once and doubled (hence the factors 2 and 38):
  //
  //   h0 = x0*x0   + 38*x1*x4 + 38*x2*x3
  //   h1 = 2*x0*x1 + 38*x2*x4 + 19*x3*x3
  //   h2 = 2*x0*x2 + x1*x1    + 38*x3*x4
  //   h3 = 2*x0*x3 + 2*x1*x2  + 19*x4*x4
  //   h4 = 2*x0*x4 + 2*x1*x3  + x2*x2
  //
  // The small factor is always applied to the first limb of a term in 64-bit
  // arithmetic before the 64x64->128 MULQ. All inputs are read before any
  // output limb is stored. Limb 0 of the result receives the final 19*carry
  // fold after masking and is not masked again.
  //
  // Register roles:
  //   BX = xp during the multiplications, outp for the final stores
  //   R9:R8 = h0, R11:R10 = h1, R13:R12 = h2, R15:R14 = h3, SI:CX = h4
  //   AX, DX = multiply operands/results, DI = limb mask during reduction
  TEXT ·square(SB), NOSPLIT, $0
      MOVQ xp+8(FP), BX

      // h0 -> R9:R8
      MOVQ 0(BX), AX
      MULQ AX                 // x0*x0
      MOVQ AX, R8
      MOVQ DX, R9
      MOVQ 8(BX), AX
      IMUL3Q $38, AX, AX
      MULQ 32(BX)             // (38*x1)*x4
      ADDQ AX, R8
      ADCQ DX, R9
      MOVQ 16(BX), AX
      IMUL3Q $38, AX, AX
      MULQ 24(BX)             // (38*x2)*x3
      ADDQ AX, R8
      ADCQ DX, R9

      // h1 -> R11:R10
      MOVQ 0(BX), AX
      ADDQ AX, AX
      MULQ 8(BX)              // (2*x0)*x1
      MOVQ AX, R10
      MOVQ DX, R11
      MOVQ 16(BX), AX
      IMUL3Q $38, AX, AX
      MULQ 32(BX)             // (38*x2)*x4
      ADDQ AX, R10
      ADCQ DX, R11
      MOVQ 24(BX), AX
      IMUL3Q $19, AX, AX
      MULQ 24(BX)             // (19*x3)*x3
      ADDQ AX, R10
      ADCQ DX, R11

      // h2 -> R13:R12
      MOVQ 0(BX), AX
      ADDQ AX, AX
      MULQ 16(BX)             // (2*x0)*x2
      MOVQ AX, R12
      MOVQ DX, R13
      MOVQ 8(BX), AX
      MULQ AX                 // x1*x1
      ADDQ AX, R12
      ADCQ DX, R13
      MOVQ 24(BX), AX
      IMUL3Q $38, AX, AX
      MULQ 32(BX)             // (38*x3)*x4
      ADDQ AX, R12
      ADCQ DX, R13

      // h3 -> R15:R14
      MOVQ 0(BX), AX
      ADDQ AX, AX
      MULQ 24(BX)             // (2*x0)*x3
      MOVQ AX, R14
      MOVQ DX, R15
      MOVQ 8(BX), AX
      ADDQ AX, AX
      MULQ 16(BX)             // (2*x1)*x2
      ADDQ AX, R14
      ADCQ DX, R15
      MOVQ 32(BX), AX
      IMUL3Q $19, AX, AX
      MULQ 32(BX)             // (19*x4)*x4
      ADDQ AX, R14
      ADCQ DX, R15

      // h4 -> SI:CX
      MOVQ 0(BX), AX
      ADDQ AX, AX
      MULQ 32(BX)             // (2*x0)*x4
      MOVQ AX, CX
      MOVQ DX, SI
      MOVQ 8(BX), AX
      ADDQ AX, AX
      MULQ 24(BX)             // (2*x1)*x3
      ADDQ AX, CX
      ADCQ DX, SI
      MOVQ 16(BX), AX
      MULQ AX                 // x2*x2
      ADDQ AX, CX
      ADCQ DX, SI

      // Reduce, phase 1: high register of each pair = (high:low) >> 51.
      MOVQ $2251799813685247, DI  // (1<<51) - 1
      SHLQ $13, R8, R9
      SHLQ $13, R10, R11
      SHLQ $13, R12, R13
      SHLQ $13, R14, R15
      SHLQ $13, CX, SI

      // Phase 2: keep the low 51 bits of every limb.
      ANDQ DI, R8
      ANDQ DI, R10
      ANDQ DI, R12
      ANDQ DI, R14
      ANDQ DI, CX

      // Phase 3: add each shifted-out part to the next limb; the part
      // shifted out of h4 wraps around to h0 multiplied by 19.
      ADDQ R9, R10
      ADDQ R11, R12
      ADDQ R13, R14
      ADDQ R15, CX
      IMUL3Q $19, SI, SI
      ADDQ SI, R8

      // Carry chain h0 -> h1 -> h2 -> h3 -> h4 -> (19x) h0.
      MOVQ R8, DX
      SHRQ $51, DX
      ANDQ DI, R8
      ADDQ DX, R10
      MOVQ R10, DX
      SHRQ $51, DX
      ANDQ DI, R10
      ADDQ DX, R12
      MOVQ R12, DX
      SHRQ $51, DX
      ANDQ DI, R12
      ADDQ DX, R14
      MOVQ R14, DX
      SHRQ $51, DX
      ANDQ DI, R14
      ADDQ DX, CX
      MOVQ CX, DX
      SHRQ $51, DX
      ANDQ DI, CX
      IMUL3Q $19, DX, DX
      ADDQ DX, R8             // h0 is not masked again after this fold

      // The input pointer is no longer needed; reuse BX for the output.
      MOVQ outp+0(FP), BX
      MOVQ R8, 0(BX)
      MOVQ R10, 8(BX)
      MOVQ R12, 16(BX)
      MOVQ R14, 24(BX)
      MOVQ CX, 32(BX)
      RET