12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600 |
- // License: GPLv3 Copyright: 2024, Kovid Goyal, <kovid at kovidgoyal.net>
- //go:build ignore
- // See https://www.quasilyte.dev/blog/post/go-asm-complementary-reference/
- // for differences between AT&T and Go assembly
- package main
- import (
- "bytes"
- "fmt"
- "go/types"
- "io"
- "os"
- "path/filepath"
- "runtime"
- "strconv"
- "strings"
- "unsafe"
- )
- var _ = fmt.Print
// Register describes one machine register known to the generator.
//
// The field order matters: registers are created with positional composite
// literals elsewhere in this file.
type Register struct {
	// Name is the assembler mnemonic, for example AX, X3 or V7.
	Name string
	// Size is the register width in bits.
	Size int
	// Restricted registers are skipped by the Reg and Vec allocators.
	Restricted bool
}
- func (r Register) String() string { return r.Name }
- func (r Register) ARMFullWidth() string { return fmt.Sprintf("%s.B%d", r, r.Size/8) }
- func (r Register) AddressInRegister() string { return fmt.Sprintf("(%s)", r) }
// Arch names a target architecture. The values are passed to types.SizesFor
// by the ISA constructors in this file.
type Arch string

// Architectures the generator knows about.
const (
	X86   = Arch("386")
	AMD64 = Arch("amd64")
	ARM64 = Arch("arm64")
)
- type ISA struct {
- Bits int
- Goarch Arch
- Registers []Register
- UsedRegisters map[Register]bool
- Sizes types.Sizes
- GeneralPurposeRegisterSize int
- HasSIMD bool
- }
// ByteSlice is a pseudo basic kind used in this file to mark []byte
// parameters; AsVar maps it to a byte slice type.
const ByteSlice = types.BasicKind(100001)
- func (isa *ISA) NativeAdd() string {
- if isa.Goarch == ARM64 {
- return "ADD"
- }
- if isa.GeneralPurposeRegisterSize == 32 {
- return "ADDL"
- }
- return "ADDQ"
- }
- func (isa *ISA) NativeSubtract() string {
- if isa.Goarch == ARM64 {
- return "SUB"
- }
- if isa.GeneralPurposeRegisterSize == 32 {
- return "SUBL"
- }
- return "SUBQ"
- }
- func (isa *ISA) add_regs(size int, names ...string) {
- for _, r := range names {
- isa.Registers = append(isa.Registers, Register{r, size, false})
- }
- }
- func (ans *ISA) add_x86_regs() {
- ans.add_regs(ans.GeneralPurposeRegisterSize, `AX`, `BX`, `DX`, `SI`, `DI`, `BP`)
- if ans.GeneralPurposeRegisterSize == 64 {
- ans.add_regs(ans.GeneralPurposeRegisterSize, `R8`, `R9`, `R10`, `R11`, `R12`, `R13`, `R14`, `R15`)
- }
- // CX is used for shift and rotate instructions
- ans.Registers = append(ans.Registers, Register{`CX`, ans.GeneralPurposeRegisterSize, true})
- // SP is the stack pointer used by the Go runtime
- ans.Registers = append(ans.Registers, Register{`SP`, ans.GeneralPurposeRegisterSize, true})
- ans.add_regs(128, `X0`, `X1`, `X2`, `X3`, `X4`, `X5`, `X6`, `X7`, `X8`, `X9`, `X10`, `X11`, `X12`, `X13`, `X14`, `X15`)
- if ans.Goarch == AMD64 {
- ans.add_regs(256,
- `Y0`, `Y1`, `Y2`, `Y3`, `Y4`, `Y5`, `Y6`, `Y7`, `Y8`, `Y9`, `Y10`, `Y11`, `Y12`, `Y13`, `Y14`, `Y15`)
- }
- }
- func Createi386ISA(bits int) ISA {
- ans := ISA{
- Bits: bits,
- GeneralPurposeRegisterSize: 32,
- Goarch: X86,
- Sizes: types.SizesFor(runtime.Compiler, string(X86)),
- HasSIMD: bits == 128,
- }
- ans.add_x86_regs()
- return ans
- }
- func CreateAMD64ISA(bits int) ISA {
- ans := ISA{
- Bits: bits,
- GeneralPurposeRegisterSize: 64,
- Goarch: AMD64,
- Sizes: types.SizesFor(runtime.Compiler, string(AMD64)),
- HasSIMD: true,
- }
- ans.add_x86_regs()
- return ans
- }
- func CreateARM64ISA(bits int) ISA {
- ans := ISA{
- Bits: bits,
- Goarch: ARM64,
- GeneralPurposeRegisterSize: 64,
- Sizes: types.SizesFor(runtime.Compiler, string(ARM64)),
- HasSIMD: bits == 128,
- }
- ans.add_regs(ans.GeneralPurposeRegisterSize,
- `R0`, `R1`, `R2`, `R3`, `R4`, `R5`, `R6`, `R7`, `R8`, `R9`, `R10`, `R11`, `R12`, `R13`, `R14`, `R15`)
- ans.add_regs(128,
- `V0`, `V1`, `V2`, `V3`, `V4`, `V5`, `V6`, `V7`, `V8`, `V9`, `V10`, `V11`, `V12`, `V13`, `V14`, `V15`,
- `V16`, `V17`, `V18`, `V19`, `V20`, `V21`, `V22`, `V23`, `V24`, `V25`, `V26`, `V27`, `V28`, `V29`, `V30`, `V31`,
- )
- return ans
- }
- func AsVar(s types.BasicKind, name string) *types.Var {
- var t types.Type
- switch s {
- case ByteSlice:
- t = types.NewSlice(types.Typ[types.Byte])
- default:
- t = types.Typ[s]
- }
- return types.NewParam(0, nil, name, t)
- }
// FunctionParam is a named parameter or result of a generated function.
type FunctionParam struct {
	// Name is the Go identifier of the parameter.
	Name string
	// Type is the basic kind of the parameter, or the ByteSlice pseudo kind.
	Type types.BasicKind
}
- type Function struct {
- Name string
- Desc string
- Params, Returns []FunctionParam
- UsedRegisters map[Register]bool
- Size int
- ISA ISA
- ParamOffsets, ReturnOffsets []int
- Instructions []string
- Used256BitReg bool
- }
- func (f *Function) Reg() Register {
- for _, r := range f.ISA.Registers {
- if !r.Restricted && r.Size == f.ISA.GeneralPurposeRegisterSize && !f.UsedRegisters[r] {
- f.UsedRegisters[r] = true
- return r
- }
- }
- b := []string{}
- for _, r := range f.ISA.Registers {
- if !r.Restricted && r.Size == f.ISA.GeneralPurposeRegisterSize {
- b = append(b, r.Name)
- }
- }
- panic(fmt.Sprint("No available general purpose registers, used registers: ", strings.Join(b, ", ")))
- }
- func (f *Function) RegForShifts() Register {
- if f.ISA.Goarch == ARM64 {
- return f.Reg()
- }
- for _, r := range f.ISA.Registers {
- if r.Name == "CX" {
- if f.UsedRegisters[r] {
- panic("The register for shifts is already used")
- }
- return r
- }
- }
- panic("No register for shifts found")
- }
- func (f *Function) Vec(size ...int) Register {
- szq := f.ISA.Bits
- if len(size) > 0 {
- szq = size[0]
- }
- if f.ISA.Goarch == ARM64 {
- for _, r := range f.ISA.Registers {
- if r.Size == szq && !r.Restricted && !f.UsedRegisters[r] {
- f.UsedRegisters[r] = true
- if r.Size > 128 {
- f.Used256BitReg = true
- }
- return r
- }
- }
- } else {
- // In Intels crazy architecture AVX registers and SSE registers are the same hardware register so changing
- // one can change the other. Sigh.
- used := make(map[uint32]bool, len(f.UsedRegisters))
- for r, is_used := range f.UsedRegisters {
- if is_used && r.Size > f.ISA.GeneralPurposeRegisterSize {
- used[r.ARMId()] = true
- }
- }
- for _, r := range f.ISA.Registers {
- if r.Size == szq && !r.Restricted && !used[r.ARMId()] {
- f.UsedRegisters[r] = true
- if r.Size > 128 {
- f.Used256BitReg = true
- }
- return r
- }
- }
- }
- panic("No available vector registers")
- }
- func (f *Function) ReleaseReg(r ...Register) {
- for _, x := range r {
- f.UsedRegisters[x] = false
- }
- }
- func (f *Function) instr(items ...any) {
- sarr := make([]string, len(items))
- for i, val := range items {
- var f string
- if i > 0 && i < len(items)-1 {
- f = "%s,"
- } else {
- f = "%s"
- }
- sarr[i] = fmt.Sprintf(f, val)
- }
- f.Instructions = append(f.Instructions, "\t"+strings.Join(sarr, " "))
- }
- func (f *Function) MemLoadForBasicType(t types.BasicKind) string {
- if f.ISA.Goarch == ARM64 {
- switch t {
- case types.Uint8:
- return "MOVBU"
- case types.Int8:
- return "MOVB"
- case types.Uint16:
- return "MOVHU"
- case types.Int16:
- return "MOVH"
- case types.Uint32:
- return "MOVWU"
- case types.Int32:
- return "MOVW"
- case types.Uint64, types.Uintptr, ByteSlice, types.String, types.Uint, types.Int64, types.Int:
- return `MOVD`
- }
- } else {
- if f.ISA.GeneralPurposeRegisterSize == 32 {
- switch t {
- case types.Uint8:
- return "MOVBLZX"
- case types.Int8:
- return "MOVBLSX"
- case types.Uint16:
- return "MOVWLZX"
- case types.Int16:
- return "MOVWLSX"
- case types.Uint32, types.Uintptr, types.Int32, ByteSlice, types.String, types.Int, types.Uint:
- return "MOVL"
- }
- } else {
- switch t {
- case types.Uint8:
- return "MOVBQZX"
- case types.Int8:
- return "MOVBQSX"
- case types.Uint16:
- return "MOVWQZX"
- case types.Int16:
- return "MOVWQSX"
- case types.Uint32:
- return "MOVLQZX"
- case types.Int32:
- return "MOVLQSX"
- case types.Int64, types.Uint64, types.Uintptr, ByteSlice, types.String, types.Int, types.Uint:
- return "MOVQ"
- }
- }
- }
- panic(fmt.Sprint("Unknown type: ", t))
- }
- func (f *Function) LoadUnsignedBytesFromMemory(addr string, n int, dest Register) {
- defer f.AddTrailingComment(dest, "=", n, "byte(s) from the memory pointed to by", addr)
- switch n {
- case 1:
- if dest.Size != f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into vector register", n))
- }
- f.instr(f.MemLoadForBasicType(types.Byte), addr, dest)
- case 2:
- if dest.Size != f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into vector register", n))
- }
- f.instr(f.MemLoadForBasicType(types.Uint16), addr, dest)
- case 4:
- if dest.Size != f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into vector register", n))
- }
- f.instr(f.MemLoadForBasicType(types.Uint32), addr, dest)
- case 8:
- if dest.Size != f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into vector register", n))
- }
- if dest.Size*8 > f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into %d bit register", n, dest.Size))
- }
- f.instr(f.MemLoadForBasicType(types.Uint64), addr, dest)
- default:
- if dest.Size*8 != f.ISA.GeneralPurposeRegisterSize {
- panic(fmt.Sprintf("cannot load %d bytes into %d bit register", n, dest.Size))
- }
- f.instr(f.MemLoadForBasicType(types.Uintptr), addr, dest)
- }
- }
- func (f *Function) LoadParam(p string) Register {
- r := f.Reg()
- for i, q := range f.Params {
- if q.Name == p {
- offset := f.ParamOffsets[i]
- mov := f.MemLoadForBasicType(q.Type)
- f.instr(mov, fmt.Sprintf("%s+%d(FP)", q.Name, offset), r)
- f.AddTrailingComment("load the function parameter", p, "into", r)
- }
- }
- return r
- }
- func (f *Function) set_return_value(offset int, q FunctionParam, val any) {
- mov := f.MemLoadForBasicType(q.Type)
- vr := val_repr_for_arithmetic(val)
- defer f.AddTrailingComment("save the value:", val, "to the function return parameter:", q.Name)
- if f.ISA.Goarch == ARM64 && strings.HasPrefix(vr, `$`) {
- // no way to store an immediate value into a memory address
- temp := f.Reg()
- f.SetRegisterTo(temp, val)
- defer f.ReleaseReg(temp)
- vr = temp.Name
- }
- f.instr(mov, vr, fmt.Sprintf("%s+%d(FP)", q.Name, offset))
- }
- func (f *Function) SetReturnValue(p string, val any) {
- for i, q := range f.Returns {
- if q.Name == p {
- f.set_return_value(f.ReturnOffsets[i], q, val)
- break
- }
- }
- }
- func (f *Function) CountTrailingZeros(r, ans Register) {
- if r.Size == f.ISA.GeneralPurposeRegisterSize {
- if f.ISA.Goarch == ARM64 {
- f.instr("RBIT", r, r)
- f.AddTrailingComment("reverse the bits")
- f.instr("CLZ", r, ans)
- f.AddTrailingComment(ans, "= number of leading zeros in", r)
- } else {
- f.instr("BSFL", ans, ans)
- f.AddTrailingComment(ans, "= number of trailing zeros in", r)
- }
- } else {
- panic("cannot count trailing zeros in a vector register")
- }
- }
- func (f *Function) Comment(x ...any) {
- f.Instructions = append(f.Instructions, space_join("\t//", x...))
- }
- func shrn8b_immediate4(a, b Register) uint32 {
- return (0x0f0c84 << 8) | (a.ARMId()<<5 | b.ARMId())
- }
- func encode_cmgt16b(a, b, dest Register) (ans uint32) {
- return 0x271<<21 | b.ARMId()<<16 | 0xd<<10 | a.ARMId()<<5 | dest.ARMId()
- }
- func (f *Function) MaskForCountDestructive(vec, ans Register) {
- // vec is clobbered by this function
- f.Comment("Count the number of bytes to the first 0xff byte and put the result in", ans)
- if f.ISA.Goarch == ARM64 {
- // See https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
- f.Comment("Go assembler doesn't support the shrn instruction, below we have: shrn.8b", vec, vec, "#4")
- f.Comment("It is shifting right by four bits in every 16 bit word and truncating to 8 bits storing the result in the lower 64 bits of", vec)
- f.instr("WORD", fmt.Sprintf("$0x%x", shrn8b_immediate4(vec, vec)))
- f.instr("FMOVD", "F"+vec.Name[1:], ans)
- f.AddTrailingComment("Extract the lower 64 bits from", vec, "and put them into", ans)
- } else {
- if f.ISA.Bits == 128 {
- f.instr("PMOVMSKB", vec, ans)
- } else {
- f.instr("VPMOVMSKB", vec, ans)
- }
- f.AddTrailingComment(ans, "= mask of the highest bit in every byte in", vec)
- }
- }
- func (f *Function) shift_self(right bool, self, amt any) {
- op := ""
- if right {
- op = "SHRQ"
- if f.ISA.Goarch == ARM64 {
- op = "LSR"
- }
- } else {
- op = "SHLQ"
- if f.ISA.Goarch == ARM64 {
- op = "LSL"
- }
- }
- switch v := amt.(type) {
- case Register:
- if f.ISA.Goarch != ARM64 && v.Name != "CX" {
- panic("On Intel only the CX register can be used for shifts")
- }
- f.instr(op, v, self)
- default:
- f.instr(op, val_repr_for_arithmetic(v), self)
- }
- }
- func (f *Function) ShiftSelfRight(self, amt any) {
- f.shift_self(true, self, amt)
- }
- func (f *Function) ShiftSelfLeft(self, amt any) {
- f.shift_self(false, self, amt)
- }
- func (f *Function) ShiftMaskRightDestructive(mask, amt any) {
- // The amt register is clobbered by this function
- switch n := amt.(type) {
- case Register:
- if f.ISA.Goarch == ARM64 {
- f.Comment("The mask has 4 bits per byte, so multiply", n, "by 4")
- f.ShiftSelfLeft(n, 2)
- }
- f.ShiftSelfRight(mask, n)
- case int:
- if f.ISA.Goarch == ARM64 {
- n <<= 2
- }
- f.ShiftSelfRight(mask, n)
- default:
- panic(fmt.Sprintf("Cannot shift by: %s", amt))
- }
- }
- func (f *Function) CountLeadingZeroBytesInMask(src, ans Register) {
- if f.ISA.Goarch == ARM64 {
- f.CountTrailingZeros(src, ans)
- f.instr("UBFX", "$2", ans, "$30", ans)
- f.AddTrailingComment(ans, ">>= 2 (divide by 4)")
- } else {
- f.CountTrailingZeros(src, ans)
- }
- }
- func (f *Function) CountBytesToFirstMatchDestructive(vec, ans Register) {
- // vec is clobbered by this function
- f.Comment("Count the number of bytes to the first 0xff byte and put the result in", ans)
- f.MaskForCountDestructive(vec, ans)
- f.CountLeadingZeroBytesInMask(ans, ans)
- f.BlankLine()
- }
- func (f *Function) LoadParamLen(p string) Register {
- r := f.Reg()
- for i, q := range f.Params {
- if q.Name == p {
- offset := f.ParamOffsets[i]
- if q.Type == ByteSlice || q.Type == types.String {
- offset += int(f.ISA.Sizes.Sizeof(types.Typ[types.Uintptr]))
- }
- mov := f.MemLoadForBasicType(q.Type)
- f.instr(mov, fmt.Sprintf("%s_len+%d(FP)", q.Name, offset), r)
- f.AddTrailingComment("load the length of the function parameter", q.Name, "into", r)
- break
- }
- }
- return r
- }
- func (f *Function) unaligned_move() string {
- switch f.ISA.Goarch {
- case X86, AMD64:
- if f.ISA.Bits == 128 {
- return "MOVOU"
- }
- return "VMOVDQU"
- default:
- panic("Unknown arch: " + string(f.ISA.Goarch))
- }
- }
- func (f *Function) aligned_move() string {
- switch f.ISA.Goarch {
- case X86, AMD64:
- if f.ISA.Bits == 128 {
- return "MOVOA"
- }
- return "VMOVDQA"
- default:
- panic("Unknown arch: " + string(f.ISA.Goarch))
- }
- }
- func (f *Function) LoadPointerUnaligned(register_containing_pointer_value Register, dest Register) {
- addr := register_containing_pointer_value.AddressInRegister()
- if f.ISA.Goarch == ARM64 {
- f.instr(`VLD1`, addr, "["+dest.ARMFullWidth()+"]")
- } else {
- f.instr(f.unaligned_move(), addr, dest)
- }
- f.AddTrailingComment("load memory from the address in", register_containing_pointer_value, "to", dest)
- }
- func (f *Function) LoadPointerAligned(register_containing_pointer_value Register, dest Register) {
- addr := register_containing_pointer_value.AddressInRegister()
- if f.ISA.Goarch == ARM64 {
- f.instr(`VLD1`, addr, "["+dest.ARMFullWidth()+"]")
- } else {
- f.instr(f.aligned_move(), addr, dest)
- }
- f.AddTrailingComment("load memory from the address in", register_containing_pointer_value, "to", dest)
- }
- func (f *Function) StoreUnalignedToPointer(vec, register_containing_pointer_value Register) {
- if f.ISA.Goarch == ARM64 {
- f.instr(`VST1`, "["+vec.ARMFullWidth()+"]", fmt.Sprintf("(%s)", register_containing_pointer_value))
- } else {
- f.instr(f.unaligned_move(), vec, fmt.Sprintf("(%s)", register_containing_pointer_value))
- }
- f.AddTrailingComment("store the value of", vec, "in to the memory whose address is in:", register_containing_pointer_value)
- }
- func (f *Function) test_if_zero(a Register) {
- if f.ISA.Goarch == ARM64 {
- f.instr("AND", a, a, a)
- }
- switch a.Size {
- case 32:
- f.instr("TESTL", a, a)
- case 64:
- f.instr("TESTQ", a, a)
- case 128:
- f.instr("PTEST", a, a)
- default:
- f.instr("VPTEST", a, a)
- }
- f.AddTrailingComment("test if", a, "is zero")
- }
- func (f *Function) JumpTo(label string) {
- f.instr("JMP", label)
- f.AddTrailingComment("jump to:", label)
- }
- func (f *Function) jump_on_zero_check(a Register, label string, on_zero bool) {
- if f.ISA.Goarch == ARM64 {
- if a.Size > f.ISA.GeneralPurposeRegisterSize {
- temp := f.Vec()
- defer f.ReleaseReg(temp)
- f.instr("VDUP", a.Name+".D[1]", temp)
- f.AddTrailingComment(`duplicate the upper 64 bits of`, a, "into the lower and upper 64 bits of", temp)
- f.Or(a, temp, temp)
- a = f.Reg()
- defer f.ReleaseReg(a)
- f.instr("FMOVD", "F"+temp.Name[1:], a)
- f.AddTrailingComment(a, "= lower 64bits of", temp)
- }
- if on_zero {
- f.instr("CBZ", a, label)
- } else {
- f.instr("CBNZ", a, label)
- }
- } else {
- f.test_if_zero(a)
- if on_zero {
- f.instr("JZ", label)
- } else {
- f.instr("JNZ", label)
- }
- }
- }
- func (f *Function) JumpIfZero(a Register, label string) {
- f.jump_on_zero_check(a, label, true)
- f.AddTrailingComment("jump to:", label, "if", a, "is zero")
- }
- func (f *Function) JumpIfNonZero(a Register, label string) {
- f.jump_on_zero_check(a, label, false)
- f.AddTrailingComment("jump to:", label, "if", a, "is non-zero")
- }
- func (f *Function) compare(a, b Register) {
- if f.ISA.Goarch == ARM64 {
- f.instr("CMP", b, a)
- } else {
- if a.Size == 32 {
- f.instr("CMPL", a, b)
- } else {
- f.instr("CMPQ", a, b)
- }
- }
- f.AddTrailingComment("compare", a, "to", b)
- }
- func (f *Function) JumpIfLessThan(a, b Register, label string) {
- f.compare(a, b)
- if f.ISA.Goarch == ARM64 {
- f.instr("BLT", label)
- } else {
- f.instr("JLT", label)
- }
- f.AddTrailingComment("jump to:", label, "if", a, "<", b)
- }
- func (f *Function) JumpIfLessThanOrEqual(a, b Register, label string) {
- f.compare(a, b)
- if f.ISA.Goarch == ARM64 {
- f.instr("BLE", label)
- } else {
- f.instr("JLE", label)
- }
- f.AddTrailingComment("jump to:", label, "if", a, "<=", b)
- }
- func (f *Function) JumpIfEqual(a, b Register, label string) {
- f.compare(a, b)
- if f.ISA.Goarch == ARM64 {
- f.instr("BEQ", label)
- } else {
- f.instr("JE", label)
- }
- f.AddTrailingComment("jump to:", label, "if", a, "==", b)
- }
- func (f *Function) Or(a, b, dest Register) {
- if f.ISA.Goarch == ARM64 {
- f.instr("VORR", a.ARMFullWidth(), b.ARMFullWidth(), dest.ARMFullWidth())
- } else {
- if f.ISA.Bits == 128 {
- switch dest.Name {
- case b.Name:
- f.instr("POR", a, b)
- case a.Name:
- f.instr("POR", b, a)
- default:
- f.CopyRegister(b, dest)
- f.instr("POR", a, dest)
- }
- } else {
- f.instr("VPOR", a, b, dest)
- }
- }
- f.AddTrailingComment(dest, "=", a, "|", b, "(bitwise)")
- }
- func (f *Function) And(a, b, dest Register) {
- if f.ISA.Goarch == ARM64 {
- f.instr("VAND", a.ARMFullWidth(), b.ARMFullWidth(), dest.ARMFullWidth())
- } else {
- if f.ISA.Bits == 128 {
- switch dest.Name {
- case b.Name:
- f.instr("PAND", a, b)
- case a.Name:
- f.instr("PAND", b, a)
- default:
- f.CopyRegister(b, dest)
- f.instr("PAND", a, dest)
- }
- } else {
- f.instr("VPAND", a, b, dest)
- }
- }
- f.AddTrailingComment(dest, "=", a, "&", b, "(bitwise)")
- }
- func (f *Function) ClearRegisterToZero(r Register) {
- defer func() { f.AddTrailingComment("set", r, "to zero") }()
- if f.ISA.Goarch == ARM64 {
- if r.Size == f.ISA.GeneralPurposeRegisterSize {
- f.instr(f.MemLoadForBasicType(types.Int32), val_repr_for_arithmetic(0), r)
- } else {
- f.instr("VMOVI", "$0", r.ARMFullWidth())
- }
- return
- }
- switch r.Size {
- case 128:
- f.instr("PXOR", r, r)
- case f.ISA.GeneralPurposeRegisterSize:
- if r.Size == 32 {
- f.instr("XORL", r, r)
- } else {
- f.instr("XORQ", r, r)
- }
- case 256:
- f.instr("VPXOR", r, r, r)
- }
- }
- func (f *Function) AllOnesRegister(r Register) {
- switch r.Size {
- default:
- f.CmpEqEpi8(r, r, r)
- case f.ISA.GeneralPurposeRegisterSize:
- if f.ISA.Goarch == ARM64 {
- f.instr("MOVD", "$-1", r)
- } else {
- switch r.Size {
- case 32:
- f.instr("MOVL", "$0xFFFFFFFF")
- case 64:
- f.instr("MOVQ", "$0xFFFFFFFFFFFFFFFF")
- }
- }
- f.AddTrailingComment(r, "= all ones")
- }
- }
- func (f *Function) CopyRegister(a, ans Register) {
- if a.Size != ans.Size {
- panic("Can only copy registers of equal sizes")
- }
- if a.Size > f.ISA.GeneralPurposeRegisterSize {
- if f.ISA.Goarch == ARM64 {
- f.instr("VDUP", a.Name[1:]+".D2", ans.Name+".D2")
- } else {
- f.instr(f.aligned_move(), a, ans)
- }
- } else {
- if f.ISA.Goarch == ARM64 {
- f.instr("MOVD", a, ans)
- } else {
- if a.Size == 32 {
- f.instr("MOVL", a, ans)
- } else {
- f.instr("MOVQ", a, ans)
- }
- }
- }
- f.AddTrailingComment(ans, "=", a)
- }
- func (f *Function) SetRegisterTo(self Register, val any) {
- switch v := val.(type) {
- case Register:
- f.CopyRegister(self, v)
- case int:
- if self.Size != f.ISA.GeneralPurposeRegisterSize {
- panic("TODO: Cannot yet set constant values in vector registers")
- }
- switch v {
- case 0:
- f.ClearRegisterToZero(self)
- case -1:
- f.AllOnesRegister(self)
- default:
- if f.ISA.Goarch == ARM64 {
- f.instr("MOVD", val_repr_for_arithmetic(v), self)
- } else {
- f.instr("MOVL", val_repr_for_arithmetic(v), self)
- }
- f.AddTrailingComment(self, "= ", v)
- }
- case string:
- f.instr(f.MemLoadForBasicType(types.Uintptr), v)
- f.AddTrailingComment(self, "=", self.Size/8, "bytes at the address", v)
- default:
- panic(fmt.Sprintf("cannot set register to value: %#v", val))
- }
- }
- func (r Register) ARMId() uint32 {
- num, err := strconv.Atoi(r.Name[1:])
- if err != nil {
- panic(err)
- }
- return uint32(num)
- }
- func (f *Function) cmp(a, b, ans Register, op, c_rep string) {
- if a.Size != b.Size || a.Size != ans.Size {
- panic("Can only compare registers of equal sizes")
- }
- if f.ISA.Goarch == ARM64 {
- if op == "EQ" {
- f.instr("VCMEQ", a.ARMFullWidth(), b.ARMFullWidth(), ans.ARMFullWidth())
- } else {
- f.instr("WORD", fmt.Sprintf("$0x%x", encode_cmgt16b(a, b, ans)))
- }
- } else {
- fop := `PCMP` + op + "B"
- if f.ISA.Bits == 128 {
- if op == "EQ" {
- switch ans.Name {
- case a.Name:
- f.instr(fop, b, ans)
- case b.Name:
- f.instr(fop, a, ans)
- default:
- f.CopyRegister(a, ans)
- f.instr(fop, b, ans)
- }
- } else {
- // order matters, we want destination aka 2nd arg to be both a and ans
- switch ans.Name {
- case a.Name:
- f.instr(fop, b, a)
- case b.Name:
- vec := f.Vec(a.Size)
- f.CopyRegister(a, vec)
- f.instr(fop, b, vec)
- f.CopyRegister(vec, b)
- f.ReleaseReg(vec)
- default:
- f.CopyRegister(a, ans)
- f.instr(fop, b, ans)
- }
- }
- } else {
- f.instr("V"+fop, b, a, ans)
- }
- }
- f.AddTrailingComment(ans, "= 0xff on every byte where", a.Name+"[n]", c_rep, b.Name+"[n] and zero elsewhere")
- }
- func (f *Function) CmpGtEpi8(a, b, ans Register) {
- f.cmp(a, b, ans, "GT", ">")
- }
- func (f *Function) CmpLtEpi8(a, b, ans Register) {
- f.cmp(b, a, ans, "GT", ">")
- }
- func (f *Function) CmpEqEpi8(a, b, ans Register) {
- f.cmp(a, b, ans, "EQ", "==")
- }
- func (f *Function) Set1Epi8(val any, vec Register) {
- if vec.Size != 128 && vec.Size != 256 {
- panic("Set1Epi8 only works on vector registers")
- }
- do_shuffle_load := func(r Register) {
- f.instr("MOVL", r, vec)
- shuffle_mask := f.Vec()
- f.ClearRegisterToZero(shuffle_mask)
- f.instr("PSHUFB", shuffle_mask, vec)
- f.ReleaseReg(shuffle_mask)
- }
- switch v := val.(type) {
- default:
- panic("unknown type for set1_epi8")
- case int:
- switch v {
- case 0:
- f.ClearRegisterToZero(vec)
- return
- case -1:
- f.AllOnesRegister(vec)
- return
- }
- r := f.Reg()
- defer f.ReleaseReg(r)
- f.SetRegisterTo(r, v)
- f.Set1Epi8(r, vec)
- case Register:
- f.Comment("Set all bytes of", vec, "to the lowest byte in", v)
- if v.Size != f.ISA.GeneralPurposeRegisterSize {
- panic("Can only set1_epi8 from a general purpose register")
- }
- if f.ISA.Goarch == ARM64 {
- f.instr("VMOV", v, vec.ARMFullWidth())
- } else {
- switch vec.Size {
- case 128:
- do_shuffle_load(v)
- case 256:
- temp := f.Vec(128)
- defer f.ReleaseReg(temp)
- f.instr("VMOVD", v, temp)
- f.instr("VPBROADCASTB", temp, vec)
- }
- }
- defer f.Comment()
- case string:
- f.Comment("Set all bytes of", vec, "to the first byte in", v)
- if f.ISA.Goarch == ARM64 {
- r := f.LoadParam(v)
- f.instr("VMOV", r, vec.ARMFullWidth())
- f.ReleaseReg(r)
- return
- }
- switch vec.Size {
- case 128:
- r := f.LoadParam(v)
- defer f.ReleaseReg(r)
- do_shuffle_load(r)
- case 256:
- f.instr("VPBROADCASTB", f.ParamPos(v), vec)
- }
- defer f.Comment()
- }
- }
- func (isa *ISA) structsize(vs []*types.Var) int64 {
- n := len(vs)
- if n == 0 {
- return 0
- }
- offsets := isa.Sizes.Offsetsof(vs)
- return offsets[n-1] + isa.Sizes.Sizeof(vs[n-1].Type())
- }
- func tuplevars(params []FunctionParam) []*types.Var {
- vars := make([]*types.Var, len(params))
- for i, p := range params {
- vars[i] = AsVar(p.Type, p.Name)
- }
- return vars
- }
- func NewFunction(isa ISA, name, description string, params, returns []FunctionParam) *Function {
- name = fmt.Sprintf("%s_%d", name, isa.Bits)
- ans := Function{Name: name, Desc: description, Params: params, Returns: returns, ISA: isa}
- vars := tuplevars(params)
- vars = append(vars, types.NewParam(0, nil, "sentinel", types.Typ[types.Uint64]))
- offsets := isa.Sizes.Offsetsof(vars)
- n := len(params)
- paramssize := int(offsets[n])
- ans.ParamOffsets = make([]int, n)
- ans.Size = paramssize
- for i := range ans.ParamOffsets {
- ans.ParamOffsets[i] = int(offsets[i])
- }
- if len(returns) > 0 {
- vars = tuplevars(returns)
- offsets = isa.Sizes.Offsetsof(vars)
- ans.ReturnOffsets = make([]int, len(offsets))
- for i, off := range offsets {
- ans.ReturnOffsets[i] = paramssize + int(off)
- }
- ans.Size += int(isa.structsize(vars))
- }
- return &ans
- }
- func (s *Function) ParamPos(name string) string {
- for n, i := range s.Params {
- if i.Name == name {
- return fmt.Sprintf("%s+%d(FP)", i.Name, s.ParamOffsets[n])
- }
- }
- panic(fmt.Errorf("Unknown parameter: %s", name))
- }
- func (s *Function) print_signature(w io.Writer) {
- fmt.Fprintf(w, "func %s(", s.Name)
- print_p := func(p FunctionParam) {
- var tname string
- if p.Type == ByteSlice {
- tname = "[]byte"
- } else {
- tname = types.Universe.Lookup(types.Typ[p.Type].String()).String()
- }
- tname, _ = strings.CutPrefix(tname, "type ")
- fmt.Fprintf(w, "%s %s", p.Name, tname)
- }
- for i, p := range s.Params {
- if i > 0 {
- fmt.Fprint(w, ", ")
- }
- print_p(p)
- }
- fmt.Fprint(w, ")")
- if len(s.Returns) == 0 {
- return
- }
- fmt.Fprint(w, " (")
- for i, p := range s.Returns {
- if i > 0 {
- fmt.Fprint(w, ", ")
- }
- print_p(p)
- }
- fmt.Fprint(w, ")")
- }
- func (s *Function) OutputStub(w io.Writer) {
- if s.Desc != "" {
- fmt.Fprintln(w, "// "+s.Desc)
- fmt.Fprintln(w, "//")
- }
- if s.ISA.HasSIMD {
- fmt.Fprintln(w, "//go:noescape")
- }
- s.print_signature(w)
- if s.ISA.HasSIMD {
- fmt.Fprintln(w)
- } else {
- fmt.Fprintln(w, "{")
- fmt.Fprintln(w, "panic(\"No SIMD implementations for this function\")")
- fmt.Fprintln(w, "}")
- }
- fmt.Fprintln(w)
- }
- func (s *Function) BlankLine() { s.Instructions = append(s.Instructions, "") }
- func (s *Function) Return() {
- if s.Used256BitReg {
- s.instr("VZEROUPPER")
- s.AddTrailingComment("zero upper bits of AVX registers to avoid dependencies when switching between SSE and AVX code")
- }
- s.instr("RET")
- s.AddTrailingComment("return from function")
- s.BlankLine()
- }
- func (s *Function) end_function() {
- amt := 16
- if s.Used256BitReg {
- amt = 32
- }
- s.instr(fmt.Sprintf("PCALIGN $%d\n", amt))
- s.Return()
- }
- func (s *Function) Label(name string) {
- s.Instructions = append(s.Instructions, name+":")
- s.AddTrailingComment("jump target")
- }
// space_join formats every element of x with the default fmt formatting and
// concatenates them, writing a single space after each element. A non-empty
// prefix is written first, also followed by a space. Note that the result
// therefore ends with a space whenever anything was written.
func space_join(prefix string, x ...any) string {
	var out strings.Builder
	if len(prefix) > 0 {
		out.WriteString(prefix)
		out.WriteByte(' ')
	}
	for idx := range x {
		fmt.Fprint(&out, x[idx])
		out.WriteByte(' ')
	}
	return out.String()
}
- func (s *Function) AddTrailingComment(x ...any) {
- s.Instructions[len(s.Instructions)-1] += space_join(" //", x...)
- }
// val_repr_for_arithmetic returns the textual operand used for val in an
// arithmetic instruction:
//
//   - an int becomes an immediate: "$" plus the decimal value when negative,
//     "$0x" plus the hexadecimal value otherwise;
//   - a string is returned unchanged;
//   - a fmt.Stringer yields its String() result;
//   - anything else is formatted with fmt.Sprint.
func val_repr_for_arithmetic(val any) (ans string) {
	switch typed := val.(type) {
	case int:
		if typed >= 0 {
			ans = fmt.Sprintf("$0x%x", typed)
		} else {
			ans = fmt.Sprintf("$%d", typed)
		}
	case string:
		ans = typed
	case fmt.Stringer:
		ans = typed.String()
	default:
		ans = fmt.Sprint(typed)
	}
	return ans
}
- func (f *Function) AndSelf(self Register, val any) {
- switch f.ISA.Goarch {
- case ARM64:
- f.instr("AND", val_repr_for_arithmetic(val), self)
- case AMD64:
- f.instr("ANDQ", val_repr_for_arithmetic(val), self)
- case X86:
- f.instr("ANDL", val_repr_for_arithmetic(val), self)
- default:
- panic("Unknown architecture for AND")
- }
- f.AddTrailingComment(self, "&=", val)
- }
- func (f *Function) NegateSelf(self Register) {
- if f.ISA.Goarch == "ARM64" {
- f.instr("NEG", self, self)
- } else {
- f.instr("NEGQ", self)
- }
- f.AddTrailingComment(self, "*= -1")
- }
- func (f *Function) AddToSelf(self Register, val any) {
- f.instr(f.ISA.NativeAdd(), val_repr_for_arithmetic(val), self) // pos += sizeof(vec)
- f.AddTrailingComment(self, "+=", val)
- }
- func (f *Function) SubtractFromSelf(self Register, val any) {
- f.instr(f.ISA.NativeSubtract(), val_repr_for_arithmetic(val), self) // pos += sizeof(vec)
- f.AddTrailingComment(self, "-=", val)
- }
- func (s *Function) SetRegisterToOffset(dest Register, base_register Register, constant_offset int, offset_register Register) {
- if s.ISA.Goarch == ARM64 {
- s.SetRegisterTo(dest, constant_offset)
- s.AddToSelf(dest, base_register)
- s.AddToSelf(dest, offset_register)
- } else {
- addr := fmt.Sprintf("%d(%s)(%s*1)", constant_offset, base_register, offset_register)
- s.instr(s.ISA.LEA(), addr, dest)
- s.AddTrailingComment(dest, "=", base_register, "+", offset_register, "+", constant_offset)
- }
- }
- func (s *Function) OutputASM(w io.Writer) {
- if !s.ISA.HasSIMD {
- return
- }
- fmt.Fprint(w, "// ")
- s.print_signature(w)
- fmt.Fprintf(w, "\nTEXT ·%s(SB), NOSPLIT|TOPFRAME|NOFRAME, $0-%d\n", s.Name, s.Size)
- has_trailing_return := false
- for _, i := range s.Instructions {
- if len(i) == 0 {
- continue
- }
- if strings.HasPrefix(i, "\tRET ") {
- has_trailing_return = true
- } else {
- has_trailing_return = false
- }
- }
- if !has_trailing_return {
- s.Return()
- }
- for _, i := range s.Instructions {
- fmt.Fprintln(w, i)
- }
- fmt.Fprintln(w)
- }
- type State struct {
- ISA ISA
- ActiveFunction *Function
- ASMOutput, StubOutput strings.Builder
- TestASMOutput, TestStubOutput strings.Builder
- }
// package_name is the name used in the package clause of the generated Go
// files. main replaces it with the base name of the output directory.
var (
	package_name = "simdstring"
)
- func NewState(isa ISA, build_tags ...string) *State {
- ans := &State{ISA: isa}
- if len(build_tags) == 0 {
- build_tags = append(build_tags, string(isa.Goarch))
- }
- build_tag := func(w io.Writer, is_test bool) {
- fmt.Fprintf(w, "//go:build %s\n", strings.Join(build_tags, " "))
- }
- asm := func(w io.Writer) {
- fmt.Fprintln(w, "// Generated by generate.go do not edit")
- fmt.Fprintln(w, "// vim: ft=goasm")
- build_tag(w, w == &ans.TestASMOutput)
- fmt.Fprintln(w, "\n#include \"go_asm.h\"")
- fmt.Fprintln(w, "#include \"textflag.h\"")
- fmt.Fprintln(w)
- }
- asm(&ans.ASMOutput)
- asm(&ans.TestASMOutput)
- stub := func(w io.Writer) {
- fmt.Fprintln(w, "// Generated by generate.go do not edit")
- build_tag(w, w == &ans.TestStubOutput)
- fmt.Fprintln(w, "\npackage "+package_name)
- fmt.Fprintln(w)
- }
- stub(&ans.StubOutput)
- stub(&ans.TestStubOutput)
- return ans
- }
- func (s *State) OutputFunction() {
- if s.ActiveFunction == nil {
- return
- }
- if strings.HasPrefix(s.ActiveFunction.Name, "test_") {
- s.ActiveFunction.OutputASM(&s.TestASMOutput)
- s.ActiveFunction.OutputStub(&s.TestStubOutput)
- } else {
- s.ActiveFunction.OutputASM(&s.ASMOutput)
- s.ActiveFunction.OutputStub(&s.StubOutput)
- }
- s.ActiveFunction = nil
- }
- func (s *State) NewFunction(name, description string, params, returns []FunctionParam) *Function {
- s.OutputFunction()
- s.ActiveFunction = NewFunction(s.ISA, name, description, params, returns)
- s.ActiveFunction.UsedRegisters = make(map[Register]bool)
- return s.ActiveFunction
- }
- func (f *Function) load_vec_from_param(param string) Register {
- src := f.LoadParam(param)
- vec := f.Vec()
- f.LoadPointerUnaligned(src, vec)
- f.ReleaseReg(src)
- return vec
- }
- func (f *Function) store_vec_in_param(vec Register, param string) {
- ans := f.LoadParam(param)
- f.StoreUnalignedToPointer(vec, ans)
- f.ReleaseReg(ans)
- }
- func (s *State) test_load() {
- f := s.NewFunction("test_load_asm", "Test loading of vector register", []FunctionParam{{"src", ByteSlice}, {"ans", ByteSlice}}, nil)
- if !s.ISA.HasSIMD {
- return
- }
- vec := f.load_vec_from_param("src")
- f.store_vec_in_param(vec, `ans`)
- }
- func (s *State) test_set1_epi8() {
- f := s.NewFunction("test_set1_epi8_asm", "Test broadcast of byte into vector", []FunctionParam{{"b", types.Byte}, {"ans", ByteSlice}}, nil)
- if !s.ISA.HasSIMD {
- return
- }
- vec := f.Vec()
- r := f.LoadParam("b")
- q := f.Reg()
- f.SetRegisterTo(q, int(' '))
- f.JumpIfEqual(r, q, "space")
- f.SetRegisterTo(q, 11)
- f.JumpIfEqual(r, q, "eleven")
- f.Set1Epi8("b", vec)
- f.store_vec_in_param(vec, `ans`)
- f.Return()
- f.Label("space")
- f.Set1Epi8(int(' '), vec)
- f.store_vec_in_param(vec, `ans`)
- f.Return()
- f.Label("eleven")
- f.Set1Epi8(-1, vec)
- f.store_vec_in_param(vec, `ans`)
- f.Return()
- }
- func (s *State) test_cmpeq_epi8() {
- f := s.NewFunction("test_cmpeq_epi8_asm", "Test byte comparison of two vectors", []FunctionParam{{"a", ByteSlice}, {"b", ByteSlice}, {"ans", ByteSlice}}, nil)
- if !s.ISA.HasSIMD {
- return
- }
- a := f.load_vec_from_param("a")
- b := f.load_vec_from_param("b")
- f.CmpEqEpi8(a, b, a)
- f.store_vec_in_param(a, "ans")
- }
- func (s *State) test_cmplt_epi8() {
- f := s.NewFunction(
- "test_cmplt_epi8_asm", "Test byte comparison of two vectors", []FunctionParam{{"a", ByteSlice}, {"b", ByteSlice}, {"which", types.Int}, {"ans", ByteSlice}}, nil)
- if !s.ISA.HasSIMD {
- return
- }
- which := f.LoadParam("which")
- a := f.load_vec_from_param("a")
- b := f.load_vec_from_param("b")
- r := f.Reg()
- f.SetRegisterTo(r, 1)
- f.JumpIfEqual(which, r, "one")
- f.SetRegisterTo(r, 2)
- f.JumpIfEqual(which, r, "two")
- ans := f.Vec()
- f.CmpLtEpi8(a, b, ans)
- f.store_vec_in_param(ans, "ans")
- f.Return()
- f.Label("one")
- f.CmpLtEpi8(a, b, a)
- f.store_vec_in_param(a, "ans")
- f.Return()
- f.Label("two")
- f.CmpLtEpi8(a, b, b)
- f.store_vec_in_param(b, "ans")
- f.Return()
- }
- func (s *State) test_or() {
- f := s.NewFunction("test_or_asm", "Test OR of two vectors", []FunctionParam{{"a", ByteSlice}, {"b", ByteSlice}, {"ans", ByteSlice}}, nil)
- if !s.ISA.HasSIMD {
- return
- }
- a := f.load_vec_from_param("a")
- b := f.load_vec_from_param("b")
- f.Or(a, b, a)
- f.store_vec_in_param(a, "ans")
- }
- func (s *State) test_jump_if_zero() {
- f := s.NewFunction("test_jump_if_zero_asm", "Test jump on zero register", []FunctionParam{{"a", ByteSlice}}, []FunctionParam{{"ans", types.Int}})
- if !s.ISA.HasSIMD {
- return
- }
- a := f.load_vec_from_param("a")
- f.JumpIfZero(a, "zero")
- f.SetReturnValue("ans", 1)
- f.Return()
- f.Label("zero")
- f.SetReturnValue("ans", 0)
- }
- func (s *State) test_count_to_match() {
- f := s.NewFunction("test_count_to_match_asm", "Test counting bytes to first match", []FunctionParam{{"a", ByteSlice}, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}})
- if !s.ISA.HasSIMD {
- return
- }
- a := f.load_vec_from_param("a")
- b := f.Vec()
- f.Set1Epi8("b", b)
- f.CmpEqEpi8(a, b, b)
- f.JumpIfZero(b, "fail")
- res := f.Reg()
- f.CountBytesToFirstMatchDestructive(b, res)
- f.SetReturnValue("ans", res)
- f.Return()
- f.Label("fail")
- f.SetReturnValue("ans", -1)
- }
- func (isa *ISA) LEA() string {
- if isa.GeneralPurposeRegisterSize == 32 {
- return "LEAL"
- }
- return "LEAQ"
- }
- func (s *State) index_func(f *Function, test_bytes func(bytes_to_test, test_ans Register)) {
- pos := f.Reg()
- test_ans := f.Vec()
- bytes_to_test := f.Vec()
- data_start := f.LoadParam(`data`)
- limit := f.LoadParamLen(`data`)
- f.JumpIfZero(limit, "fail")
- f.AddToSelf(limit, data_start)
- mask := f.Reg()
- vecsz := f.ISA.Bits / 8
- f.CopyRegister(data_start, pos)
- func() {
- unaligned_bytes := f.RegForShifts()
- defer f.ReleaseReg(unaligned_bytes)
- f.CopyRegister(data_start, unaligned_bytes)
- f.AndSelf(unaligned_bytes, vecsz-1)
- f.SubtractFromSelf(pos, unaligned_bytes)
- f.Comment(fmt.Sprintf("%s is now aligned to a %d byte boundary so loading from it is safe", pos, vecsz))
- f.LoadPointerAligned(pos, bytes_to_test)
- test_bytes(bytes_to_test, test_ans)
- f.MaskForCountDestructive(test_ans, mask)
- f.Comment("We need to shift out the possible extra bytes at the start of the string caused by the unaligned read")
- f.ShiftMaskRightDestructive(mask, unaligned_bytes)
- f.JumpIfZero(mask, "loop_start")
- f.CopyRegister(data_start, pos)
- f.JumpTo("byte_found_in_mask")
- }()
- f.Comment("Now loop over aligned blocks")
- f.Label("loop_start")
- f.AddToSelf(pos, vecsz)
- f.JumpIfLessThanOrEqual(limit, pos, "fail")
- f.LoadPointerAligned(pos, bytes_to_test)
- test_bytes(bytes_to_test, test_ans)
- f.JumpIfNonZero(test_ans, "byte_found_in_vec")
- f.JumpTo("loop_start")
- f.Label("byte_found_in_vec")
- f.MaskForCountDestructive(test_ans, mask)
- f.Comment("Get the result from", mask, "and return it")
- f.Label("byte_found_in_mask")
- f.CountLeadingZeroBytesInMask(mask, mask)
- f.AddToSelf(mask, pos)
- f.JumpIfLessThanOrEqual(limit, mask, "fail")
- f.SubtractFromSelf(mask, data_start)
- f.SetReturnValue("ans", mask)
- f.Return()
- f.Label("fail")
- f.SetReturnValue("ans", -1)
- f.Return()
- }
- func (s *State) indexbyte2_body(f *Function) {
- b1 := f.Vec()
- b2 := f.Vec()
- f.Set1Epi8("b1", b1)
- f.Set1Epi8("b2", b2)
- test_bytes := func(bytes_to_test, test_ans Register) {
- f.CmpEqEpi8(bytes_to_test, b1, test_ans)
- f.CmpEqEpi8(bytes_to_test, b2, bytes_to_test)
- f.Or(test_ans, bytes_to_test, test_ans)
- }
- s.index_func(f, test_bytes)
- }
- func (s *State) indexbyte2() {
- f := s.NewFunction("index_byte2_asm", "Find the index of either of two bytes", []FunctionParam{{"data", ByteSlice}, {"b1", types.Byte}, {"b2", types.Byte}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexbyte2_body(f)
- }
- f = s.NewFunction("index_byte2_string_asm", "Find the index of either of two bytes", []FunctionParam{{"data", types.String}, {"b1", types.Byte}, {"b2", types.Byte}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexbyte2_body(f)
- }
- }
- func (s *State) indexbyte_body(f *Function) {
- b := f.Vec()
- f.Set1Epi8("b", b)
- test_bytes := func(bytes_to_test, test_ans Register) {
- f.CmpEqEpi8(bytes_to_test, b, test_ans)
- }
- s.index_func(f, test_bytes)
- }
- func (s *State) indexbyte() {
- f := s.NewFunction("index_byte_asm", "Find the index of a byte", []FunctionParam{{"data", ByteSlice}, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexbyte_body(f)
- }
- f = s.NewFunction("index_byte_string_asm", "Find the index of a byte", []FunctionParam{{"data", types.String}, {"b", types.Byte}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexbyte_body(f)
- }
- }
- func (s *State) indexc0_body(f *Function) {
- lower := f.Vec()
- upper := f.Vec()
- del := f.Vec()
- f.Set1Epi8(-1, lower)
- f.Set1Epi8(int(' '), upper)
- f.Set1Epi8(0x7f, del)
- test_bytes := func(bytes_to_test, test_ans Register) {
- temp := f.Vec()
- defer f.ReleaseReg(temp)
- f.CmpEqEpi8(bytes_to_test, del, test_ans)
- f.CmpLtEpi8(bytes_to_test, upper, temp)
- f.CmpGtEpi8(bytes_to_test, lower, bytes_to_test)
- f.And(temp, bytes_to_test, bytes_to_test)
- f.Or(test_ans, bytes_to_test, test_ans)
- }
- s.index_func(f, test_bytes)
- }
- func (s *State) indexc0() {
- f := s.NewFunction("index_c0_asm", "Find the index of a C0 control code", []FunctionParam{{"data", ByteSlice}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexc0_body(f)
- }
- f = s.NewFunction("index_c0_string_asm", "Find the index of a C0 control code", []FunctionParam{{"data", types.String}}, []FunctionParam{{"ans", types.Int}})
- if s.ISA.HasSIMD {
- s.indexc0_body(f)
- }
- }
- func (s *State) Generate() {
- s.test_load()
- s.test_set1_epi8()
- s.test_cmpeq_epi8()
- s.test_cmplt_epi8()
- s.test_or()
- s.test_jump_if_zero()
- s.test_count_to_match()
- s.indexbyte2()
- s.indexc0()
- s.indexbyte()
- s.OutputFunction()
- }
- // CLI {{{
// exit writes msg, followed by a newline, to standard error and terminates
// the process with exit status 1.
func exit(msg any) {
	const failure_status = 1
	os.Stderr.WriteString(fmt.Sprintf("%s\n", msg))
	os.Exit(failure_status)
}
- func write_file(name, text string) {
- b := unsafe.Slice(unsafe.StringData(text), len(text))
- if existing, err := os.ReadFile(name); err == nil && bytes.Equal(existing, b) {
- return
- }
- if err := os.WriteFile(name, b, 0664); err != nil {
- exit(err)
- }
- }
- func do_one(s *State) {
- s.Generate()
- if s.ISA.HasSIMD {
- write_file(fmt.Sprintf("asm_%d_%s_generated.s", s.ISA.Bits, s.ISA.Goarch), s.ASMOutput.String())
- write_file(fmt.Sprintf("asm_%d_%s_generated_test.s", s.ISA.Bits, s.ISA.Goarch), s.TestASMOutput.String())
- }
- write_file(fmt.Sprintf("asm_%d_%s_generated.go", s.ISA.Bits, s.ISA.Goarch), s.StubOutput.String())
- write_file(fmt.Sprintf("asm_%d_%s_generated_test.go", s.ISA.Bits, s.ISA.Goarch), s.TestStubOutput.String())
- }
- func create_isa(arch Arch, bits int) ISA {
- switch arch {
- case AMD64:
- return CreateAMD64ISA(bits)
- case ARM64:
- return CreateARM64ISA(bits)
- }
- panic("Unknown ISA arch")
- }
- func main() {
- output_dir, err := os.Getwd()
- if err != nil {
- exit(err)
- }
- if len(os.Args) > 1 {
- if output_dir, err = filepath.Abs(os.Args[len(os.Args)-1]); err != nil {
- exit(err)
- }
- }
- if err = os.MkdirAll(output_dir, 0755); err != nil {
- exit(err)
- }
- if err = os.Chdir(output_dir); err != nil {
- exit(err)
- }
- package_name = filepath.Base(output_dir)
- simd_arches := []Arch{AMD64, ARM64}
- a := make([]string, len(simd_arches))
- for i, arch := range simd_arches {
- a[i] = string(arch)
- }
- no_simd_build_tag := fmt.Sprintf("!(%s)", strings.Join(a, "||"))
- for _, bits := range []int{128, 256} {
- for _, arch := range simd_arches {
- s := NewState(create_isa(arch, bits))
- fmt.Fprintf(&s.StubOutput, "const HasSIMD%dCode = %#v\n", bits, s.ISA.HasSIMD)
- do_one(s)
- }
- s := NewState(CreateAMD64ISA(bits), no_simd_build_tag)
- s.ISA.HasSIMD = false
- fmt.Fprintf(&s.StubOutput, "const HasSIMD%dCode = false\n", bits)
- s.Generate()
- write_file(fmt.Sprintf("asm_other_%d_generated.go", bits), s.StubOutput.String())
- write_file(fmt.Sprintf("asm_other_%d_generated_test.go", bits), s.TestStubOutput.String())
- }
- }
- // }}}
|