123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598 |
- """
- Copyright (C) 2018 Alyssa Rosenzweig
- Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- """
- import sys
- import pprint
- import struct
# Parsed source lines, stored in file order as (instruction, [argument, ...])
program = []

# Definitions from cwabbott's tools.
# Maps each ALU mnemonic to the opcode placed in the low bits of the ALU
# instruction word.
t6xx_alu_ops = {
    "fadd": 0x10,
    "fmul": 0x14,
    "fmin": 0x28,
    "fmax": 0x2c,
    "fmov": 0x30,
    "ffloor": 0x36,
    "fceil": 0x37,
    "fdot3": 0x3c,
    "fdot3r": 0x3d,
    "fdot4": 0x3e,
    "freduce": 0x3f,

    "iadd": 0x40,
    "isub": 0x46,
    "imul": 0x58,
    "imov": 0x7b,

    "feq": 0x80,
    "fne": 0x81,
    "flt": 0x82,
    "fle": 0x83,
    "f2i": 0x99,

    "ieq": 0xa0,
    "ine": 0xa1,
    "ilt": 0xa4,
    "ile": 0xa5,
    "ball": 0xa9,
    "bany": 0xb1,
    "i2f": 0xb8,

    "csel": 0xc5,
    "fatan_pt2": 0xe8,
    "frcp": 0xf0,
    "frsqrt": 0xf2,
    "fsqrt": 0xf3,
    "fexp2": 0xf4,
    "flog2": 0xf5,
    "fsin": 0xf6,
    "fcos": 0xf7,
    "fatan2_pt1": 0xf9,
}
# Bit position, within the 32-bit ALU tag word, that flags each unit as
# present in a bundle. "constants" is never set in the tag; its value only
# takes part in the ascending-order check when units are bundled.
t6xx_alu_bits = {
    "vmul": 17,
    "sadd": 19,
    "vadd": 21,
    "smul": 23,
    "lut": 25,
    "fb": 26,
    "branch": 27,
    "constants": 32,
}

# Width in bits of the instruction body emitted for each unit
t6xx_alu_size_bits = {
    "vmul": 48,
    "sadd": 32,
    "vadd": 48,
    "smul": 32,
    "lut": 48,
    "fb": 16,
    "branch": 48,
}

# Output-modifier suffix -> encoded value
t6xx_outmod = {
    "none": 0,
    "pos": 1,
    "int": 2,
    "sat": 3,
}

# Register width selector used for the vector units' I/O mode field
t6xx_reg_mode = {
    "half": 1,
    "full": 2,
}

# Destination override selector values
t6xx_dest_override = {
    "lower": 0,
    "upper": 1,
    "none": 2,
}
# Load/store mnemonic -> opcode. "ld_st_noop" fills the unused slot when a
# load/store bundle carries only one real operation.
t6xx_load_store_ops = {
    "ld_st_noop": 0x03,

    "ld_attr_16": 0x95,
    "ld_attr_32": 0x94,
    "ld_vary_16": 0x99,
    "ld_vary_32": 0x98,
    "ld_uniform_16": 0xac,
    "ld_uniform_32": 0xb0,

    "st_vary_16": 0xd5,
    "st_vary_32": 0xd4,
}

# Bundle type tags stored in the low nibble of each emitted bundle. The ALU
# tags are keyed by the bundle size in 32-bit words.
t6xx_tag = {
    "texture": 0x3,
    "load_store": 0x5,
    "alu4": 0x8,
    "alu8": 0x9,
    "alu12": 0xa,
    "alu16": 0xb,
}
def is_tag_alu(tag):
    """Return True when *tag* lies in the ALU tag range (alu4 through alu16)."""
    return t6xx_tag["alu4"] <= tag <= t6xx_tag["alu16"]
# Just an enum: the kind stored as the first element of every record in the
# intermediate instruction stream
ALU, LDST, TEXTURE = range(3)

# Constant types supported. The key is the one-letter prefix of a
# "<prefix>constants" directive; the value pairs the struct format character
# with the function used to coerce each textual argument.
constant_types = {
    "f": ("f", float),
    "h": ("e", float),
    "i": ("i", int),
    "s": ("h", int),
}

# TODO: Synthesise fbwrite stuff better
fbwrite_op = 0x7

# TODO: What else?
# Texture operation name -> opcode
texture_op = {
    "normal": 0x11,
    "texelfetch": 0x14,
}

# Texture format name -> encoded format value
texture_fmt = {
    "2d": 0x02,
    "3d": 0x03,
}
-
# Read the assembly source named by the first command-line argument. Each line
# is split at its first space into the instruction and a comma-separated
# argument list; the pair is appended to `program`.
with open(sys.argv[1], "r") as f:
    for ln in f:
        mnemonic, _separator, operand_text = ln.strip().partition(" ")
        operands = [operand.strip() for operand in operand_text.split(",")]
        program.append((mnemonic, operands))
# Component letter -> component index, shared by swizzle and mask decoding
swizzle_component = {letter: index for index, letter in enumerate("xyzw")}
def decode_reg_name(reg_name):
    """Split a register name into ``(index, half, upper)``.

    * ``r<N>`` is full register N.
    * A name starting with ``h`` has its number read from the third character
      onwards; that number selects half ``number & 1`` of full register
      ``number >> 1``.
    * Anything else is parsed as a bare integer (the special case used for
      load/store addresses).
    """
    prefix = reg_name[0]

    if prefix == 'r':
        return (int(reg_name[1:]), False, False)

    if prefix == 'h':
        # Decode half-register into its full register's half
        half_index = int(reg_name[2:])
        return (half_index >> 1, True, half_index & 1)

    # Special case for load/store addresses
    return (int(reg_name), False, False)
def standard_swizzle_from_parts(swizzle_parts):
    """Pack a swizzle into an integer, two bits per component.

    *swizzle_parts* is an operand split on "."; the element at index 1, when
    present, is the swizzle string, otherwise "xyzw" is assumed. The i-th
    letter's component index is stored at bit offset ``2 * i``.
    """
    letters = "xyzw" if len(swizzle_parts) <= 1 else swizzle_parts[1]
    # The 2-bit fields never overlap, so adding them is the same as OR-ing
    return sum(swizzle_component[c] << (2 * i) for (i, c) in enumerate(letters))
def mask_from_parts(mask_parts, large_mask):
    """Build a write mask from an operand split on ".".

    The element at index 1, when present, names the enabled components;
    otherwise all of "xyzw" are enabled. With *large_mask* each enabled
    component contributes two bits (at ``2 * index``); otherwise one bit (at
    ``index``). Returns ``(mask, component_string)``.
    """
    mask_s = "xyzw" if len(mask_parts) <= 1 else mask_parts[1]

    mask = 0
    for c in "xyzw":
        if c not in mask_s:
            continue
        index = swizzle_component[c]
        mask |= (3 << (2 * index)) if large_mask else (1 << index)

    return (mask, mask_s)
def decode_reg(reg):
    """Decode a source operand.

    Returns ``(immediate, value, swizzle, half, upper, modifiers)``.

    * ``#<float>`` is an immediate: *value* is the 16-bit half-float bit
      pattern and the remaining fields are 0.
    * Otherwise *value* is the register index. A leading ``-`` sets modifier
      bit 1 and an ``abs(`` wrapper sets modifier bit 0.
    """
    if reg.startswith("#"):
        # Not actually a register, instead an immediate float
        (half_float_bits,) = struct.unpack("H", struct.pack("e", float(reg[1:])))
        return (True, half_float_bits, 0, 0, 0, 0)

    # Function call syntax used in abs() modifier: drop the closing paren
    if reg.endswith(")"):
        reg = reg[:-1]

    swizzle_parts = reg.split(".")
    name = swizzle_parts[0]
    modifiers = 0

    if name.startswith("-"):
        modifiers |= 2
        name = name[1:]

    # Only the first letter is checked; the "abs(" prefix is then skipped
    if name.startswith("a"):
        modifiers |= 1
        name = name[len("abs("):]

    (ireg, half, upper) = decode_reg_name(name)
    swizzle = standard_swizzle_from_parts(swizzle_parts)
    return (False, ireg, swizzle, half, upper, modifiers)
def decode_masked_reg(reg, large_mask):
    """Decode a destination operand with an optional write mask.

    Returns ``(index, mask, component, half, upper)`` where *component* is the
    highest component index named in the mask (0 if none match).
    """
    parts = reg.split(".")
    (ireg, half, upper) = decode_reg_name(parts[0])
    (mask, letters) = mask_from_parts(parts, large_mask)

    enabled = (swizzle_component[c] for c in "xyzw" if c in letters)
    component = max(enabled, default=0)

    return (ireg, mask, component, half, upper)
# TODO: Fill these in XXX
# Texture pipeline registers in r28-r29
TEXTURE_BASE = 28


def decode_texture_reg_number(reg):
    """Decode a texture register operand into ``(full, select, upper)``.

    *select* is the register index relative to TEXTURE_BASE. Names starting
    with "r" are full registers; any other name has its number read from the
    third character onwards and is treated as a half register.
    """
    name = reg.split(".")[0]

    if name[0] == "r":
        return (True, int(name[1:]) - TEXTURE_BASE, 0)

    half_index = int(name[2:])
    return (False, (half_index >> 1) - TEXTURE_BASE, half_index & 1)
def decode_texture_reg(reg):
    """Decode a texture input register with its two-component swizzle.

    Returns ``(full, select, upper, swizzle_right, swizzle_left)`` where the
    left/right values are the component indices of the first/second swizzle
    letter.
    """
    (full, select, upper) = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    letters = reg.split(".")[1]
    left = swizzle_component[letters[0]]
    right = swizzle_component[letters[1]]

    return (full, select, upper, right, left)
def decode_texture_out_reg(reg):
    """Decode a texture output register into ``(full, select, upper, mask)``.

    The mask uses one bit per component.
    """
    mask = mask_from_parts(reg.split("."), False)[0]
    return decode_texture_reg_number(reg) + (mask,)
# Encode every parsed source line into a record of the intermediate
# instruction stream. Records are tuples whose first element is ALU, LDST or
# TEXTURE; bundling them into tagged binary words happens in a later pass.
instruction_stream = []

for p in program:
    ins = p[0]
    arguments = p[1]

    # For ALU, fbwrite and texture ops the mnemonic is dot-separated: the
    # first part names the unit/family and the second the operation.
    # Appending dots guarantees the indexed parts exist (as empty strings)
    # when the mnemonic is shorter.
    family = ins_mod = ins.split(".")[0]
    ins_op = (ins + ".").split(".")[1]
    ins_outmod = (ins + "." + ".").split(".")[2]

    if ins in t6xx_load_store_ops:
        op = t6xx_load_store_ops[ins]

        (reg, mask, _, _, _) = decode_masked_reg(arguments[0], False)
        (_, address, swizzle, _, _, _) = decode_reg(arguments[1])
        unknown = int(arguments[2], 16)

        b = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
        instruction_stream.append((LDST, b))
    elif ins_op in t6xx_alu_ops:
        op = t6xx_alu_ops[ins_op]

        # The third mnemonic part is an output modifier only for ALU ops
        # (texture ops use that position for the format), so it is resolved
        # here. An unrecognised modifier raises KeyError.
        out_mod = t6xx_outmod[ins_outmod or "none"]

        (reg_out, mask, out_component, half0, upper0) = decode_masked_reg(arguments[0], True)
        (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(arguments[1])
        (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(arguments[2])

        # 16-bit register word. With an inline constant, the constant's top
        # five bits take the place of the second input register and bit 15
        # is set.
        if immediate:
            register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
        else:
            register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)

        if ins_mod in ["vadd", "vmul", "lut"]:
            io_mode = t6xx_reg_mode["half" if half0 else "full"]
            i1half = half1
            output_override = 2  # NORMAL, TODO

            if half0:
                # TODO: half actually
                repsel = 2 * upper1

                # Rare case... half destinations use the one-bit-per-component
                # mask rather than the two-bit one
                (_, wr_mask, _, _, _) = decode_masked_reg(arguments[0], False)
            else:
                repsel = upper1
                wr_mask = mask

            if immediate:
                # Inline constant: lower 11 bits
                i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
            else:
                if half0:
                    # TODO: replicate input 2 if half
                    i2block = 0
                else:
                    # TODO: half selection
                    i2block = upper2 | (half2 << 2)

                i2block |= swizzle2 << 3

            instruction_word = (
                (op << 0)
                | (io_mode << 8)
                | (mod1 << 10)
                | (repsel << 12)
                | (i1half << 14)
                | (swizzle1 << 15)
                | (mod2 << 23)
                | (i2block << 25)
                | (output_override << 36)
                | (out_mod << 38)
                | (wr_mask << 40)
            )
        elif ins_mod in ["sadd", "smul"]:
            # TODO: What are these?
            unknown2 = 0
            unknown3 = 0

            if half1:
                i1comp_block = swizzle1 | (upper1 << 2)
            else:
                i1comp_block = swizzle1 << 1

            if immediate:
                # Inline constant is splattered in a... bizarre way
                i2block = (
                    (((reg_in2 >> 9) & 3) << 0)
                    | (((reg_in2 >> 8) & 1) << 2)
                    | (((reg_in2 >> 5) & 7) << 3)
                    | (((reg_in2 >> 0) & 15) << 6)
                )
            else:
                # TODO: half register
                swizzle2 = (swizzle2 << 1) & 0x1F
                i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)

            # TODO: half register
            outcomp_block = out_component << 1

            instruction_word = (
                (op << 0)
                | (mod1 << 8)
                | ((not half1) << 10)
                | (i1comp_block << 11)
                | (i2block << 14)
                | (unknown3 << 25)
                | (out_mod << 26)
                | ((not half0) << 28)
                | (outcomp_block << 29)
            )
        else:
            # Other units: only the bare opcode is emitted
            instruction_word = op

        instruction_stream.append((ALU, ins_mod, register_word, instruction_word))
    elif family == "texture":
        # Texture ops use long series of modifiers to describe their needed
        # capabilities, separated by dots. Decode them here
        parts = ins.split(".")

        # First few modifiers are fixed, like an instruction name
        tex_op = parts[1]
        tex_fmt = parts[2]

        # The remaining are variable, but strictly ordered
        parts = parts[3:]

        op = texture_op[tex_op]

        # Some bits are defined directly in the modifier list
        shadow = "shadow" in parts
        cont = "cont" in parts
        last = "last" in parts
        has_filter = "raw" not in parts

        # The remaining need order preserved since they have their own
        # arguments, which follow the four fixed operands
        argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]

        bias_lod = 0

        for argument, part in zip(argument_parts, arguments[4:]):
            if argument == "bias":
                bias_lod = int(float(part) * 256)
            else:
                print("Unknown argument: " + str(argument))

        fmt = texture_fmt[tex_fmt]
        has_offset = 0
        magic1 = 1  # IDEK

        texture_handle = int(arguments[1][len("texture"):])

        sampler_parts = arguments[2].split(".")
        sampler_handle = int(sampler_parts[0][len("sampler"):])
        swizzle0 = standard_swizzle_from_parts(sampler_parts)

        (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
        (full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])

        tex = (
            (op << 0)
            | (shadow << 6)
            | (cont << 8)
            | (last << 9)
            | (fmt << 10)
            | (has_offset << 15)
            | (has_filter << 16)
            | (select1 << 17)
            | (upper1 << 18)
            | (full1 << 20)
            | (swizzleR1 << 21)
            | (swizzleL1 << 23)
            | (full0 << 29)
            | (magic1 << 30)
            | (select0 << 32)
            | (upper0 << 33)
            | (mask0 << 34)
            | (swizzle0 << 40)
            | (bias_lod << 72)
            | (texture_handle << 88)
            | (sampler_handle << 104)
        )
        instruction_stream.append((TEXTURE, tex))
    elif family == "fb":
        if ins_op == "write":
            # TODO: What does this argument mean, exactly?
            magic = int(arguments[0], 16)
            fb = fbwrite_op | (magic << 3)
            # No register word accompanies an fb write, hence None
            instruction_stream.append((ALU, "fb", None, fb))
        else:
            print("Unknown fb op: " + str(ins_op))
    elif ins[1:] == "constants":
        # "<prefix>constants": the one-letter prefix selects the element type
        if ins[0] not in constant_types:
            print("Unknown constant type " + str(ins[0]))
            break

        (fmt, cast) = constant_types[ins[0]]
        consts = bytearray(b"".join(struct.pack(fmt, cast(value)) for value in arguments))

        # consts must be exactly 16 bytes, so pad with zeroes if necessary
        if len(consts) > 4 * 4:
            raise ValueError("Too many constants: " + str(len(consts)) + " bytes, at most 16 allowed")
        consts += bytes(4 * 4 - len(consts))

        instruction_stream.append((ALU, "constants", consts))
# Emit from instruction stream: group the encoded records into binary bundles.
# Each bundle becomes one bytearray in `instructions`; the upper nibble of its
# first byte is filled in afterwards with the next bundle's tag.
instructions = []
stream_length = len(instruction_stream)

index = 0
while index < stream_length:
    current = instruction_stream[index]
    kind = current[0]
    bundle = bytearray()

    if kind == LDST:
        # A load/store bundle carries two operations. If the next record is
        # also a load/store it shares this bundle; otherwise a no-op opcode
        # fills the first slot and the real operation goes in the second.
        following = instruction_stream[index + 1] if index + 1 < stream_length else None

        if following and following[0] == LDST:
            first_slot = current[1]
            second_slot = following[1]
            index += 1
        else:
            first_slot = t6xx_load_store_ops["ld_st_noop"]
            second_slot = current[1]

        word = (second_slot << 68) | (first_slot << 8) | t6xx_tag["load_store"]
        bundle += word.to_bytes(16, "little")
    elif kind == TEXTURE:
        word = (current[1] << 8) | t6xx_tag["texture"]
        bundle += word.to_bytes(16, "little")
    elif kind == ALU:
        # TODO: Combining ALU ops
        emitted_bytes = 4  # 32-bit tag always emitted
        tag_word = 0
        register_bytes = bytearray()
        body_bytes = bytearray()
        constant_bytes = None
        previous_bit = 0

        # Consume consecutive ALU records for as long as their unit bits are
        # strictly ascending
        while index < stream_length:
            candidate = instruction_stream[index]
            if candidate[0] != ALU:
                break

            unit = candidate[1]
            unit_bit = t6xx_alu_bits[unit]
            if unit_bit <= previous_bit:
                break
            previous_bit = unit_bit

            if unit == "constants":
                constant_bytes = candidate[2]
            else:
                # Flag for the used part of the GPU
                tag_word |= 1 << unit_bit

                # 16-bit register word, if present
                if candidate[2] is not None:
                    register_bytes += candidate[2].to_bytes(2, "little")
                    emitted_bytes += 2

                body_size = t6xx_alu_size_bits[unit] // 8
                body_bytes += candidate[3].to_bytes(body_size, "little")
                emitted_bytes += body_size

            index += 1

        index -= 1  # fix off by one, from later loop increment

        # Pad to the next multiple of 16 bytes (4 words)
        padding = (16 - emitted_bytes % 16) % 16
        emitted_bytes += padding

        # The size used for the tag includes the constants
        if constant_bytes:
            emitted_bytes += len(constant_bytes)

        # Calculate tag given size, in 32-bit words
        tag_word |= t6xx_tag["alu" + str(emitted_bytes >> 2)]

        # Actually emit, now that we can
        bundle += tag_word.to_bytes(4, "little")
        bundle += register_bytes
        bundle += body_bytes
        bundle += bytes(padding)

        if constant_bytes:
            bundle += constant_bytes

    instructions.append(bundle)
    index += 1
# Assembly over; just emit tags at this point. The upper nibble of every
# bundle's first byte receives the tag of the bundle that follows it.
binary = bytearray()
bundle_count = len(instructions)

for (position, bundle) in enumerate(instructions):
    if position + 1 == bundle_count:
        # Instruction stream over
        next_tag = 1
    else:
        # Instruction prefetch
        next_tag = instructions[position + 1][0] & 0xF

        # ALU special case: when the following bundle is the last one and is
        # an ALU bundle, 1 is written instead of its tag
        if position + 2 == bundle_count and is_tag_alu(next_tag):
            next_tag = 1

    bundle[0] |= next_tag << 4
    binary += bundle

pprint.pprint(program)

# The second command-line argument names the output binary
with open(sys.argv[2], "wb") as f:
    f.write(binary)
|