markv_codec.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. // Copyright (c) 2018 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef SOURCE_COMP_MARKV_CODEC_H_
  15. #define SOURCE_COMP_MARKV_CODEC_H_
  16. #include <list>
  17. #include <map>
  18. #include <memory>
  19. #include <vector>
  20. #include "source/assembly_grammar.h"
  21. #include "source/comp/huffman_codec.h"
  22. #include "source/comp/markv_model.h"
  23. #include "source/comp/move_to_front.h"
  24. #include "source/diagnostic.h"
  25. #include "source/id_descriptor.h"
  26. #include "source/val/instruction.h"
  27. // Base class for MARK-V encoder and decoder. Contains common functionality
  28. // such as:
  29. // - Validator connection and validation state.
  30. // - SPIR-V grammar and helper functions.
  31. namespace spvtools {
  32. namespace comp {
  33. class MarkvLogger;
  34. // Handles for move-to-front sequences. Enums which end with "Begin" define
  35. // handle spaces which start at that value and span 16 or 32 bit wide.
  36. enum : uint64_t {
  37. kMtfNone = 0,
  38. // All ids.
  39. kMtfAll,
  40. // All forward declared ids.
  41. kMtfForwardDeclared,
  42. // All type ids except for generated by OpTypeFunction.
  43. kMtfTypeNonFunction,
  44. // All labels.
  45. kMtfLabel,
  46. // All ids created by instructions which had type_id.
  47. kMtfObject,
  48. // All types generated by OpTypeFloat, OpTypeInt, OpTypeBool.
  49. kMtfTypeScalar,
  50. // All composite types.
  51. kMtfTypeComposite,
  52. // Boolean type or any vector type of it.
  53. kMtfTypeBoolScalarOrVector,
  54. // All float types or any vector floats type.
  55. kMtfTypeFloatScalarOrVector,
  56. // All int types or any vector int type.
  57. kMtfTypeIntScalarOrVector,
  58. // All types declared as return types in OpTypeFunction.
  59. kMtfTypeReturnedByFunction,
  60. // All composite objects.
  61. kMtfComposite,
  62. // All bool objects or vectors of bools.
  63. kMtfBoolScalarOrVector,
  64. // All float objects or vectors of float.
  65. kMtfFloatScalarOrVector,
  66. // All int objects or vectors of int.
  67. kMtfIntScalarOrVector,
  68. // All pointer types which point to composited.
  69. kMtfTypePointerToComposite,
  70. // Used by EncodeMtfRankHuffman.
  71. kMtfGenericNonZeroRank,
  72. // Handle space for ids of specific type.
  73. kMtfIdOfTypeBegin = 0x10000,
  74. // Handle space for ids generated by specific opcode.
  75. kMtfIdGeneratedByOpcode = 0x20000,
  76. // Handle space for ids of objects with type generated by specific opcode.
  77. kMtfIdWithTypeGeneratedByOpcodeBegin = 0x30000,
  78. // All vectors of specific component type.
  79. kMtfVectorOfComponentTypeBegin = 0x40000,
  80. // All vector types of specific size.
  81. kMtfTypeVectorOfSizeBegin = 0x50000,
  82. // All pointer types to specific type.
  83. kMtfPointerToTypeBegin = 0x60000,
  84. // All function types which return specific type.
  85. kMtfFunctionTypeWithReturnTypeBegin = 0x70000,
  86. // All function objects which return specific type.
  87. kMtfFunctionWithReturnTypeBegin = 0x80000,
  88. // Short id descriptor space (max 16-bit).
  89. kMtfShortIdDescriptorSpaceBegin = 0x90000,
  90. // Long id descriptor space (32-bit).
  91. kMtfLongIdDescriptorSpaceBegin = 0x100000000,
  92. };
  93. class MarkvCodec {
  94. public:
  95. static const uint32_t kMarkvMagicNumber;
  96. // Mtf ranks smaller than this are encoded with Huffman coding.
  97. static const uint32_t kMtfSmallestRankEncodedByValue;
  98. // Signals that the mtf rank is too large to be encoded with Huffman.
  99. static const uint32_t kMtfRankEncodedByValueSignal;
  100. static const uint32_t kShortDescriptorNumBits;
  101. static const size_t kByteBreakAfterInstIfLessThanUntilNextByte;
  102. static uint32_t GetMarkvVersion();
  103. virtual ~MarkvCodec();
  104. protected:
  105. struct MarkvHeader {
  106. MarkvHeader();
  107. uint32_t magic_number;
  108. uint32_t markv_version;
  109. // Magic number to identify or verify MarkvModel used for encoding.
  110. uint32_t markv_model = 0;
  111. uint32_t markv_length_in_bits = 0;
  112. uint32_t spirv_version = 0;
  113. uint32_t spirv_generator = 0;
  114. };
  115. // |model| is owned by the caller, must be not null and valid during the
  116. // lifetime of the codec.
  117. MarkvCodec(spv_const_context context, spv_validator_options validator_options,
  118. const MarkvModel* model);
  119. // Returns instruction which created |id| or nullptr if such instruction was
  120. // not registered.
  121. const val::Instruction* FindDef(uint32_t id) const {
  122. const auto it = id_to_def_instruction_.find(id);
  123. if (it == id_to_def_instruction_.end()) return nullptr;
  124. return it->second;
  125. }
  126. size_t GetNumBitsToNextByte(size_t bit_pos) const;
  127. bool OpcodeHasFixedNumberOfOperands(SpvOp opcode) const;
  128. // Returns type id of vector type component.
  129. uint32_t GetVectorComponentType(uint32_t vector_type_id) const {
  130. const val::Instruction* type_inst = FindDef(vector_type_id);
  131. assert(type_inst);
  132. assert(type_inst->opcode() == SpvOpTypeVector);
  133. const uint32_t component_type =
  134. type_inst->word(type_inst->operands()[1].offset);
  135. return component_type;
  136. }
  137. // Returns mtf handle for ids of given type.
  138. uint64_t GetMtfIdOfType(uint32_t type_id) const {
  139. return kMtfIdOfTypeBegin + type_id;
  140. }
  141. // Returns mtf handle for ids generated by given opcode.
  142. uint64_t GetMtfIdGeneratedByOpcode(SpvOp opcode) const {
  143. return kMtfIdGeneratedByOpcode + opcode;
  144. }
  145. // Returns mtf handle for ids of type generated by given opcode.
  146. uint64_t GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode) const {
  147. return kMtfIdWithTypeGeneratedByOpcodeBegin + opcode;
  148. }
  149. // Returns mtf handle for vectors of specific component type.
  150. uint64_t GetMtfVectorOfComponentType(uint32_t type_id) const {
  151. return kMtfVectorOfComponentTypeBegin + type_id;
  152. }
  153. // Returns mtf handle for vector type of specific size.
  154. uint64_t GetMtfTypeVectorOfSize(uint32_t size) const {
  155. return kMtfTypeVectorOfSizeBegin + size;
  156. }
  157. // Returns mtf handle for pointers to specific size.
  158. uint64_t GetMtfPointerToType(uint32_t type_id) const {
  159. return kMtfPointerToTypeBegin + type_id;
  160. }
  161. // Returns mtf handle for function types with given return type.
  162. uint64_t GetMtfFunctionTypeWithReturnType(uint32_t type_id) const {
  163. return kMtfFunctionTypeWithReturnTypeBegin + type_id;
  164. }
  165. // Returns mtf handle for functions with given return type.
  166. uint64_t GetMtfFunctionWithReturnType(uint32_t type_id) const {
  167. return kMtfFunctionWithReturnTypeBegin + type_id;
  168. }
  169. // Returns mtf handle for the given long id descriptor.
  170. uint64_t GetMtfLongIdDescriptor(uint32_t descriptor) const {
  171. return kMtfLongIdDescriptorSpaceBegin + descriptor;
  172. }
  173. // Returns mtf handle for the given short id descriptor.
  174. uint64_t GetMtfShortIdDescriptor(uint32_t descriptor) const {
  175. return kMtfShortIdDescriptorSpaceBegin + descriptor;
  176. }
  177. // Process data from the current instruction. This would update MTFs and
  178. // other data containers.
  179. void ProcessCurInstruction();
  180. // Returns move-to-front handle to be used for the current operand slot.
  181. // Mtf handle is chosen based on a set of rules defined by SPIR-V grammar.
  182. uint64_t GetRuleBasedMtf();
  183. // Returns words of the current instruction. Decoder has a different
  184. // implementation and the array is valid only until the previously decoded
  185. // word.
  186. virtual const uint32_t* GetInstWords() const { return inst_.words; }
  187. // Returns the opcode of the previous instruction.
  188. SpvOp GetPrevOpcode() const {
  189. if (instructions_.empty()) return SpvOpNop;
  190. return instructions_.back()->opcode();
  191. }
  192. // Returns diagnostic stream, position index is set to instruction number.
  193. DiagnosticStream Diag(spv_result_t error_code) const {
  194. return DiagnosticStream({0, 0, instructions_.size()}, context_->consumer,
  195. "", error_code);
  196. }
  197. // Returns current id bound.
  198. uint32_t GetIdBound() const { return id_bound_; }
  199. // Sets current id bound, expected to be no lower than the previous one.
  200. void SetIdBound(uint32_t id_bound) {
  201. assert(id_bound >= id_bound_);
  202. id_bound_ = id_bound;
  203. }
  204. // Returns Huffman codec for ranks of the mtf with given |handle|.
  205. // Different mtfs can use different rank distributions.
  206. // May return nullptr if the codec doesn't exist.
  207. const HuffmanCodec<uint32_t>* GetMtfHuffmanCodec(uint64_t handle) const {
  208. const auto it = mtf_huffman_codecs_.find(handle);
  209. if (it == mtf_huffman_codecs_.end()) return nullptr;
  210. return it->second.get();
  211. }
  212. // Promotes id in all move-to-front sequences if ids can be shared by multiple
  213. // sequences.
  214. void PromoteIfNeeded(uint32_t id) {
  215. if (!model_->AnyDescriptorHasCodingScheme() &&
  216. model_->id_fallback_strategy() ==
  217. MarkvModel::IdFallbackStrategy::kShortDescriptor) {
  218. // Move-to-front sequences do not share ids. Nothing to do.
  219. return;
  220. }
  221. multi_mtf_.Promote(id);
  222. }
  223. spv_validator_options validator_options_ = nullptr;
  224. const AssemblyGrammar grammar_;
  225. MarkvHeader header_;
  226. // MARK-V model, not owned.
  227. const MarkvModel* model_ = nullptr;
  228. // Current instruction, current operand and current operand index.
  229. spv_parsed_instruction_t inst_;
  230. spv_parsed_operand_t operand_;
  231. uint32_t operand_index_;
  232. // Maps a result ID to its type ID. By convention:
  233. // - a result ID that is a type definition maps to itself.
  234. // - a result ID without a type maps to 0. (E.g. for OpLabel)
  235. std::unordered_map<uint32_t, uint32_t> id_to_type_id_;
  236. // Container for all move-to-front sequences.
  237. MultiMoveToFront multi_mtf_;
  238. // Id of the current function or zero if outside of function.
  239. uint32_t cur_function_id_ = 0;
  240. // Return type of the current function.
  241. uint32_t cur_function_return_type_ = 0;
  242. // Remaining function parameter types. This container is filled on OpFunction,
  243. // and drained on OpFunctionParameter.
  244. std::list<uint32_t> remaining_function_parameter_types_;
  245. // List of ids local to the current function.
  246. std::vector<uint32_t> ids_local_to_cur_function_;
  247. // List of instructions in the order they are given in the module.
  248. std::vector<std::unique_ptr<const val::Instruction>> instructions_;
  249. // Container/computer for long (32-bit) id descriptors.
  250. IdDescriptorCollection long_id_descriptors_;
  251. // Container/computer for short id descriptors.
  252. // Short descriptors are stored in uint32_t, but their actual bit width is
  253. // defined with kShortDescriptorNumBits.
  254. // It doesn't seem logical to have a different computer for short id
  255. // descriptors, since one could actually map/truncate long descriptors.
  256. // But as short descriptors have collisions, the efficiency of
  257. // compression depends on the collision pattern, and short descriptors
  258. // produced by function ShortHashU32Array have been empirically proven to
  259. // produce better results.
  260. IdDescriptorCollection short_id_descriptors_;
  261. // Huffman codecs for move-to-front ranks. The map key is mtf handle. Doesn't
  262. // need to contain a different codec for every handle as most use one and the
  263. // same.
  264. std::map<uint64_t, std::unique_ptr<HuffmanCodec<uint32_t>>>
  265. mtf_huffman_codecs_;
  266. // If not nullptr, codec will log comments on the compression process.
  267. std::unique_ptr<MarkvLogger> logger_;
  268. spv_const_context context_ = nullptr;
  269. private:
  270. // Maps result id to the instruction which defined it.
  271. std::unordered_map<uint32_t, const val::Instruction*> id_to_def_instruction_;
  272. uint32_t id_bound_ = 1;
  273. };
  274. } // namespace comp
  275. } // namespace spvtools
  276. #endif // SOURCE_COMP_MARKV_CODEC_H_