bit_stream.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. // Copyright (c) 2017 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <algorithm>
  15. #include <cassert>
  16. #include <cstring>
  17. #include <sstream>
  18. #include <type_traits>
  19. #include "source/comp/bit_stream.h"
  20. namespace spvtools {
  21. namespace comp {
  22. namespace {
  23. // Returns if the system is little-endian. Unfortunately only works during
  24. // runtime.
  25. bool IsLittleEndian() {
  26. // This constant value allows the detection of the host machine's endianness.
  27. // Accessing it as an array of bytes is valid due to C++11 section 3.10
  28. // paragraph 10.
  29. static const uint16_t kFF00 = 0xff00;
  30. return reinterpret_cast<const unsigned char*>(&kFF00)[0] == 0;
  31. }
  32. // Copies bytes from the given buffer to a uint64_t buffer.
  33. // Motivation: casting uint64_t* to uint8_t* is ok. Casting in the other
  34. // direction is only advisable if uint8_t* is aligned to 64-bit word boundary.
  35. std::vector<uint64_t> ToBuffer64(const void* buffer, size_t num_bytes) {
  36. std::vector<uint64_t> out;
  37. out.resize((num_bytes + 7) / 8, 0);
  38. memcpy(out.data(), buffer, num_bytes);
  39. return out;
  40. }
  41. // Copies uint8_t buffer to a uint64_t buffer.
  42. std::vector<uint64_t> ToBuffer64(const std::vector<uint8_t>& in) {
  43. return ToBuffer64(in.data(), in.size());
  44. }
  45. // Returns uint64_t containing the same bits as |val|.
  46. // Type size must be less than 8 bytes.
  47. template <typename T>
  48. uint64_t ToU64(T val) {
  49. static_assert(sizeof(T) <= 8, "Type size too big");
  50. uint64_t val64 = 0;
  51. std::memcpy(&val64, &val, sizeof(T));
  52. return val64;
  53. }
  54. // Returns value of type T containing the same bits as |val64|.
  55. // Type size must be less than 8 bytes. Upper (unused) bits of |val64| must be
  56. // zero (irrelevant, but is checked with assertion).
  57. template <typename T>
  58. T FromU64(uint64_t val64) {
  59. assert(sizeof(T) == 8 || (val64 >> (sizeof(T) * 8)) == 0);
  60. static_assert(sizeof(T) <= 8, "Type size too big");
  61. T val = 0;
  62. std::memcpy(&val, &val64, sizeof(T));
  63. return val;
  64. }
  65. // Writes bits from |val| to |writer| in chunks of size |chunk_length|.
  66. // Signal bit is used to signal if the reader should expect another chunk:
  67. // 0 - no more chunks to follow
  68. // 1 - more chunks to follow
  69. // If number of written bits reaches |max_payload| last chunk is truncated.
  70. void WriteVariableWidthInternal(BitWriterInterface* writer, uint64_t val,
  71. size_t chunk_length, size_t max_payload) {
  72. assert(chunk_length > 0);
  73. assert(chunk_length < max_payload);
  74. assert(max_payload == 64 || (val >> max_payload) == 0);
  75. if (val == 0) {
  76. // Split in two writes for more readable logging.
  77. writer->WriteBits(0, chunk_length);
  78. writer->WriteBits(0, 1);
  79. return;
  80. }
  81. size_t payload_written = 0;
  82. while (val) {
  83. if (payload_written + chunk_length >= max_payload) {
  84. // This has to be the last chunk.
  85. // There is no need for the signal bit and the chunk can be truncated.
  86. const size_t left_to_write = max_payload - payload_written;
  87. assert((val >> left_to_write) == 0);
  88. writer->WriteBits(val, left_to_write);
  89. break;
  90. }
  91. writer->WriteBits(val, chunk_length);
  92. payload_written += chunk_length;
  93. val = val >> chunk_length;
  94. // Write a single bit to signal if there is more to come.
  95. writer->WriteBits(val ? 1 : 0, 1);
  96. }
  97. }
  98. // Reads data written with WriteVariableWidthInternal. |chunk_length| and
  99. // |max_payload| should be identical to those used to write the data.
  100. // Returns false if the stream ends prematurely.
  101. bool ReadVariableWidthInternal(BitReaderInterface* reader, uint64_t* val,
  102. size_t chunk_length, size_t max_payload) {
  103. assert(chunk_length > 0);
  104. assert(chunk_length <= max_payload);
  105. size_t payload_read = 0;
  106. while (payload_read + chunk_length < max_payload) {
  107. uint64_t bits = 0;
  108. if (reader->ReadBits(&bits, chunk_length) != chunk_length) return false;
  109. *val |= bits << payload_read;
  110. payload_read += chunk_length;
  111. uint64_t more_to_come = 0;
  112. if (reader->ReadBits(&more_to_come, 1) != 1) return false;
  113. if (!more_to_come) {
  114. return true;
  115. }
  116. }
  117. // Need to read the last chunk which may be truncated. No signal bit follows.
  118. uint64_t bits = 0;
  119. const size_t left_to_read = max_payload - payload_read;
  120. if (reader->ReadBits(&bits, left_to_read) != left_to_read) return false;
  121. *val |= bits << payload_read;
  122. return true;
  123. }
  124. // Calls WriteVariableWidthInternal with the right max_payload argument.
  125. template <typename T>
  126. void WriteVariableWidthUnsigned(BitWriterInterface* writer, T val,
  127. size_t chunk_length) {
  128. static_assert(std::is_unsigned<T>::value, "Type must be unsigned");
  129. static_assert(std::is_integral<T>::value, "Type must be integral");
  130. WriteVariableWidthInternal(writer, val, chunk_length, sizeof(T) * 8);
  131. }
  132. // Calls ReadVariableWidthInternal with the right max_payload argument.
  133. template <typename T>
  134. bool ReadVariableWidthUnsigned(BitReaderInterface* reader, T* val,
  135. size_t chunk_length) {
  136. static_assert(std::is_unsigned<T>::value, "Type must be unsigned");
  137. static_assert(std::is_integral<T>::value, "Type must be integral");
  138. uint64_t val64 = 0;
  139. if (!ReadVariableWidthInternal(reader, &val64, chunk_length, sizeof(T) * 8))
  140. return false;
  141. *val = static_cast<T>(val64);
  142. assert(*val == val64);
  143. return true;
  144. }
  145. // Encodes signed |val| to an unsigned value and calls
  146. // WriteVariableWidthInternal with the right max_payload argument.
  147. template <typename T>
  148. void WriteVariableWidthSigned(BitWriterInterface* writer, T val,
  149. size_t chunk_length, size_t zigzag_exponent) {
  150. static_assert(std::is_signed<T>::value, "Type must be signed");
  151. static_assert(std::is_integral<T>::value, "Type must be integral");
  152. WriteVariableWidthInternal(writer, EncodeZigZag(val, zigzag_exponent),
  153. chunk_length, sizeof(T) * 8);
  154. }
  155. // Calls ReadVariableWidthInternal with the right max_payload argument
  156. // and decodes the value.
  157. template <typename T>
  158. bool ReadVariableWidthSigned(BitReaderInterface* reader, T* val,
  159. size_t chunk_length, size_t zigzag_exponent) {
  160. static_assert(std::is_signed<T>::value, "Type must be signed");
  161. static_assert(std::is_integral<T>::value, "Type must be integral");
  162. uint64_t encoded = 0;
  163. if (!ReadVariableWidthInternal(reader, &encoded, chunk_length, sizeof(T) * 8))
  164. return false;
  165. const int64_t decoded = DecodeZigZag(encoded, zigzag_exponent);
  166. *val = static_cast<T>(decoded);
  167. assert(*val == decoded);
  168. return true;
  169. }
  170. } // namespace
  171. void BitWriterInterface::WriteVariableWidthU64(uint64_t val,
  172. size_t chunk_length) {
  173. WriteVariableWidthUnsigned(this, val, chunk_length);
  174. }
  175. void BitWriterInterface::WriteVariableWidthU32(uint32_t val,
  176. size_t chunk_length) {
  177. WriteVariableWidthUnsigned(this, val, chunk_length);
  178. }
  179. void BitWriterInterface::WriteVariableWidthU16(uint16_t val,
  180. size_t chunk_length) {
  181. WriteVariableWidthUnsigned(this, val, chunk_length);
  182. }
  183. void BitWriterInterface::WriteVariableWidthS64(int64_t val, size_t chunk_length,
  184. size_t zigzag_exponent) {
  185. WriteVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
  186. }
  187. BitWriterWord64::BitWriterWord64(size_t reserve_bits) : end_(0) {
  188. buffer_.reserve(NumBitsToNumWords<64>(reserve_bits));
  189. }
  190. void BitWriterWord64::WriteBits(uint64_t bits, size_t num_bits) {
  191. // Check that |bits| and |num_bits| are valid and consistent.
  192. assert(num_bits <= 64);
  193. const bool is_little_endian = IsLittleEndian();
  194. assert(is_little_endian && "Big-endian architecture support not implemented");
  195. if (!is_little_endian) return;
  196. if (num_bits == 0) return;
  197. bits = GetLowerBits(bits, num_bits);
  198. EmitSequence(bits, num_bits);
  199. // Offset from the start of the current word.
  200. const size_t offset = end_ % 64;
  201. if (offset == 0) {
  202. // If no offset, simply add |bits| as a new word to the buffer_.
  203. buffer_.push_back(bits);
  204. } else {
  205. // Shift bits and add them to the current word after offset.
  206. const uint64_t first_word = bits << offset;
  207. buffer_.back() |= first_word;
  208. // If we don't overflow to the next word, there is nothing more to do.
  209. if (offset + num_bits > 64) {
  210. // We overflow to the next word.
  211. const uint64_t second_word = bits >> (64 - offset);
  212. // Add remaining bits as a new word to buffer_.
  213. buffer_.push_back(second_word);
  214. }
  215. }
  216. // Move end_ into position for next write.
  217. end_ += num_bits;
  218. assert(buffer_.size() * 64 >= end_);
  219. }
  220. bool BitReaderInterface::ReadVariableWidthU64(uint64_t* val,
  221. size_t chunk_length) {
  222. return ReadVariableWidthUnsigned(this, val, chunk_length);
  223. }
  224. bool BitReaderInterface::ReadVariableWidthU32(uint32_t* val,
  225. size_t chunk_length) {
  226. return ReadVariableWidthUnsigned(this, val, chunk_length);
  227. }
  228. bool BitReaderInterface::ReadVariableWidthU16(uint16_t* val,
  229. size_t chunk_length) {
  230. return ReadVariableWidthUnsigned(this, val, chunk_length);
  231. }
  232. bool BitReaderInterface::ReadVariableWidthS64(int64_t* val, size_t chunk_length,
  233. size_t zigzag_exponent) {
  234. return ReadVariableWidthSigned(this, val, chunk_length, zigzag_exponent);
  235. }
  236. BitReaderWord64::BitReaderWord64(std::vector<uint64_t>&& buffer)
  237. : buffer_(std::move(buffer)), pos_(0) {}
  238. BitReaderWord64::BitReaderWord64(const std::vector<uint8_t>& buffer)
  239. : buffer_(ToBuffer64(buffer)), pos_(0) {}
  240. BitReaderWord64::BitReaderWord64(const void* buffer, size_t num_bytes)
  241. : buffer_(ToBuffer64(buffer, num_bytes)), pos_(0) {}
  242. size_t BitReaderWord64::ReadBits(uint64_t* bits, size_t num_bits) {
  243. assert(num_bits <= 64);
  244. const bool is_little_endian = IsLittleEndian();
  245. assert(is_little_endian && "Big-endian architecture support not implemented");
  246. if (!is_little_endian) return 0;
  247. if (ReachedEnd()) return 0;
  248. // Index of the current word.
  249. const size_t index = pos_ / 64;
  250. // Bit position in the current word where we start reading.
  251. const size_t offset = pos_ % 64;
  252. // Read all bits from the current word (it might be too much, but
  253. // excessive bits will be removed later).
  254. *bits = buffer_[index] >> offset;
  255. const size_t num_read_from_first_word = std::min(64 - offset, num_bits);
  256. pos_ += num_read_from_first_word;
  257. if (pos_ >= buffer_.size() * 64) {
  258. // Reached end of buffer_.
  259. EmitSequence(*bits, num_read_from_first_word);
  260. return num_read_from_first_word;
  261. }
  262. if (offset + num_bits > 64) {
  263. // Requested |num_bits| overflows to next word.
  264. // Write all bits from the beginning of next word to *bits after offset.
  265. *bits |= buffer_[index + 1] << (64 - offset);
  266. pos_ += offset + num_bits - 64;
  267. }
  268. // We likely have written more bits than requested. Clear excessive bits.
  269. *bits = GetLowerBits(*bits, num_bits);
  270. EmitSequence(*bits, num_bits);
  271. return num_bits;
  272. }
  273. bool BitReaderWord64::ReachedEnd() const { return pos_ >= buffer_.size() * 64; }
  274. bool BitReaderWord64::OnlyZeroesLeft() const {
  275. if (ReachedEnd()) return true;
  276. const size_t index = pos_ / 64;
  277. if (index < buffer_.size() - 1) return false;
  278. assert(index == buffer_.size() - 1);
  279. const size_t offset = pos_ % 64;
  280. const uint64_t remaining_bits = buffer_[index] >> offset;
  281. return !remaining_bits;
  282. }
  283. } // namespace comp
  284. } // namespace spvtools