// huffman_codec.cpp (7.7 KB)
  1. // Copyright (c) 2017 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <algorithm>
  15. #include <map>
  16. #include <sstream>
  17. #include <string>
  18. #include <unordered_map>
  19. #include <utility>
  20. #include <vector>
  21. #include "gmock/gmock.h"
  22. #include "source/comp/bit_stream.h"
  23. #include "source/comp/huffman_codec.h"
  24. namespace spvtools {
  25. namespace comp {
  26. namespace {
  // Returns the symbol histogram shared by the string-based Huffman tests.
  // Each key is a symbol and each value is the weight handed to HuffmanCodec.
  // The map is built on the first call and the same object is returned by
  // reference on every later call.
  const std::map<std::string, uint32_t>& GetTestSet() {
    using Histogram = std::map<std::string, uint32_t>;
    // Listed alphabetically; std::map orders by key regardless of the order
    // in which the entries are written.
    static const Histogram kSymbolWeights{
        {"a", 4}, {"e", 7}, {"f", 3}, {"h", 2}, {"i", 3},
        {"l", 1}, {"m", 2}, {"n", 2}, {"o", 2}, {"p", 1},
        {"r", 1}, {"s", 2}, {"t", 2}, {"u", 1}, {"x", 1},
    };
    return kSymbolWeights;
  }
  // Minimal bit source for the decoder tests. It walks a string one character
  // at a time and reports each character as a bit: '0' yields false, any other
  // character yields true.
  class TestBitReader {
   public:
    // Copies |bits|, so the reader does not depend on the argument's lifetime.
    // The constructor is explicit so that a std::string is never silently
    // converted into a reader.
    explicit TestBitReader(const std::string& bits) : bits_(bits) {}

    // Writes the next bit to |*bit| and advances the read position.
    // Returns false, leaving |*bit| untouched, once every character has been
    // consumed.
    bool ReadBit(bool* bit) {
      if (pos_ >= bits_.length()) return false;
      *bit = bits_[pos_++] != '0';
      return true;
    }

   private:
    std::string bits_;  // Characters handed out as bits.
    size_t pos_ = 0;    // Index of the next unread character in |bits_|.
  };
  // Builds a codec from GetTestSet(), dumps it with PrintTree() into a string
  // stream and compares the dump with the expected text below.
  49. TEST(Huffman, PrintTree) {
  50. HuffmanCodec<std::string> huffman(GetTestSet());
  51. std::stringstream ss;
  52. huffman.PrintTree(ss);
  // The raw literal opens with a line break so the expected text can start on
  // its own line; substr(1) removes that first character before comparing.
  // NOTE(review): every character inside the literal, including any leading
  // whitespace on each line, takes part in the comparison - confirm the
  // indentation of the expected lines against the real PrintTree output.
  53. // clang-format off
  54. const std::string expected = std::string(R"(
  55. 15-----7------e
  56. 8------4------a
  57. 4------2------m
  58. 2------n
  59. 19-----8------4------2------o
  60. 2------s
  61. 4------2------t
  62. 2------1------l
  63. 1------p
  64. 11-----5------2------1------r
  65. 1------u
  66. 3------f
  67. 6------3------i
  68. 3------1------x
  69. 2------h
  70. )").substr(1);
  71. // clang-format on
  72. EXPECT_EQ(expected, ss.str());
  73. }
  // Builds a codec from GetTestSet(), dumps it with PrintTable() into a string
  // stream and compares the dump with the expected text below.
  74. TEST(Huffman, PrintTable) {
  75. HuffmanCodec<std::string> huffman(GetTestSet());
  76. std::stringstream ss;
  77. huffman.PrintTable(ss);
  // Each expected row holds a symbol, a number matching that symbol's weight in
  // GetTestSet(), and a bit string; the rows for "e", "a" and "x" carry the
  // same bit strings that TestEncode expects from Encode().
  // The literal opens with a line break; substr(1) drops that first character
  // before the comparison.
  78. const std::string expected = std::string(R"(
  79. e 7 11
  80. a 4 101
  81. i 3 0001
  82. f 3 0010
  83. t 2 0101
  84. s 2 0110
  85. o 2 0111
  86. n 2 1000
  87. m 2 1001
  88. h 2 00000
  89. x 1 00001
  90. u 1 00110
  91. r 1 00111
  92. p 1 01000
  93. l 1 01001
  94. )")
  95. .substr(1);
  96. EXPECT_EQ(expected, ss.str());
  97. }
  98. TEST(Huffman, TestValidity) {
  99. HuffmanCodec<std::string> huffman(GetTestSet());
  100. const auto& encoding_table = huffman.GetEncodingTable();
  101. std::vector<std::string> codes;
  102. for (const auto& entry : encoding_table) {
  103. codes.push_back(BitsToStream(entry.second.first, entry.second.second));
  104. }
  105. std::sort(codes.begin(), codes.end());
  106. ASSERT_LT(codes.size(), 20u) << "Inefficient test ahead";
  107. for (size_t i = 0; i < codes.size(); ++i) {
  108. for (size_t j = i + 1; j < codes.size(); ++j) {
  109. ASSERT_FALSE(codes[i] == codes[j].substr(0, codes[i].length()))
  110. << codes[i] << " is prefix of " << codes[j];
  111. }
  112. }
  113. }
  114. TEST(Huffman, TestEncode) {
  115. HuffmanCodec<std::string> huffman(GetTestSet());
  116. uint64_t bits = 0;
  117. size_t num_bits = 0;
  118. EXPECT_TRUE(huffman.Encode("e", &bits, &num_bits));
  119. EXPECT_EQ(2u, num_bits);
  120. EXPECT_EQ("11", BitsToStream(bits, num_bits));
  121. EXPECT_TRUE(huffman.Encode("a", &bits, &num_bits));
  122. EXPECT_EQ(3u, num_bits);
  123. EXPECT_EQ("101", BitsToStream(bits, num_bits));
  124. EXPECT_TRUE(huffman.Encode("x", &bits, &num_bits));
  125. EXPECT_EQ(5u, num_bits);
  126. EXPECT_EQ("00001", BitsToStream(bits, num_bits));
  127. EXPECT_FALSE(huffman.Encode("y", &bits, &num_bits));
  128. }
  129. TEST(Huffman, TestDecode) {
  130. HuffmanCodec<std::string> huffman(GetTestSet());
  131. TestBitReader bit_reader(
  132. "01001"
  133. "0001"
  134. "1000"
  135. "00110"
  136. "00001"
  137. "00");
  138. auto read_bit = [&bit_reader](bool* bit) { return bit_reader.ReadBit(bit); };
  139. std::string decoded;
  140. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  141. EXPECT_EQ("l", decoded);
  142. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  143. EXPECT_EQ("i", decoded);
  144. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  145. EXPECT_EQ("n", decoded);
  146. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  147. EXPECT_EQ("u", decoded);
  148. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  149. EXPECT_EQ("x", decoded);
  150. ASSERT_FALSE(huffman.DecodeFromStream(read_bit, &decoded));
  151. }
  152. TEST(Huffman, TestDecodeNumbers) {
  153. const std::map<uint32_t, uint32_t> hist = {{1, 10}, {2, 5}, {3, 15}};
  154. HuffmanCodec<uint32_t> huffman(hist);
  155. TestBitReader bit_reader(
  156. "1"
  157. "1"
  158. "01"
  159. "00"
  160. "01"
  161. "1");
  162. auto read_bit = [&bit_reader](bool* bit) { return bit_reader.ReadBit(bit); };
  163. uint32_t decoded;
  164. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  165. EXPECT_EQ(3u, decoded);
  166. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  167. EXPECT_EQ(3u, decoded);
  168. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  169. EXPECT_EQ(2u, decoded);
  170. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  171. EXPECT_EQ(1u, decoded);
  172. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  173. EXPECT_EQ(2u, decoded);
  174. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  175. EXPECT_EQ(3u, decoded);
  176. }
  // Builds a codec over three uint64_t symbols and compares the output of
  // SerializeToText() with the expected text below.
  177. TEST(Huffman, SerializeToTextU64) {
  178. const std::map<uint64_t, uint32_t> hist = {{1001, 10}, {1002, 5}, {1003, 15}};
  179. HuffmanCodec<uint64_t> huffman(hist);
  // NOTE(review): the meaning of the argument 2 is not visible in this file -
  // presumably an indentation width; confirm against SerializeToText.
  180. const std::string code = huffman.SerializeToText(2);
  // The expected text has the same shape as the constructor arguments used in
  // the CreateFromTextU64 test: a leading number followed by a brace-enclosed
  // list of three-field rows.
  // NOTE(review): whitespace inside the raw literal is part of the comparison;
  // confirm the per-line indentation against the real output.
  181. const std::string expected = R"((5, {
  182. {0, 0, 0},
  183. {1001, 0, 0},
  184. {1002, 0, 0},
  185. {1003, 0, 0},
  186. {0, 1, 2},
  187. {0, 4, 3},
  188. }))";
  189. ASSERT_EQ(expected, code);
  190. }
  // Builds a codec over three string symbols and compares the output of
  // SerializeToText() with the expected text below.
  191. TEST(Huffman, SerializeToTextString) {
  192. const std::map<std::string, uint32_t> hist = {
  193. {"aaa", 10}, {"bbb", 20}, {"ccc", 15}};
  194. HuffmanCodec<std::string> huffman(hist);
  // NOTE(review): the meaning of the argument 4 is not visible in this file -
  // presumably an indentation width (the U64 variant of this test passes 2);
  // confirm against SerializeToText.
  195. const std::string code = huffman.SerializeToText(4);
  // String symbols appear quoted in the expected text; rows without a symbol
  // show an empty quoted string.
  // NOTE(review): whitespace inside the raw literal is part of the comparison;
  // confirm the per-line indentation against the real output.
  196. const std::string expected = R"((5, {
  197. {"", 0, 0},
  198. {"aaa", 0, 0},
  199. {"bbb", 0, 0},
  200. {"ccc", 0, 0},
  201. {"", 3, 1},
  202. {"", 4, 2},
  203. }))";
  204. ASSERT_EQ(expected, code);
  205. }
  // Constructs a codec directly from a hand-written node list instead of a
  // histogram, then checks what PrintTree() prints for it.
  206. TEST(Huffman, CreateFromTextString) {
  // NOTE(review): the second and third fields of each node look like indices
  // of child nodes within this vector ("root" names entries 2 and 3), and the
  // empty first entry looks like a placeholder for index 0 - confirm against
  // the HuffmanCodec::Node definition.
  207. std::vector<HuffmanCodec<std::string>::Node> nodes = {
  208. {},
  209. {"root", 2, 3},
  210. {"left", 0, 0},
  211. {"right", 0, 0},
  212. };
  // The first constructor argument is 1, the position of the "root" entry in
  // the vector above; the node vector itself is moved into the codec.
  213. HuffmanCodec<std::string> huffman(1, std::move(nodes));
  214. std::stringstream ss;
  215. huffman.PrintTree(ss);
  // The expected dump lists "right" before "left", each prefixed with 0.
  // The literal opens with a line break; substr(1) drops that first character.
  // NOTE(review): whitespace inside the raw literal is part of the comparison;
  // confirm it against the real PrintTree output.
  216. const std::string expected = std::string(R"(
  217. 0------right
  218. 0------left
  219. )")
  220. .substr(1);
  221. EXPECT_EQ(expected, ss.str());
  222. }
  // Constructs a uint64_t codec from literal constructor arguments - the same
  // values that appear in the expected text of the SerializeToTextU64 test -
  // and then checks PrintTree(), DecodeFromStream() and Encode() on it.
  223. TEST(Huffman, CreateFromTextU64) {
  224. HuffmanCodec<uint64_t> huffman(5, {
  225. {0, 0, 0},
  226. {1001, 0, 0},
  227. {1002, 0, 0},
  228. {1003, 0, 0},
  229. {0, 1, 2},
  230. {0, 4, 3},
  231. });
  232. std::stringstream ss;
  233. huffman.PrintTree(ss);
  // Every number printed in front of a node is 0 in the expected dump.
  // The literal opens with a line break; substr(1) drops that first character.
  // NOTE(review): whitespace inside the raw literal is part of the comparison;
  // confirm it against the real PrintTree output.
  234. const std::string expected = std::string(R"(
  235. 0------1003
  236. 0------0------1002
  237. 0------1001
  238. )")
  239. .substr(1);
  240. EXPECT_EQ(expected, ss.str());
  // Decoding: the two bits "01" are expected to yield the symbol 1002.
  241. TestBitReader bit_reader("01");
  242. auto read_bit = [&bit_reader](bool* bit) { return bit_reader.ReadBit(bit); };
  243. uint64_t decoded = 0;
  244. ASSERT_TRUE(huffman.DecodeFromStream(read_bit, &decoded));
  245. EXPECT_EQ(1002u, decoded);
  // Encoding: the symbol 1001 is expected to produce the two bits "00".
  246. uint64_t bits = 0;
  247. size_t num_bits = 0;
  248. EXPECT_TRUE(huffman.Encode(1001, &bits, &num_bits));
  249. EXPECT_EQ(2u, num_bits);
  250. EXPECT_EQ("00", BitsToStream(bits, num_bits));
  251. }
  252. } // namespace
  253. } // namespace comp
  254. } // namespace spvtools