3 Commit-ok a2004b93a0 ... c285e865d3

Szerző SHA1 Üzenet Dátum
  namark c285e865d3 Fixed lz77_decode not passing ref offset type properly. 7 hónapja
  namark caf8eed58e huffman + lz77 unit test. 7 hónapja
  namark e8b2674141 Fixed huffman for enumeration types (std::byte). 7 hónapja

+ 8 - 7
source/simple/compress/huffman.hpp

@@ -85,6 +85,9 @@ namespace simple::compress
 
 		public:
 
+		using key_type = SmallKey;
+		using value_type = Value;
+
 		constexpr Value& operator[](const SmallKey& key)
 		{ return get(*this, key); }
 
@@ -154,7 +157,7 @@ namespace simple::compress
 			std::vector<std::pair<key_type,key_type>>
 		> hierarchy{};
 		for(auto i = begin; i != end; ++i)
-			++counter[*i];
+			++counter[static_cast<key_type>(*i)];
 
 		std::array<std::pair<key_type, std::size_t>, 2> minmin;
 		while(true)
@@ -214,11 +217,8 @@ namespace simple::compress
 	template <typename It, typename Out, typename Code>
 	constexpr auto huffman_encode(const Code& code, It begin, It end, Out out)
 	{
-		// NOTE: almost not worth a function, especially if add call operator to code...
-		// like this
-		// return std::transform(begin, end, out, std::ref(code));
-		// ref cause can't trust std to not copy galore, so maybe still some point to it, even in that form
-		return std::transform(begin, end, std::move(out), [&code](auto&& x) { return code[x]; });
+		return std::transform(begin, end, std::move(out),
+			[&code](auto&& x) { return code[static_cast<typename Code::key_type>(x)]; });
 	}
 
 	template <typename Code, typename I, typename O,
@@ -226,6 +226,7 @@ namespace simple::compress
 	>
 	constexpr auto huffman_decode(const Code& code, I i, O out, O out_end)
 	{
+		using out_value = typename std::iterator_traits<O>::value_type;
 		while(out != out_end)
 		{
 			code.find_if([&i, &out](auto&& kv)
@@ -236,7 +237,7 @@ namespace simple::compress
 					auto next = read_bits(i, read);
 					if(read == kv.second)
 					{
-						*out = kv.first;
+						*out = static_cast<out_value>(kv.first);
 						i = next;
 						return true;
 					}

+ 3 - 3
source/simple/compress/lz77.hpp

@@ -94,7 +94,7 @@ namespace simple::compress
 	{
 		constexpr ref_offset_t ref_offset_max = [](){
 			auto bits = get_bits(ref_offset_t{});
-			// do this step by step to prevent unsigned short to freaking exploding into an int TODO: safe unsigned short
+			// do this step by step to prevent unsigned short from freaking exploding into an int TODO: safe unsigned short
 			bits = ~bits;
 			bits >>= std::numeric_limits<decltype(bits)>::digits - bit_count(ref_offset_t{});
 			return bits;
@@ -126,7 +126,7 @@ namespace simple::compress
 			auto dict = window > ref_offset_max ? i - ref_offset_max : begin;
 			auto match = lz77_lookup(dict, i, end, map);
 			auto match_size = support::distance(match.first);
-			if(match_size > 8)
+			if(match_size > 8) // FIXME: parameterize
 			{
 				auto pattern_size = i - match.first.begin();
 				ref_offset_t encodable_pattern_chunk = ref_offset_max / pattern_size * pattern_size;
@@ -234,7 +234,7 @@ namespace simple::compress
 	>
 	constexpr auto lz77_decode(I i, O out_begin, O out_end)
 	{
-		return lz77_decode(bit_iterator{i,0}, out_begin, out_end);
+		return lz77_decode<ref_offset_t>(bit_iterator{i,0}, out_begin, out_end);
 	}
 
 } // namespace simple::compress

+ 1 - 1
unit_tests/lz77.cpp

@@ -12,7 +12,7 @@
 using namespace simple::compress;
 
 void Endecode(std::string text)
-// void Encode(std::vector<unsigned> text)
+// void Endecode(std::vector<unsigned> text)
 {
 	std::vector<std::byte> encoded;
 	encoded.reserve(text.size());

+ 70 - 0
unit_tests/lz77_huffman.cpp

@@ -0,0 +1,70 @@
+// #include "simple/support/debug.hpp"
+
+#include "simple/compress/lz77.hpp"
+#include "simple/compress/huffman.hpp"
+#include "simple/compress/iterator.hpp" // out_bits
+#include "simple/support/iterator.hpp" // offset_expander
+
+#include <cassert>
+#include <vector>
+#include <string>
+#include <cstdio>
+
+using namespace simple::compress;
+
+/// Round-trips `text` through LZ77 followed by Huffman coding and asserts that
+/// each stage decodes back to exactly what was fed into it.
+void Endecode(std::string text)
+{
+	// stage 1: LZ77-compress the text into a byte buffer
+	std::vector<std::byte> lzed;
+	lzed.reserve(text.size());
+
+	lz77_encode(text.begin(), text.end(), out_bits(simple::support::offset_expander(lzed)));
+
+	// stage 2: derive a Huffman code from the LZ77 output and encode with it
+	std::vector<std::byte> huffed;
+	huffed.reserve(lzed.size());
+
+	auto code = huffman_code(lzed.begin(), lzed.end());
+
+#if defined SIMPLE_SUPPORT_DEBUG_HPP
+		simple::support::print('\n');
+		simple::support::println("CODE: ");
+		code.for_each([](auto && kv) { using std::to_string; if(bit_count(kv.second) != 0) simple::support::println(to_string((int)kv.first) + " - " + to_string(kv.second)); });
+		simple::support::print('\n');
+#endif
+
+	huffman_encode(code, lzed.begin(), lzed.end(), out_bits(simple::support::offset_expander(huffed)));
+
+#if defined SIMPLE_SUPPORT_DEBUG_HPP
+	simple::support::print('\n');
+
+	simple::support::print("INPUT SIZE: ", text.size(), '\n');
+	simple::support::print("COMPRESSED SIZE: ", huffed.size(), '\n');
+#endif
+
+	// undo stage 2; the output range tells the decoder how many symbols to produce
+	std::vector<std::byte> unhuffed;
+	unhuffed.resize(lzed.size());
+	huffman_decode(code, huffed.begin(), unhuffed.begin(), unhuffed.end());
+
+	// the Huffman stage must reproduce the LZ77 stream exactly; without this
+	// check the test would pass even if Huffman decoding were broken
+	assert(lzed == unhuffed);
+
+	// undo stage 1 from the Huffman-decoded bytes so the whole pipeline is exercised
+	std::string decoded;
+	decoded.resize(text.size());
+	lz77_decode(unhuffed.begin(), decoded.begin(), decoded.end());
+
+	assert(text == decoded);
+}
+
+/// Test driver: round-trips the built-in sample text, or the entire contents of
+/// the file named by the first command-line argument when one is given.
+/// Returns non-zero if that file cannot be opened or measured.
+int main(int argc, char const* argv[])
+{
+	std::string text = "abcd aaaa bbbb cccc aaaa abcd aaaa aaaa aaaa aaaaa aaaa aaaaaaaaaaa aaaaaaaaaaaaa aaaaaaaaaaaaaa aaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaa";
+	if(argc > 1)
+	{
+		auto f = std::fopen(argv[1], "rb");
+		if(f == nullptr)
+		{
+			std::perror(argv[1]);
+			return 1;
+		}
+
+		// seek to the end to learn the size; ftell reports failure as -1,
+		// which must not reach resize()
+		long size = -1;
+		if(std::fseek(f,0,SEEK_END) == 0)
+			size = std::ftell(f);
+		if(size < 0 || std::fseek(f,0,SEEK_SET) != 0)
+		{
+			std::perror(argv[1]);
+			std::fclose(f);
+			return 1;
+		}
+
+		text.resize(static_cast<std::string::size_type>(size));
+		// the read result is deliberately ignored, as before: a short read
+		// only changes which bytes get round-tripped
+		auto unused [[maybe_unused]] = std::fread(text.data(), text.size(), 1 ,f);
+		std::fclose(f);
+#if defined SIMPLE_SUPPORT_DEBUG_HPP
+		simple::support::print("s: ", text.size(), '\n');
+#endif
+	}
+	Endecode(std::move(text));
+	return 0;
+}