// namark / smolen
							#include <cstdio>
#include <cerrno>
#include <numeric>
#include <complex>

#include "simple/io.h"
#include "simple/compress.hpp"
#include "simple/support.hpp"
#include "simple/geom.hpp"

// Makes the project's io::, compress::, support:: and geom:: names usable
// without the leading simple:: qualifier.
using namespace simple;
// Enables the ""s std::string literal suffix used further down in this file.
using namespace std::literals;

// Magic bytes identifying a file produced by this tool. main() compares the
// start of the input against them to choose between decoding and encoding,
// and writes them ahead of the encoded output.
const auto filetype = io::as_byte_view("SMOLENFILE");
// Magic bytes main() looks for at the start of not-yet-encoded input; acting
// on a match is still a TODO there.
const auto audio_filetype = 	io::as_byte_view("RAWAUDIOFILE");
// Magic bytes main() looks for at the start of decompressed data; acting on
// a match is still a TODO there.
const auto dct_audio_filetype = io::as_byte_view("DCTAUDIOFILE");

// Describes the sample layout of an audio stream.
// The trailing note on each field is the number of bits it is meant to
// occupy (32 in total) - presumably in a packed file header; no packing is
// implemented in this struct itself.
struct audio_params_t
{
	// Byte order of the samples; defaults to little endian.
	bool big_endian{false}; // 1 bit
	// Selects the sample format; intended as an index into the sample type
	// tables declared below in this file.
	std::uint8_t sample_type{0}; // 3 bits
	// Number of interleaved channels; defaults to stereo.
	std::uint8_t channels{2}; // 4 bits
	// Samples per second.
	std::uint32_t sample_rate{48000}; // 24 bits
};

// Display names of the supported sample formats.
// The position of a name is the numeric sample type it stands for, so the
// order here is significant.
const std::array sample_type_names {
	std::string("int16"),  // 0
	std::string("int8"),   // 1
	std::string("uint16"), // 2
	std::string("uint8"),  // 3
	std::string("int32"),  // 4
	std::string("uint32"), // 5
	std::string("float"),  // 6
};
// Compile-time list of the C++ types behind each sample format.
// Entries line up one-to-one, in order, with sample_type_names above
// ("int16" <-> std::int16_t, ..., "float" <-> float).
using sample_types = support::meta::list<
	std::int16_t,
	std::int8_t,
	std::uint16_t,
	std::uint8_t,
	std::int32_t,
	std::uint32_t,
	float
>;

// Calls `f` with a range of audio samples laid over `bytes`.
//
// The audio parameters argument is accepted but not consulted yet: every
// input is treated as 2-channel std::int16_t samples.
// TODO: map each entry of sample_types to geom::vector<T, params.channels>
// and select the one at index params.sample_type.
template <typename F>
void with_sample_range(audio_params_t, io::byte_range bytes, F f)
{
	using sample_t = geom::vector<std::int16_t, 2>;
	using sample_iterator = support::byte_iterator<sample_t>;

	f(support::range{
		sample_iterator{bytes.begin()},
		sample_iterator{bytes.end()}
	});
}

// Entry point: compresses or decompresses the file named by argv[1].
//
// * Input starting with the "SMOLENFILE" magic is decoded: huffman first,
//   then lz77, then (if audio parameters are set) the audio transform is
//   applied to the decoded samples. The result is written to the input name
//   with its ".smol" extension removed, or with ".larg" appended when there
//   is no such extension.
// * Any other input is encoded: (if audio parameters are set) the audio
//   transform, then lz77, then huffman. The result is written to
//   "<input>.smol", preceded by the magic.
//
// Audio parameters are currently only set, to their defaults, when more than
// one argument is given (argc > 2).
//
// Returns 0 on success. Returns 1 when no input file is given or when an
// exception is caught, so that callers can detect failure from the exit
// status.
int main(int argc, char const* argv[]) try
{
	if(argc < 2)
	{
		std::fputs("argumentzzz \n", stderr);
		// missing argument is an error: report it through the exit status
		return 1;
	}

	std::optional<audio_params_t> audio_params;

	if(argc > 2)
	{
		// TODO
		// for now any extra argument just enables audio handling with the defaults
		audio_params = audio_params_t{};
	}

	std::vector<std::byte> data;

	// read the whole input file into `data`, through a 4096 byte buffer
	{
		auto in_file = std::get<0>( io::open<io::mode::read>(argv[1]) );
		std::array<std::byte, 4096> buffer;
		support::copy(io::read_iterator(in_file, io::as_byte_range(buffer)), io::read_iterator(), std::back_inserter(data));
	}

	std::vector<std::byte> out_data;
	std::string out_filename = argv[1];

	// .second reaching the end of `filetype` means the input starts with the magic;
	// .first is then the position in `data` right after it
	const auto filetypematch = support::mismatch(data, filetype);

	if(filetypematch.second == filetype.end())
	{
		// decode
		auto& decoded = out_data;

		// un-huffman
		// The code table is stored as a sequence of (code length, code, key)
		// entries terminated by a zero code length.
		decltype(compress::huffman_code(data.begin(), data.end())) code{};
		std::byte code_length{};
		auto i = compress::read_bits(filetypematch.first,code_length);
		while(code_length != std::byte{}) // TODO: check if i reaches end
		{
			decltype(code)::key_type key{};
			decltype(code)::value_type value{0, static_cast<std::size_t>(code_length)};
			i = compress::read_bits(i,value);
			i = compress::read_bits(i,key);
			code[key] = value;
			i = compress::read_bits(i,code_length);
		}

		// the table is followed by the size of the huffman-decoded data
		// NOTE(review): this size comes straight from the file and is not validated
		std::vector<std::byte>::size_type decoded_size = 0;
		auto header_end = compress::read_bits(i, decoded_size);
		decoded.resize(decoded_size);
		compress::huffman_decode(code, header_end, decoded.begin(), decoded.end());

		// un-lz77
		// `data` now holds the huffman-decoded bytes and `decoded` is reused as the output
		std::swap(decoded, data);
		decoded_size = 0;

		header_end = compress::read_bits(data.begin(), decoded_size);
		decoded.resize(decoded_size);
		compress::lz77_decode(header_end, decoded.begin(), decoded.end());

		const auto audio_filetypematch = support::mismatch(decoded, dct_audio_filetype);
		if(audio_filetypematch.second == dct_audio_filetype.end())
		{
			// TODO: set audio_params from decoded
		}

		if(audio_params)
		{
			// un-dct
			// samples are hard-coded as 2-channel int16, processed in place, chunk by chunk
			using sample_type = geom::vector<std::int16_t,2>;
			using sample_iterator = support::byte_iterator<sample_type>;

			constexpr std::size_t chunk_size = (1 << 11) + 1;
			constexpr std::size_t dft_size = (chunk_size - 1) * 2;
			// only whole chunks are processed, trailing bytes are left untouched
			const std::size_t chunk_count = decoded.size() / (chunk_size * sizeof(sample_type));

			std::vector<geom::vector<float,2>> frequencies;
			// std::vector<geom::vector<std::complex<float>,2>> frequencies;
			frequencies.resize(dft_size);

			std::vector<geom::vector<std::complex<float>,2>> samples;
			samples.resize(dft_size);

			for(sample_iterator i{decoded.data()}; i != sample_iterator{decoded.data()} + chunk_count * chunk_size; i += chunk_size)
			{
				// scale the stored int16 values by 1/int16 max into floats
				std::transform(i, i + chunk_size, frequencies.begin(),
					[](sample_type x) { return geom::vector<float,2>(x) / std::numeric_limits<std::int16_t>::max(); });

				// fill the second half with a mirror image of elements [1, chunk_size - 1),
				// giving a symmetric input of dft_size elements
				std::copy(frequencies.rbegin()+chunk_size-1, frequencies.rend()-1, frequencies.begin() + chunk_size);

				const float tau = 2*std::acos(-1);
				compress::fft(std::multiplies<>{}, std::polar(1.f,tau/dft_size), frequencies, samples);

				// keep the real parts of the first chunk_size results, scaled back to int16
				std::transform(samples.begin(), samples.begin() + chunk_size, i, [](auto x)
					// TODO: clamp instead of assert?
					{ return sample_type{x.transformed([](auto cx) { assert(std::abs(cx.real()) <= 1); return cx.real(); }) * std::numeric_limits<std::int16_t>::max()}; });
			}

			// TODO: add raw audio filetype header, if started with dct audio filetype just need to change the first 3 bytes
		}

		// output name: strip a trailing ".smol", otherwise append ".larg"
		// `dot` is a reverse iterator at the last '.', so dot.base() is the character after it
		auto dot = support::find(support::make_range(out_filename).reverse(), '.');
		auto expected_ext = std::string_view{"smol"};
		if(dot == out_filename.rend() || not std::equal(dot.base(), out_filename.end(), expected_ext.begin(), expected_ext.end()))
		{
			std::fputs("where .smol? make .larg :/ \n", stderr);
			out_filename += ".larg";
		}
		else
			out_filename.erase(std::prev(dot.base()), out_filename.end());

		// the input was named exactly ".smol", nothing is left to name the output
		if(out_filename == "")
		{
			std::fputs("is this a jape >:( \n", stderr);
			out_filename = "harharharharharharharharharharharharhar.larg";
		}
	}
	else
	{
		// encode

		const auto audio_filetypematch = support::mismatch(data, audio_filetype);
		if(audio_filetypematch.second == audio_filetype.end())
		{
			// TODO: set audio_params from data
		}

		if(audio_params)
		{
			// dct

			// TODO: actual audio params
			// FIXME: handle endianness
			using sample_type = geom::vector<std::int16_t,2>;
			using sample_iterator = support::byte_iterator<sample_type>;

			constexpr std::size_t chunk_size = (1 << 11) + 1;
			constexpr std::size_t dft_size = (chunk_size - 1) * 2;
			// only whole chunks are processed, trailing bytes are left untouched
			const std::size_t chunk_count = data.size() / (chunk_size * sizeof(sample_type));

			std::vector<geom::vector<float,2>> normalized;
			normalized.resize(dft_size);

			std::vector<geom::vector<std::complex<float>,2>> frequencies;
			frequencies.resize(dft_size);

			for(sample_iterator i{data.data()}; i != sample_iterator{data.data()} + chunk_count * chunk_size; i += chunk_size)
			{
				// scale the int16 samples by 1/int16 max into floats
				std::transform(i, i + chunk_size, normalized.begin(),
					[](sample_type x) { return geom::vector<float,2>(x) / std::numeric_limits<std::int16_t>::max(); });

				// fill the second half with a mirror image of elements [1, chunk_size - 1),
				// giving a symmetric input of dft_size elements
				std::copy(normalized.rbegin()+chunk_size-1, normalized.rend()-1, normalized.begin() + chunk_size);

				const float tau = 2*std::acos(-1);
				compress::fft(std::multiplies<>{}, std::polar(1.f,tau/dft_size), normalized, frequencies);

				// overwrite the chunk in place with the real parts of the first chunk_size
				// results, divided by dft_size and scaled back to int16
				std::transform(frequencies.begin(), frequencies.begin() + chunk_size, i, [](auto x)
					// TODO: clamp instead of assert?
					{ return sample_type{x.transformed([](auto x) { auto r = x.real() / dft_size; assert(std::abs(r) <= 1); return r; }) * std::numeric_limits<std::int16_t>::max()}; });
					// TODO: quantization paramter
					// { return sample_type{x.transformed([&dft_size](auto cx) { auto normalized = cx.real() / dft_size; return normalized < 0.01f ? 0.f : normalized; }) * std::numeric_limits<std::uint16_t>::max()}; });
					// TODO: paramter to discard higher frequencies
					// std::fill(i + chunk_size/2, i + chunk_size, sample_type{});
			}

			// TODO: add dct audio filetype header, if started with raw audio filetype just need to change the first 3 bytes

		}

		auto& encoded = out_data;
		encoded.reserve(data.size());

		// lz77
		// the original size goes first, the decoder reads it back to size its output
		auto out = compress::out_bits(support::offset_expander(encoded));
		*out = data.size();
		compress::lz77_encode(data.begin(), data.end(), std::move(out));

		out_filename += ".smol"s;

		if(encoded.size() > data.size())
			std::fputs("largenz :( \n", stderr);

		// huffman
		// `data` now holds the lz77 output and `encoded` is restarted as the final output
		std::swap(encoded, data);
		encoded.resize(0);
		out = compress::out_bits(support::offset_expander(encoded));

		// write the code table as (code length, code, key) entries, skipping zero length codes
		auto code = compress::huffman_code(data.begin(), data.end());
		code.for_each([&](auto kv)
		{
			auto code_bit_count = compress::bit_count(kv.second);
			if(code_bit_count != 0)
			{
				// the length is stored in a single byte
				assert( code_bit_count < (1ull << compress::bit_count(std::byte{})) );
				*out = static_cast<std::byte>(code_bit_count);
				*out = kv.second;
				*out = kv.first;
			}
		});
		// a zero code length terminates the table, then comes the size of the encoded data's source
		*out = static_cast<std::byte>(0);
		*out = data.size();

		compress::huffman_encode(code, data.begin(), data.end(), std::move(out));

		if(encoded.size() > data.size())
			std::fputs("huffpuffs :( \n", stderr);
	}

	auto out_file = std::get<0>( io::open<io::mode::write, io::mode::create, io::mode::truncate>(out_filename) );
	auto out_view = io::as_byte_view(out_data);
	// the magic is only written when encoding, that is when the input did not start with it
	if(filetypematch.second != filetype.end())
		if(std::get<0>(write(out_file, filetype)) != filetype.end()) throw std::runtime_error("Failed to write filetype to " + out_filename);
	if(std::get<0>(write(out_file, out_view)) != out_view.end()) throw std::runtime_error("Failed to write data to " + out_filename);

	return 0;
}
catch(const std::exception& e)
{
	std::fputs(";( \n", stderr);
	if(errno)
		std::perror("");

	std::fputs(e.what(), stderr);
	std::fputs("\n", stderr);
	// throw;
	// Falling off the end of this handler would make main return 0,
	// so report the failure explicitly.
	return 1;
}