huffman.hpp 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #pragma once
  2. namespace nall::Encode {
  3. inline auto Huffman(array_view<uint8_t> input) -> vector<uint8_t> {
  4. vector<uint8_t> output;
  5. for(uint byte : range(8)) output.append(input.size() >> byte * 8);
  6. struct Node {
  7. uint frequency = 0;
  8. uint parent = 0;
  9. uint lhs = 0;
  10. uint rhs = 0;
  11. };
  12. array<Node[512]> nodes;
  13. for(uint offset : range(input.size())) nodes[input[offset]].frequency++;
  14. uint count = 0;
  15. for(uint offset : range(511)) {
  16. if(nodes[offset].frequency) count++;
  17. else nodes[offset].parent = 511;
  18. }
  19. auto minimum = [&] {
  20. uint frequency = ~0, minimum = 511;
  21. for(uint index : range(511)) {
  22. if(!nodes[index].parent && nodes[index].frequency && nodes[index].frequency < frequency) {
  23. frequency = nodes[index].frequency;
  24. minimum = index;
  25. }
  26. }
  27. return minimum;
  28. };
  29. //group the least two frequently used nodes until only one node remains
  30. uint index = 256;
  31. for(uint remaining = max(2, count); remaining >= 2; remaining--) {
  32. uint lhs = minimum();
  33. nodes[lhs].parent = index;
  34. uint rhs = minimum();
  35. nodes[rhs].parent = index;
  36. if(remaining == 2) index = nodes[lhs].parent = nodes[rhs].parent = 511;
  37. nodes[index].lhs = lhs;
  38. nodes[index].rhs = rhs;
  39. nodes[index].parent = 0;
  40. nodes[index].frequency = nodes[lhs].frequency + nodes[rhs].frequency;
  41. index++;
  42. }
  43. uint byte = 0, bits = 0;
  44. auto write = [&](bool bit) {
  45. byte = byte << 1 | bit;
  46. if(++bits == 8) output.append(byte), bits = 0;
  47. };
  48. //only the upper half of the table is needed for decompression
  49. //the first 256 nodes are always treated as leaf nodes
  50. for(uint offset : range(256)) {
  51. for(uint index : reverse(range(9))) write(nodes[256 + offset].lhs >> index & 1);
  52. for(uint index : reverse(range(9))) write(nodes[256 + offset].rhs >> index & 1);
  53. }
  54. for(uint byte : input) {
  55. uint node = byte, length = 0;
  56. uint256_t sequence = 0;
  57. //traversing the array produces the bitstream in reverse order
  58. do {
  59. uint parent = nodes[node].parent;
  60. bool bit = nodes[nodes[node].parent].rhs == node;
  61. sequence = sequence << 1 | bit;
  62. length++;
  63. node = parent;
  64. } while(node != 511);
  65. //output the generated bits in the correct order
  66. for(uint index : range(length)) {
  67. write(sequence >> index & 1);
  68. }
  69. }
  70. while(bits) write(0);
  71. return output;
  72. }
  73. }