bwt.hpp 884 B

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. #pragma once
  2. //burrows-wheeler transform
  3. #include <nall/suffix-array.hpp>
  4. namespace nall::Decode {
  5. inline auto BWT(array_view<uint8_t> input) -> vector<uint8_t> {
  6. vector<uint8_t> output;
  7. uint size = 0;
  8. for(uint byte : range(8)) size |= *input++ << byte * 8;
  9. output.resize(size);
  10. uint I = 0;
  11. for(uint byte : range(8)) I |= *input++ << byte * 8;
  12. auto suffixes = SuffixArray(input);
  13. auto L = input;
  14. auto F = new uint8_t[size];
  15. for(uint offset : range(size)) F[offset] = L[suffixes[offset + 1]];
  16. uint64_t K[256] = {};
  17. auto C = new int[size];
  18. for(uint i : range(size)) {
  19. C[i] = K[L[i]];
  20. K[L[i]]++;
  21. }
  22. int M[256];
  23. memory::fill<int>(M, 256, -1);
  24. for(uint i : range(size)) {
  25. if(M[F[i]] == -1) M[F[i]] = i;
  26. }
  27. uint i = I;
  28. for(uint j : reverse(range(size))) {
  29. output[j] = L[i];
  30. i = C[i] + M[L[i]];
  31. }
  32. return output;
  33. }
  34. }