123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- #pragma once
- #include <nall/array.hpp>
- #include <nall/counting-sort.hpp>
- #include <nall/induced-sort.hpp>
- #include <nall/range.hpp>
- #include <nall/view.hpp>
- namespace nall {
- inline auto suffix_array(array_view<uint8_t> input) -> vector<int> {
- return induced_sort(input);
- }
- inline auto suffix_array_invert(array_view<int> sa) -> vector<int> {
- vector<int> isa;
- isa.reallocate(sa.size());
- for(int i : range(sa.size())) isa[sa[i]] = i;
- return isa;
- }
- inline auto suffix_array_phi(array_view<int> sa) -> vector<int> {
- vector<int> phi;
- phi.reallocate(sa.size());
- phi[sa[0]] = 0;
- for(int i : range(1, sa.size())) phi[sa[i]] = sa[i - 1];
- return phi;
- }
- inline auto suffix_array_lcp(int l, int r, array_view<int> sa, array_view<uint8_t> input) -> int {
- int i = sa[l], j = sa[r], k = 0, size = input.size();
- while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++;
- return k;
- }
- inline auto suffix_array_lcp(int i, int j, int k, array_view<uint8_t> input) -> int {
- int size = input.size();
- while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++;
- return k;
- }
- inline auto suffix_array_lcp(array_view<int> sa, array_view<int> isa, array_view<uint8_t> input) -> vector<int> {
- int k = 0, size = input.size();
- vector<int> lcp;
- lcp.reallocate(size + 1);
- for(int i : range(size)) {
- if(isa[i] == size) { k = 0; continue; }
- int j = sa[isa[i] + 1];
- while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++;
- lcp[1 + isa[i]] = k;
- if(k) k--;
- }
- lcp[0] = 0;
- return lcp;
- }
- inline auto suffix_array_lcp(array_view<int> plcp, array_view<int> sa) -> vector<int> {
- vector<int> lcp;
- lcp.reallocate(plcp.size());
- for(int i : range(plcp.size())) lcp[i] = plcp[sa[i]];
- return lcp;
- }
- inline auto suffix_array_plcp(array_view<int> phi, array_view<uint8_t> input) -> vector<int> {
- vector<int> plcp;
- plcp.reallocate(phi.size());
- int k = 0, size = input.size();
- for(int i : range(size)) {
- int j = phi[i];
- while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++;
- plcp[i] = k;
- if(k) k--;
- }
- return plcp;
- }
- inline auto suffix_array_plcp(array_view<int> lcp, array_view<int> sa) -> vector<int> {
- vector<int> plcp;
- plcp.reallocate(lcp.size());
- for(int i : range(lcp.size())) plcp[sa[i]] = lcp[i];
- return plcp;
- }
- inline auto suffix_array_lrcp(vector<int>& llcp, vector<int>& rlcp, array_view<int> lcp, array_view<int> plcp, array_view<int> sa, array_view<uint8_t> input) -> void {
- int size = input.size();
- llcp.reset(), llcp.reallocate(size + 1);
- rlcp.reset(), rlcp.reallocate(size + 1);
- function<int (int, int)> recurse = [&](int l, int r) -> int {
- if(l >= r - 1) {
- if(l >= size) return 0;
- if(lcp) return lcp[l];
- return plcp[sa[l]];
- }
- int m = l + r >> 1;
- llcp[m - 1] = recurse(l, m);
- rlcp[m - 1] = recurse(m, r);
- return min(llcp[m - 1], rlcp[m - 1]);
- };
- recurse(1, size + 1);
- llcp[0] = 0;
- rlcp[0] = 0;
- }
- inline auto suffix_array_lpf(vector<int>& lengths, vector<int>& offsets, array_view<int> phi, array_view<int> plcp, array_view<uint8_t> input) -> void {
- int k = 0, size = input.size();
- lengths.reset(), lengths.resize(size + 1, -1);
- offsets.reset(), offsets.resize(size + 1, -1);
- function<void (int, int, int)> recurse = [&](int i, int j, int k) -> void {
- if(lengths[i] < 0) {
- lengths[i] = k;
- offsets[i] = j;
- } else if(lengths[i] < k) {
- if(offsets[i] > j) {
- recurse(offsets[i], j, lengths[i]);
- } else {
- recurse(j, offsets[i], lengths[i]);
- }
- lengths[i] = k;
- offsets[i] = j;
- } else {
- if(offsets[i] > j) {
- recurse(offsets[i], j, k);
- } else {
- recurse(j, offsets[i], k);
- }
- }
- };
- for(int i : range(size)) {
- int j = phi[i];
- if(plcp) k = plcp[i];
- else while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++;
- if(i > j) {
- recurse(i, j, k);
- } else {
- recurse(j, i, k);
- }
- if(k) k--;
- }
- lengths[0] = 0;
- offsets[0] = 0;
- }
- inline auto suffix_array_find(int& length, int& offset, array_view<int> sa, array_view<uint8_t> input, array_view<uint8_t> match) -> bool {
- length = 0, offset = 0;
- int l = 0, r = input.size();
- while(l < r - 1) {
- int m = l + r >> 1;
- int s = sa[m];
- int k = 0;
- while(k < match.size() && s + k < input.size()) {
- if(match[k] != input[s + k]) break;
- k++;
- }
- if(k > length) {
- length = k;
- offset = s;
- if(k == match.size()) return true;
- }
- if(k == match.size() || s + k == input.size()) k--;
- if(match[k] < input[s + k]) {
- r = m;
- } else {
- l = m;
- }
- }
- return false;
- }
- inline auto suffix_array_find(int& length, int& offset, array_view<int> llcp, array_view<int> rlcp, array_view<int> sa, array_view<uint8_t> input, array_view<uint8_t> match) -> bool {
- length = 0, offset = 0;
- int l = 0, r = input.size(), k = 0;
- while(l < r - 1) {
- int m = l + r >> 1;
- int s = sa[m];
- while(k < match.size() && s + k < input.size()) {
- if(match[k] != input[s + k]) break;
- k++;
- }
- if(k > length) {
- length = k;
- offset = s;
- if(k == match.size()) return true;
- }
- if(k == match.size() || s + k == input.size()) k--;
- if(match[k] < input[s + k]) {
- r = m;
- k = min(k, llcp[m]);
- } else {
- l = m;
- k = min(k, rlcp[m]);
- }
- }
- return false;
- }
- struct SuffixArray {
- using type = SuffixArray;
-
- inline SuffixArray(array_view<uint8_t> input) : input(input) {
- sa = suffix_array(input);
- }
-
- inline auto lrcp() -> type& {
-
-
- if(!phi) phi = suffix_array_phi(sa);
- if(!plcp) plcp = suffix_array_plcp(phi, input);
-
- if(!llcp || !rlcp) suffix_array_lrcp(llcp, rlcp, lcp, plcp, sa, input);
- return *this;
- }
-
- inline auto lpf() -> type& {
- if(!phi) phi = suffix_array_phi(sa);
-
- if(!lengths || !offsets) suffix_array_lpf(lengths, offsets, phi, plcp, input);
- return *this;
- }
- inline auto operator[](int offset) const -> int {
- return sa[offset];
- }
-
-
- inline auto find(int& length, int& offset, array_view<uint8_t> match) -> bool {
- if(!llcp || !rlcp) return suffix_array_find(length, offset, sa, input, match);
- return suffix_array_find(length, offset, llcp, rlcp, sa, input, match);
- }
-
- inline auto previous(int& length, int& offset, int address) -> void {
- length = lengths[address];
- offset = offsets[address];
- }
-
- array_view<uint8_t> input;
-
- vector<int> sa;
- vector<int> isa;
- vector<int> phi;
- vector<int> plcp;
- vector<int> lcp;
- vector<int> llcp;
- vector<int> rlcp;
- vector<int> lengths;
- vector<int> offsets;
- };
- }
|