utf8.hpp 1.0 KB

123456789101112131415161718192021222324252627282930313233
  1. #pragma once
  2. namespace nall {
  3. //note: this function assumes the string contains valid UTF-8 characters
  4. //invalid characters will result in an incorrect result from this function
  5. //invalid case 1: byte 1 == 0b'01xxxxxx
  6. //invalid case 2: bytes 2-4 != 0b'10xxxxxx
  7. //invalid case 3: end of string without bytes 2-4 present
  8. auto characters(string_view self, int offset, int length) -> uint {
  9. uint characters = 0;
  10. if(offset < 0) offset = self.size() - abs(offset);
  11. if(offset >= 0 && offset < self.size()) {
  12. if(length < 0) length = self.size() - offset;
  13. if(length >= 0) {
  14. for(int index = offset; index < offset + length;) {
  15. auto byte = self.data()[index++];
  16. if((byte & 0b111'00000) == 0b110'00000) index += 1;
  17. if((byte & 0b1111'0000) == 0b1110'0000) index += 2;
  18. if((byte & 0b11111'000) == 0b11110'000) index += 3;
  19. characters++;
  20. }
  21. }
  22. }
  23. return characters;
  24. }
  25. auto string::characters(int offset, int length) const -> uint {
  26. return nall::characters(*this, offset, length);
  27. }
  28. }