codepoints.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
# Functions that validate codepoint sequences according to specific criteria.
# All of them raise ValueError when validation fails.
  3. def testZWJSanity(c):
  4. """
  5. Function that tests the sanity of ZWJ placement in a codepoint sequence.
  6. (ie. ZWJs placed at the beginning or end of a sequence, or two ZWJs placed
  7. next to each other, is considered 'not sane')
  8. """
  9. zwj = 0x200d
  10. if len(c) > 1 and zwj in c:
  11. if c[0] == zwj or c[-1] == zwj:
  12. raise ValueError(f"This codepoint sequence has a ZWJ (U+200d) at the beginning and/or the end of it's codepoint seqence (when ignoring VS16/U+fe0f). This is not valid.")
  13. if any(c[i]== zwj and c[i+1] == zwj for i in range(len(c)-1)):
  14. raise ValueError(f"This codepoint sequence has two or more ZWJs (U+200d) next to each other (when ignoring VS16/U+fe0f). This is not a valid.")
  15. def testRestrictedCodepoints(codepointSeq):
  16. """
  17. Make sure that each codepoint in a codepoint string is within the right ranges.
  18. Throws an exception when it is not.
  19. """
  20. for c in codepointSeq:
  21. if c < 0x20:
  22. raise ValueError(f"This codepoint sequence contains a codepoint that is below U+20. You cannot encode glyphs below this number because various typing environments get confused when you do.")
  23. if c == 0x20:
  24. raise ValueError(f"This codepoint sequence contains U+20. This is space - you shouldn't be using a glyph for this.")
  25. if c == 0xa0:
  26. raise ValueError(f"This codepoint sequence contains U+a0. This is a space character - you shouldn't be using a glyph for this.")
  27. if c > 0x10FFFF:
  28. raise ValueError(f"This codepoint sequence contains a codepoint that is above U+10FFFF. The Unicode Standard currently does not support codepoints above this number.")
  29. if len(codepointSeq) == 1 and codepointSeq[0] == 0xfe0f:
  30. raise ValueError(f"'fe0f' by itself is just a service codepoint and cannot be be used as an input.")