unicode-spec.js 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /* eslint max-len:0 */
  2. /* global expect: false */
  3. /* global it: false */
  4. /* global describe: false */
  5. import Settings from "../src/Settings";
  6. import {scriptFromCodepoint, supportedCodepoint} from "../src/unicodeScripts";
  7. import {strictSettings, nonstrictSettings} from "./helpers";
  8. describe("unicode", function() {
  9. it("should parse Latin-1 inside \\text{}", function() {
  10. expect`\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÆÇÐØÞßæçðøþ}`
  11. .toParse();
  12. });
  13. it("should not parse Latin-1 outside \\text{} with strict", function() {
  14. const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ';
  15. for (const ch of chars) {
  16. expect(ch).not.toParse(strictSettings);
  17. }
  18. });
  19. it("should parse Latin-1 outside \\text{}", function() {
  20. expect`ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçðþ`
  21. .toParse(nonstrictSettings);
  22. });
  23. it("should parse all lower case Greek letters", function() {
  24. expect`αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω`.toParse();
  25. });
  26. it("should parse math upper case Greek letters", function() {
  27. expect`ΓΔΘΛΞΠΣΥΦΨΩ`.toParse();
  28. });
  29. it("should parse Cyrillic inside \\text{}", function() {
  30. expect`\text{БГДЖЗЙЛФЦШЫЮЯ}`.toParse();
  31. });
  32. it("should not parse Cyrillic outside \\text{} with strict", function() {
  33. expect`БГДЖЗЙЛФЦШЫЮЯ`.not.toParse(strictSettings);
  34. });
  35. it("should parse CJK inside \\text{}", function() {
  36. expect`\text{私はバナナです}`.toParse();
  37. expect`\text{여보세요}`.toParse();
  38. });
  39. it("should not parse CJK outside \\text{} with strict", function() {
  40. expect`私はバナナです。`.not.toParse(strictSettings);
  41. expect`여보세요`.not.toParse(strictSettings);
  42. });
  43. it("should parse Devangari inside \\text{}", function() {
  44. expect`\text{नमस्ते}`.toParse();
  45. });
  46. it("should not parse Devangari outside \\text{} with strict", function() {
  47. expect`नमस्ते`.not.toParse(strictSettings);
  48. });
  49. it("should parse Georgian inside \\text{}", function() {
  50. expect`\text{გამარჯობა}`.toParse();
  51. });
  52. it("should not parse Georgian outside \\text{} with strict", function() {
  53. expect`გამარჯობა`.not.toParse(strictSettings);
  54. });
  55. it("should parse extended Latin characters inside \\text{}", function() {
  56. expect`\text{ěščřžůřťďňőİı}`.toParse();
  57. });
  58. it("should not parse extended Latin outside \\text{} with strict", function() {
  59. expect`ěščřžůřťďňőİı`.not.toParse(strictSettings);
  60. });
  61. it("should not allow emoji in strict mode", function() {
  62. expect`✌`.not.toParse(strictSettings);
  63. expect`\text{✌}`.not.toParse(strictSettings);
  64. const settings = new Settings({
  65. strict: (errorCode) =>
  66. (errorCode === "unknownSymbol" ? "error" : "ignore"),
  67. });
  68. expect`✌`.not.toParse(settings);
  69. expect`\text{✌}`.not.toParse(settings);
  70. });
  71. it("should allow emoji outside strict mode", function() {
  72. expect`✌`.toWarn();
  73. expect`\text{✌}`.toWarn();
  74. const settings = new Settings({
  75. strict: (errorCode) =>
  76. (errorCode === "unknownSymbol" ? "ignore" : "error"),
  77. });
  78. expect`✌`.toParse(settings);
  79. expect`\text{✌}`.toParse(settings);
  80. });
  81. });
  82. describe("unicodeScripts", () => {
  83. const scriptRegExps = {
  84. latin: /[\u0100-\u024f\u0300-\u036f]/,
  85. cyrillic: /[\u0400-\u04ff]/,
  86. brahmic: /[\u0900-\u109F]/,
  87. georgian: /[\u10a0-\u10ff]/,
  88. cjk: /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60]/,
  89. hangul: /[\uAC00-\uD7AF]/,
  90. };
  91. const scriptNames = Object.keys(scriptRegExps);
  92. const allRegExp = new RegExp(
  93. scriptNames.map(script => scriptRegExps[script].source).join('|')
  94. );
  95. it("supportedCodepoint() should return the correct values", () => {
  96. for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
  97. expect(supportedCodepoint(codepoint)).toBe(
  98. allRegExp.test(String.fromCharCode(codepoint))
  99. );
  100. }
  101. });
  102. it("scriptFromCodepoint() should return correct values", () => {
  103. outer: for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
  104. const character = String.fromCharCode(codepoint);
  105. const script = scriptFromCodepoint(codepoint);
  106. for (const scriptName of scriptNames) {
  107. if (scriptRegExps[scriptName].test(character)) {
  108. expect(script).toEqual(scriptName);
  109. continue outer;
  110. }
  111. }
  112. expect(script).toBe(null);
  113. expect(supportedCodepoint(codepoint)).toBe(false);
  114. }
  115. });
  116. });