
#!/usr/bin/env python
# License: GPL v3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

import os
import re
import subprocess
import sys
from collections import defaultdict
from contextlib import contextmanager
from functools import lru_cache, partial
from html.entities import html5
from itertools import groupby
from operator import itemgetter
from typing import (
    Callable,
    DefaultDict,
    Dict,
    FrozenSet,
    Generator,
    Iterable,
    List,
    Optional,
    Set,
    Tuple,
    Union,
)
from urllib.request import urlopen

if __name__ == '__main__' and not __package__:
    import __main__
    __main__.__package__ = 'gen'
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
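
# The 66 Unicode noncharacters: U+FDD0..U+FDEF plus the last two codepoints
# of each of the 17 planes (U+FFFE/U+FFFF, U+1FFFE/U+1FFFF, ...), which the
# count check below verifies.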
non_characters = frozenset(range(0xfffe, 0x10ffff, 0x10000))
non_characters |= frozenset(range(0xffff, 0x10ffff + 1, 0x10000))
non_characters |= frozenset(range(0xfdd0, 0xfdf0))
if len(non_characters) != 66:
    raise SystemExit('non_characters table incorrect')

emoji_skin_tone_modifiers = frozenset(range(0x1f3fb, 0x1F3FF + 1))


def get_data(fname: str, folder: str = 'UCD') -> Iterable[str]:
    url = f'https://www.unicode.org/Public/{folder}/latest/{fname}'
    bn = os.path.basename(url)
    local = os.path.join('/tmp', bn)
    if os.path.exists(local):
        with open(local, 'rb') as f:
            data = f.read()
    else:
        data = urlopen(url).read()
        with open(local, 'wb') as f:
            f.write(data)
    for line in data.decode('utf-8').splitlines():
        line = line.strip()
        if line and not line.startswith('#'):
            yield line
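
# For illustration, a line yielded from ucd/UnicodeData.txt carries the
# semicolon-separated UCD fields, e.g.:
#   0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;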


@lru_cache(maxsize=2)
def unicode_version() -> Tuple[int, int, int]:
    for line in get_data("ReadMe.txt"):
        m = re.search(r'Version\s+(\d+)\.(\d+)\.(\d+)', line)
        if m is not None:
            return int(m.group(1)), int(m.group(2)), int(m.group(3))
    raise ValueError('Could not find Unicode Version')


# Map of class names to set of codepoints in class
class_maps: Dict[str, Set[int]] = {}
all_symbols: Set[int] = set()
name_map: Dict[int, str] = {}
word_search_map: DefaultDict[str, Set[int]] = defaultdict(set)
soft_hyphen = 0xad
flag_codepoints = frozenset(range(0x1F1E6, 0x1F1E6 + 26))
# See https://github.com/harfbuzz/harfbuzz/issues/169
marks = set(emoji_skin_tone_modifiers) | flag_codepoints
not_assigned = set(range(0, sys.maxunicode))
property_maps: Dict[str, Set[int]] = defaultdict(set)


def parse_prop_list() -> None:
    global marks
    for line in get_data('ucd/PropList.txt'):
        if line.startswith('#'):
            continue
        cp_or_range, rest = line.split(';', 1)
        chars = parse_range_spec(cp_or_range.strip())
        name = rest.strip().split()[0]
        property_maps[name] |= chars
    # see https://www.unicode.org/faq/unsup_char.html#3
    marks |= property_maps['Other_Default_Ignorable_Code_Point']
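
# parse_ucd() below also folds HTML5 entity names into the search words and
# handles UnicodeData.txt's range convention, where a large block is encoded
# as a '<..., First>'/'<..., Last>' pair of lines, e.g.:
#   4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
#   9FFF;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;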


def parse_ucd() -> None:

    def add_word(w: str, c: int) -> None:
        if c <= 32 or c == 127 or 128 <= c <= 159:
            return
        if len(w) > 1:
            word_search_map[w.lower()].add(c)

    first: Optional[int] = None
    for word, c in html5.items():
        if len(c) == 1:
            add_word(word.rstrip(';'), ord(c))
    word_search_map['nnbsp'].add(0x202f)
    for line in get_data('ucd/UnicodeData.txt'):
        parts = [x.strip() for x in line.split(';')]
        codepoint = int(parts[0], 16)
        name = parts[1] or parts[10]
        if name == '<control>':
            name = parts[10]
        if name:
            name_map[codepoint] = name
            for word in name.lower().split():
                add_word(word, codepoint)
        category = parts[2]
        s = class_maps.setdefault(category, set())
        desc = parts[1]
        codepoints: Union[Tuple[int, ...], Iterable[int]] = (codepoint,)
        if first is None:
            if desc.endswith(', First>'):
                first = codepoint
                continue
        else:
            codepoints = range(first, codepoint + 1)
            first = None
        for codepoint in codepoints:
            s.add(codepoint)
            not_assigned.discard(codepoint)
            if category.startswith('M'):
                marks.add(codepoint)
            elif category.startswith('S'):
                all_symbols.add(codepoint)
            elif category == 'Cf':
                # we add Cf to marks as it contains things like tags and zero
                # width chars. Not sure if *all* of Cf should be treated as
                # combining chars, might need to add individual exceptions in
                # the future.
                marks.add(codepoint)

    with open('gen/nerd-fonts-glyphs.txt') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            code, category, name = line.split(' ', 2)
            codepoint = int(code, 16)
            if name and codepoint not in name_map:
                name_map[codepoint] = name.upper()
                for word in name.lower().split():
                    add_word(word, codepoint)

    # Some common synonyms
    word_search_map['bee'] |= word_search_map['honeybee']
    word_search_map['lambda'] |= word_search_map['lamda']
    word_search_map['lamda'] |= word_search_map['lambda']
    word_search_map['diamond'] |= word_search_map['gem']


def parse_range_spec(spec: str) -> Set[int]:
    spec = spec.strip()
    if '..' in spec:
        chars_ = tuple(map(lambda x: int(x, 16), filter(None, spec.split('.'))))
        chars = set(range(chars_[0], chars_[1] + 1))
    else:
        chars = {int(spec, 16)}
    return chars
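
# e.g. parse_range_spec('0600..0605') == set(range(0x600, 0x606)) and
# parse_range_spec('00AD') == {0xad}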


def split_two(line: str) -> Tuple[Set[int], str]:
    spec, rest = line.split(';', 1)
    spec, rest = spec.strip(), rest.strip().split(' ', 1)[0].strip()
    return parse_range_spec(spec), rest
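
# The parsers below consume lines from emoji-sequences.txt, which (per the
# UTS #51 data file format) look roughly like:
#   231A..231B    ; Basic_Emoji              ; watch..hourglass
#   1F1E6 1F1E8   ; RGI_Emoji_Flag_Sequence  ; flag: Ascension Island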

all_emoji: Set[int] = set()
emoji_presentation_bases: Set[int] = set()
narrow_emoji: Set[int] = set()
wide_emoji: Set[int] = set()
flags: Dict[int, List[int]] = {}


def parse_basic_emoji(spec: str) -> None:
    parts = list(filter(None, spec.split()))
    has_emoji_presentation = len(parts) < 2
    chars = parse_range_spec(parts[0])
    all_emoji.update(chars)
    emoji_presentation_bases.update(chars)
    (wide_emoji if has_emoji_presentation else narrow_emoji).update(chars)


def parse_keycap_sequence(spec: str) -> None:
    base, fe0f, cc = list(filter(None, spec.split()))
    chars = parse_range_spec(base)
    all_emoji.update(chars)
    emoji_presentation_bases.update(chars)
    narrow_emoji.update(chars)


def parse_flag_emoji_sequence(spec: str) -> None:
    a, b = list(filter(None, spec.split()))
    left, right = int(a, 16), int(b, 16)
    chars = {left, right}
    all_emoji.update(chars)
    wide_emoji.update(chars)
    emoji_presentation_bases.update(chars)
    flags.setdefault(left, []).append(right)


def parse_emoji_tag_sequence(spec: str) -> None:
    a = int(spec.split()[0], 16)
    all_emoji.add(a)
    wide_emoji.add(a)
    emoji_presentation_bases.add(a)


def parse_emoji_modifier_sequence(spec: str) -> None:
    a, b = list(filter(None, spec.split()))
    char, mod = int(a, 16), int(b, 16)
    del mod  # the skin tone modifier itself is not needed here
    all_emoji.add(char)
    wide_emoji.add(char)
    emoji_presentation_bases.add(char)


def parse_emoji() -> None:
    for line in get_data('emoji-sequences.txt', 'emoji'):
        parts = [x.strip() for x in line.split(';')]
        if len(parts) < 2:
            continue
        data, etype = parts[:2]
        if etype == 'Basic_Emoji':
            parse_basic_emoji(data)
        elif etype == 'Emoji_Keycap_Sequence':
            parse_keycap_sequence(data)
        elif etype == 'RGI_Emoji_Flag_Sequence':
            parse_flag_emoji_sequence(data)
        elif etype == 'RGI_Emoji_Tag_Sequence':
            parse_emoji_tag_sequence(data)
        elif etype == 'RGI_Emoji_Modifier_Sequence':
            parse_emoji_modifier_sequence(data)


doublewidth: Set[int] = set()
ambiguous: Set[int] = set()


def parse_eaw() -> None:
    global doublewidth, ambiguous
    seen: Set[int] = set()
    for line in get_data('ucd/EastAsianWidth.txt'):
        chars, eaw = split_two(line)
        if eaw == 'A':
            ambiguous |= chars
            seen |= chars
        elif eaw in ('W', 'F'):
            doublewidth |= chars
            seen |= chars
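    # Codepoints in the CJK ideograph blocks and the supplementary
    # ideographic planes default to wide even when not explicitly listed,
    # per the block defaults declared in the EastAsianWidth.txt header.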
    doublewidth |= set(range(0x3400, 0x4DBF + 1)) - seen
    doublewidth |= set(range(0x4E00, 0x9FFF + 1)) - seen
    doublewidth |= set(range(0xF900, 0xFAFF + 1)) - seen
    doublewidth |= set(range(0x20000, 0x2FFFD + 1)) - seen
    doublewidth |= set(range(0x30000, 0x3FFFD + 1)) - seen


def get_ranges(items: List[int]) -> Generator[Union[int, Tuple[int, int]], None, None]:
    items.sort()
    for k, g in groupby(enumerate(items), lambda m: m[0] - m[1]):
        group = tuple(map(itemgetter(1), g))
        a, b = group[0], group[-1]
        if a == b:
            yield a
        else:
            yield a, b
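
# Runs of consecutive codepoints collapse to (start, end) tuples while
# isolated ones stay bare ints, e.g. list(get_ranges([1, 2, 3, 7])) == [(1, 3), 7]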


def write_case(spec: Union[Tuple[int, ...], int], p: Callable[..., None], for_go: bool = False) -> None:
    if isinstance(spec, tuple):
        if for_go:
            v = ', '.join(f'0x{x:x}' for x in range(spec[0], spec[1] + 1))
            p(f'\t\tcase {v}:')
        else:
            p('\t\tcase 0x{:x} ... 0x{:x}:'.format(*spec))
    else:
        p(f'\t\tcase 0x{spec:x}:')
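
# A range compiles to a single case-range for C output ("case 0x300 ... 0x36f:",
# using the GCC/clang case-range extension) but is expanded to a comma-separated
# case list for Go, which has no case ranges.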


@contextmanager
def create_header(path: str, include_data_types: bool = True) -> Generator[Callable[..., None], None, None]:
    with open(path, 'w') as f:
        p = partial(print, file=f)
        p('// Unicode data, built from the Unicode Standard', '.'.join(map(str, unicode_version())))
        p(f'// Code generated by {os.path.basename(__file__)}, DO NOT EDIT.', end='\n\n')
        if path.endswith('.h'):
            p('#pragma once')
        if include_data_types:
            p('#include "data-types.h"\n')
            p('START_ALLOW_CASE_RANGE')
        p()
        yield p
        p()
        if include_data_types:
            p('END_ALLOW_CASE_RANGE')


def gen_emoji() -> None:
    with create_header('kitty/emoji.h') as p:
        p('static inline bool\nis_emoji(char_type code) {')
        p('\tswitch(code) {')
        for spec in get_ranges(list(all_emoji)):
            write_case(spec, p)
            p('\t\t\treturn true;')
        p('\t\tdefault: return false;')
        p('\t}')
        p('\treturn false;\n}')
        p('static inline bool\nis_symbol(char_type code) {')
        p('\tswitch(code) {')
        for spec in get_ranges(list(all_symbols)):
            write_case(spec, p)
            p('\t\t\treturn true;')
        p('\t\tdefault: return false;')
        p('\t}')
        p('\treturn false;\n}')


def category_test(
    name: str,
    p: Callable[..., None],
    classes: Iterable[str],
    comment: str,
    use_static: bool = False,
    extra_chars: Union[FrozenSet[int], Set[int]] = frozenset(),
    exclude: Union[Set[int], FrozenSet[int]] = frozenset(),
    least_check_return: Optional[str] = None,
    ascii_range: Optional[str] = None
) -> None:
    static = 'static inline ' if use_static else ''
    chars: Set[int] = set()
    for c in classes:
        chars |= class_maps[c]
    chars |= extra_chars
    chars -= exclude
    p(f'{static}bool\n{name}(char_type code) {{')
    p(f'\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
    if least_check_return is not None:
        least = min(chars)
        p(f'\tif (LIKELY(code < {least})) return {least_check_return};')
    if ascii_range is not None:
        p(f'\tif (LIKELY(0x20 <= code && code <= 0x7e)) return {ascii_range};')
    p('\tswitch(code) {')
    for spec in get_ranges(list(chars)):
        write_case(spec, p)
        p('\t\t\treturn true;')
    p('\t} // }}}\n')
    p('\treturn false;\n}\n')
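
# A sketch of the C predicate category_test() emits (categories and codepoints
# here are illustrative, not actual output):
#   bool is_word_char(char_type code) {
#       // L and N categories (... codepoints) {{{
#       switch(code) {
#           case 0x41 ... 0x5a:
#               return true;
#           ...
#       } // }}}
#       return false;
#   }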


def codepoint_to_mark_map(p: Callable[..., None], mark_map: List[int]) -> Dict[int, int]:
    p('\tswitch(c) { // {{{')
    rmap = {c: m for m, c in enumerate(mark_map)}
    for spec in get_ranges(mark_map):
        if isinstance(spec, tuple):
            s = rmap[spec[0]]
            cases = ' '.join(f'case {i}:' for i in range(spec[0], spec[1] + 1))
            p(f'\t\t{cases} return {s} + c - {spec[0]};')
        else:
            p(f'\t\tcase {spec}: return {rmap[spec]};')
    p('default: return 0;')
    p('\t} // }}}')
    return rmap


def classes_to_regex(classes: Iterable[str], exclude: str = '', for_go: bool = True) -> Iterable[str]:
    chars: Set[int] = set()
    for c in classes:
        chars |= class_maps[c]
    for x in map(ord, exclude):
        chars.discard(x)

    if for_go:
        def as_string(codepoint: int) -> str:
            if codepoint < 256:
                return fr'\x{codepoint:02x}'
            return fr'\x{{{codepoint:x}}}'
    else:
        def as_string(codepoint: int) -> str:
            if codepoint < 256:
                return fr'\x{codepoint:02x}'
            if codepoint <= 0xffff:
                return fr'\u{codepoint:04x}'
            return fr'\U{codepoint:08x}'

    for spec in get_ranges(list(chars)):
        if isinstance(spec, tuple):
            yield '{}-{}'.format(*map(as_string, (spec[0], spec[1])))
        else:
            yield as_string(spec)
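
# The yielded fragments are meant to be joined inside a character class by the
# caller, e.g. (hypothetical usage): '[' + ''.join(classes_to_regex(['Zs'])) + ']'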


def gen_ucd() -> None:
    cz = {c for c in class_maps if c[0] in 'CZ'}
    with create_header('kitty/unicode-data.c') as p:
        p('#include "unicode-data.h"')
        p('START_ALLOW_CASE_RANGE')
        category_test(
            'is_combining_char', p,
            (),
            'Combining and default ignored characters',
            extra_chars=marks,
            least_check_return='false'
        )
        category_test(
            'is_ignored_char', p, 'Cc Cs'.split(),
            'Control characters and non-characters',
            extra_chars=non_characters,
            ascii_range='false'
        )
        category_test(
            'is_non_rendered_char', p, 'Cc Cs Cf'.split(),
            'Other_Default_Ignorable_Code_Point and soft hyphen',
            extra_chars=property_maps['Other_Default_Ignorable_Code_Point'] | set(range(0xfe00, 0xfe0f + 1)),
            ascii_range='false'
        )
        category_test('is_word_char', p, {c for c in class_maps if c[0] in 'LN'}, 'L and N categories')
        category_test('is_CZ_category', p, cz, 'C and Z categories')
        category_test('is_P_category', p, {c for c in class_maps if c[0] == 'P'}, 'P category (punctuation)')
        mark_map = [0] + list(sorted(marks))
        p('char_type codepoint_for_mark(combining_type m) {')
        p(f'\tstatic char_type map[{len(mark_map)}] =', '{', ', '.join(map(str, mark_map)), '}; // {{{ mapping }}}')
        p('\tif (m < arraysz(map)) return map[m];')
        p('\treturn 0;')
        p('}\n')
        p('combining_type mark_for_codepoint(char_type c) {')
        rmap = codepoint_to_mark_map(p, mark_map)
        p('}\n')
    with open('kitty/unicode-data.h', 'r+') as f:
        raw = f.read()
        f.seek(0)
        raw, num = re.subn(
            r'^// START_KNOWN_MARKS.+?^// END_KNOWN_MARKS',
            '// START_KNOWN_MARKS\nstatic const combining_type '
            f'VS15 = {rmap[0xfe0e]}, VS16 = {rmap[0xfe0f]};'
            '\n// END_KNOWN_MARKS', raw, flags=re.MULTILINE | re.DOTALL)
        if not num:
            raise SystemExit('Failed to patch mark definitions in unicode-data.h')
        f.truncate()
        f.write(raw)


def gen_names() -> None:
    aliases_map: Dict[int, Set[str]] = {}
    for word, codepoints in word_search_map.items():
        for cp in codepoints:
            aliases_map.setdefault(cp, set()).add(word)
    if len(name_map) > 0xffff:
        raise Exception('Too many named codepoints')
    with open('tools/unicode_names/names.txt', 'w') as f:
        print(len(name_map), len(word_search_map), file=f)
        for cp in sorted(name_map):
            name = name_map[cp]
            words = name.lower().split()
            aliases = aliases_map.get(cp, set()) - set(words)
            end = '\n'
            if aliases:
                end = '\t' + ' '.join(sorted(aliases)) + end
            print(cp, *words, end=end, file=f)
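
# names.txt layout as written above: a header line "<num-names> <num-words>",
# then one line per codepoint of "<decimal codepoint> <name words...>",
# optionally followed by a tab and the space-separated extra aliases.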


def gen_wcwidth() -> None:
    seen: Set[int] = set()
    non_printing = class_maps['Cc'] | class_maps['Cf'] | class_maps['Cs']

    def add(p: Callable[..., None], comment: str, chars_: Union[Set[int], FrozenSet[int]], ret: int, for_go: bool = False) -> None:
        chars = chars_ - seen
        seen.update(chars)
        p(f'\t\t// {comment} ({len(chars)} codepoints)' + ' {{' '{')
        for spec in get_ranges(list(chars)):
            write_case(spec, p, for_go)
            p(f'\t\t\treturn {ret};')
        p('\t\t// }}}\n')

    def add_all(p: Callable[..., None], for_go: bool = False) -> None:
        seen.clear()
        add(p, 'Flags', flag_codepoints, 2, for_go)
        add(p, 'Marks', marks | {0}, 0, for_go)
        add(p, 'Non-printing characters', non_printing, -1, for_go)
        add(p, 'Private use', class_maps['Co'], -3, for_go)
        add(p, 'Text Presentation', narrow_emoji, 1, for_go)
        add(p, 'East Asian ambiguous width', ambiguous, -2, for_go)
        add(p, 'East Asian double width', doublewidth, 2, for_go)
        add(p, 'Emoji Presentation', wide_emoji, 2, for_go)
        add(p, 'Not assigned in the unicode character database', not_assigned, -4, for_go)
        p('\t\tdefault:\n\t\t\treturn 1;')
        p('\t}')
        if for_go:
            p('\t}')
        else:
            p('\treturn 1;\n}')
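
    # Width convention encoded by the add() calls above: 0 for combining
    # marks, 1 for narrow, 2 for wide, plus negative sentinels the caller must
    # resolve: -1 non-printing, -2 East Asian ambiguous, -3 private use,
    # -4 unassigned.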
    with create_header('kitty/wcwidth-std.h') as p, open('tools/wcswidth/std.go', 'w') as gof:
        gop = partial(print, file=gof)
        gop('package wcswidth\n\n')
        gop('func Runewidth(code rune) int {')
        p('static inline int\nwcwidth_std(int32_t code) {')
        p('\tif (LIKELY(0x20 <= code && code <= 0x7e)) { return 1; }')
        p('\tswitch(code) {')
        gop('\tswitch(code) {')
        add_all(p)
        add_all(gop, True)
        p('static inline bool\nis_emoji_presentation_base(uint32_t code) {')
        gop('func IsEmojiPresentationBase(code rune) bool {')
        p('\tswitch(code) {')
        gop('\tswitch(code) {')
        for spec in get_ranges(list(emoji_presentation_bases)):
            write_case(spec, p)
            write_case(spec, gop, for_go=True)
            p('\t\t\treturn true;')
            gop('\t\t\treturn true;')
        p('\t\tdefault: return false;')
        p('\t}')
        gop('\t\tdefault:\n\t\t\treturn false')
        gop('\t}')
        p('\treturn true;\n}')
        gop('\n}')
        uv = unicode_version()
        p(f'#define UNICODE_MAJOR_VERSION {uv[0]}')
        p(f'#define UNICODE_MINOR_VERSION {uv[1]}')
        p(f'#define UNICODE_PATCH_VERSION {uv[2]}')
        gop('var UnicodeDatabaseVersion [3]int = [3]int{' f'{uv[0]}, {uv[1]}, {uv[2]}' + '}')
    subprocess.check_call(['gofmt', '-w', '-s', gof.name])


def gen_rowcolumn_diacritics() -> None:
    # codes of all row/column diacritics
    codes = []
    with open("gen/rowcolumn-diacritics.txt") as file:
        for line in file.readlines():
            if line.startswith('#'):
                continue
            code = int(line.split(";")[0], 16)
            codes.append(code)

    go_file = 'tools/utils/images/rowcolumn_diacritics.go'
    with create_header('kitty/rowcolumn-diacritics.c') as p, create_header(go_file, include_data_types=False) as g:
        p('int diacritic_to_num(char_type code) {')
        p('\tswitch (code) {')
        g('package images')
        g(f'var NumberToDiacritic = [{len(codes)}]rune''{')
        g(', '.join(f'0x{x:x}' for x in codes) + ',')
        g('}')

        range_start_num = 1
        range_start = 0
        range_end = 0

        def print_range() -> None:
            if range_start >= range_end:
                return
            write_case((range_start, range_end), p)
            p('\t\treturn code - ' + hex(range_start) + ' + ' +
              str(range_start_num) + ';')

        for code in codes:
            if range_end == code:
                range_end += 1
            else:
                print_range()
                range_start_num += range_end - range_start
                range_start = code
                range_end = code + 1
        print_range()

        p('\t}')
        p('\treturn 0;')
        p('}')
    subprocess.check_call(['gofmt', '-w', '-s', go_file])


def main(args: List[str] = sys.argv) -> None:
    parse_ucd()
    parse_prop_list()
    parse_emoji()
    parse_eaw()
    gen_ucd()
    gen_wcwidth()
    gen_emoji()
    gen_names()
    gen_rowcolumn_diacritics()


if __name__ == '__main__':
    import runpy
    m = runpy.run_path(os.path.dirname(os.path.abspath(__file__)))
    m['main']([sys.executable, 'wcwidth'])