12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234 |
- #!/usr/bin/env python
- # License: GPL v3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
- # Imports {{{
- import json
- import os
- import re
- import subprocess
- import sys
- from collections import defaultdict
- from collections.abc import Generator, Hashable, Iterable
- from contextlib import contextmanager
- from functools import lru_cache, partial
- from html.entities import html5
- from io import StringIO
- from math import ceil, log
- from typing import (
- Callable,
- DefaultDict,
- Iterator,
- Literal,
- NamedTuple,
- Optional,
- Protocol,
- Sequence,
- TypedDict,
- Union,
- )
- from urllib.request import urlopen
- if __name__ == '__main__' and not __package__:
- import __main__
- __main__.__package__ = 'gen'
- sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
- # }}}
- # Fetching data {{{
# Unicode noncharacters: the last two code points of each of the 17 planes
# (U+xFFFE and U+xFFFF) plus the contiguous block U+FDD0..U+FDEF.
non_characters = frozenset(
    plane_base + offset
    for plane_base in range(0, 0x110000, 0x10000)
    for offset in (0xfffe, 0xffff)
) | frozenset(range(0xfdd0, 0xfdf0))
# Sanity check: 17 * 2 + 32 == 66
if len(non_characters) != 66:
    raise SystemExit('non_characters table incorrect')
# U+1F3FB..U+1F3FF inclusive
emoji_skin_tone_modifiers = frozenset(range(0x1f3fb, 0x1f400))
def get_data(fname: str, folder: str = 'UCD') -> Iterable[str]:
    """Yield the non-empty, non-comment lines of a Unicode data file.

    The file is looked up at
    ``https://www.unicode.org/Public/{folder}/latest/{fname}``. A copy is
    cached in ``/tmp`` under the base name of the URL, so two files with the
    same base name share one cache entry. If the cached copy exists it is
    used without touching the network; otherwise the file is downloaded and
    the cache is populated.

    Every yielded line has surrounding whitespace stripped. Blank lines and
    lines starting with ``#`` are skipped.
    """
    url = f'https://www.unicode.org/Public/{folder}/latest/{fname}'
    bn = os.path.basename(url)
    local = os.path.join('/tmp', bn)
    if os.path.exists(local):
        with open(local, 'rb') as f:
            data = f.read()
    else:
        # Close the HTTP response deterministically instead of leaving the
        # socket to be reclaimed by the garbage collector.
        with urlopen(url) as response:
            data = response.read()
        with open(local, 'wb') as f:
            f.write(data)
    for line in data.decode('utf-8').splitlines():
        line = line.strip()
        if line and not line.startswith('#'):
            yield line
@lru_cache(maxsize=2)
def unicode_version() -> tuple[int, int, int]:
    """Return the (major, minor, patch) Unicode version named in ReadMe.txt.

    The first line containing ``Version X.Y.Z`` wins. Raises ValueError when
    no line matches. The result is memoized.
    """
    version_pattern = re.compile(r'Version\s+(\d+)\.(\d+)\.(\d+)')
    for text in get_data("ReadMe.txt"):
        found = version_pattern.search(text)
        if found is None:
            continue
        major, minor, patch = found.groups()
        return int(major), int(minor), int(patch)
    raise ValueError('Could not find Unicode Version')
- # }}}
- # Parsing Unicode databases {{{
# Map of class names to set of codepoints in class
class_maps: dict[str, set[int]] = {}
# Codepoints whose general category starts with 'S' (filled by parse_ucd)
all_symbols: set[int] = set()
# Codepoint -> name (filled by parse_ucd)
name_map: dict[int, str] = {}
# Lower-cased search word -> codepoints it refers to (filled by parse_ucd)
word_search_map: DefaultDict[str, set[int]] = defaultdict(set)
soft_hyphen = 0xad
# 26 consecutive codepoints starting at U+1F1E6
flag_codepoints = frozenset(range(0x1F1E6, 0x1F1E6 + 26))
# See https://github.com/harfbuzz/harfbuzz/issues/169
marks = set(emoji_skin_tone_modifiers) | flag_codepoints
# Starts as every codepoint; parse_ucd discards each codepoint it sees.
# NOTE(review): range() excludes its end, so sys.maxunicode itself is never
# in this set -- confirm that is intended.
not_assigned = set(range(0, sys.maxunicode))
# Property name -> codepoints having it (filled by parse_prop_list)
property_maps: dict[str, set[int]] = defaultdict(set)
# Grapheme break category -> codepoints (filled by parse_grapheme_segmentation)
grapheme_segmentation_maps: dict[str, set[int]] = defaultdict(set)
# Category name <-> small integer, derived from the insertion order of
# grapheme_segmentation_maps by parse_grapheme_segmentation
grapheme_break_as_int: dict[str, int] = {}
int_as_grapheme_break: tuple[str, ...] = ()
# InCB value <-> small integer, derived from the insertion order of incb_map
incb_as_int: dict[str, int] = {}
int_as_incb: tuple[str, ...] = ()
# InCB value -> codepoints (filled by parse_grapheme_segmentation)
incb_map: dict[str, set[int]] = defaultdict(set)
# Codepoints listed as Extended_Pictographic (filled by parse_grapheme_segmentation)
extended_pictographic: set[int] = set()
def parse_prop_list() -> None:
    """Load PropList.txt into property_maps and extend marks.

    Each data line has the form ``<codepoint or range> ; <property> ...``.
    The first whitespace-delimited token after the semicolon is taken as the
    property name.
    """
    global marks
    for entry in get_data('ucd/PropList.txt'):
        if entry.startswith('#'):
            continue
        spec, remainder = entry.split(';', 1)
        codepoints = parse_range_spec(spec.strip())
        prop_name = remainder.strip().split()[0]
        property_maps[prop_name] |= codepoints
    # see https://www.unicode.org/faq/unsup_char.html#3
    marks |= property_maps['Other_Default_Ignorable_Code_Point']
def parse_ucd() -> None:
    """Populate the name, search-word and category tables.

    Reads, in order: the HTML5 entity table, UnicodeData.txt, and the local
    gen/nerd-fonts-glyphs.txt file. Fills word_search_map, name_map,
    class_maps, marks and all_symbols, and removes every codepoint seen in
    UnicodeData.txt from not_assigned.
    """

    def add_word(w: str, c: int) -> None:
        # Skip codepoints <= 32, 127 and 128..159, and single-character words
        if c <= 32 or c == 127 or 128 <= c <= 159:
            return
        if len(w) > 1:
            word_search_map[w.lower()].add(c)

    # Start of a "<..., First>" range whose closing line is still pending
    first: Optional[int] = None
    # HTML5 entity names become search words for single-character entities
    for word, c in html5.items():
        if len(c) == 1:
            add_word(word.rstrip(';'), ord(c))
    word_search_map['nnbsp'].add(0x202f)
    for line in get_data('ucd/UnicodeData.txt'):
        parts = [x.strip() for x in line.split(';')]
        codepoint = int(parts[0], 16)
        # Fall back to field 10 when the name field is empty or '<control>'
        name = parts[1] or parts[10]
        if name == '<control>':
            name = parts[10]
        if name:
            name_map[codepoint] = name
            for word in name.lower().split():
                add_word(word, codepoint)
        category = parts[2]
        s = class_maps.setdefault(category, set())
        desc = parts[1]
        codepoints: Union[tuple[int, ...], Iterable[int]] = (codepoint,)
        if first is None:
            if desc.endswith(', First>'):
                # Remember the start and wait for the line that closes the range
                first = codepoint
                continue
        else:
            # The line following a ', First>' line closes the range (inclusive)
            codepoints = range(first, codepoint + 1)
            first = None
        for codepoint in codepoints:
            s.add(codepoint)
            not_assigned.discard(codepoint)
            if category.startswith('M'):
                marks.add(codepoint)
            elif category.startswith('S'):
                all_symbols.add(codepoint)
            elif category == 'Cf':
                # we add Cf to marks as it contains things like tags and zero
                # width chars. Not sure if *all* of Cf should be treated as
                # combining chars, might need to add individual exceptions in
                # the future.
                marks.add(codepoint)

    # Lines are '<hex code> <category> <name>'; names already known from
    # UnicodeData.txt are not overridden
    with open('gen/nerd-fonts-glyphs.txt') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            code, category, name = line.split(' ', 2)
            codepoint = int(code, 16)
            if name and codepoint not in name_map:
                name_map[codepoint] = name.upper()
                for word in name.lower().split():
                    add_word(word, codepoint)

    # Some common synonyms
    word_search_map['bee'] |= word_search_map['honeybee']
    word_search_map['lambda'] |= word_search_map['lamda']
    word_search_map['lamda'] |= word_search_map['lambda']
    word_search_map['diamond'] |= word_search_map['gem']
def parse_range_spec(spec: str) -> set[int]:
    """Expand a hexadecimal codepoint spec into a set of integers.

    ``spec`` is either a single hex codepoint (``'1F600'``) or an inclusive
    range written as ``'0041..005A'``. Surrounding whitespace is ignored.
    """
    spec = spec.strip()
    if '..' not in spec:
        return {int(spec, 16)}
    # Splitting on single dots leaves empty strings between them; drop those
    bounds = [int(piece, 16) for piece in spec.split('.') if piece]
    low, high = bounds[0], bounds[1]
    return set(range(low, high + 1))
def split_two(line: str) -> tuple[set[int], str]:
    """Split ``<spec> ; <value> ...`` into (codepoints, value).

    The value is the text after the first semicolon, cut at its first space.
    """
    head, tail = line.split(';', 1)
    value = tail.strip().split(' ', 1)[0].strip()
    return parse_range_spec(head.strip()), value
# Every codepoint recorded by the parse_*emoji* helpers below
all_emoji: set[int] = set()
# Codepoints that can start an emoji presentation (same helpers)
emoji_presentation_bases: set[int] = set()
# Emoji recorded as narrow: keycap bases and Basic_Emoji entries whose spec
# has more than one token
narrow_emoji: set[int] = set()
# Emoji recorded as wide: everything else the helpers see
wide_emoji: set[int] = set()
# First codepoint of a flag sequence -> list of second codepoints
flags: dict[int, list[int]] = {}
def parse_basic_emoji(spec: str) -> None:
    """Record a Basic_Emoji entry.

    A spec with a single token goes into wide_emoji; a spec with further
    tokens after the codepoint/range goes into narrow_emoji.
    """
    tokens = spec.split()
    codepoints = parse_range_spec(tokens[0])
    width_bucket = narrow_emoji if len(tokens) >= 2 else wide_emoji
    all_emoji.update(codepoints)
    emoji_presentation_bases.update(codepoints)
    width_bucket.update(codepoints)
def parse_keycap_sequence(spec: str) -> None:
    """Record the base of an Emoji_Keycap_Sequence entry as a narrow emoji.

    The spec must consist of exactly three tokens; only the first is used.
    """
    base, _second, _third = spec.split()
    codepoints = parse_range_spec(base)
    for bucket in (all_emoji, emoji_presentation_bases, narrow_emoji):
        bucket.update(codepoints)
def parse_flag_emoji_sequence(spec: str) -> None:
    """Record both codepoints of an RGI_Emoji_Flag_Sequence entry.

    Both are stored as wide emoji and the pair is remembered in ``flags``,
    keyed by the first codepoint.
    """
    left_token, right_token = spec.split()
    left = int(left_token, 16)
    right = int(right_token, 16)
    pair = {left, right}
    for bucket in (all_emoji, wide_emoji, emoji_presentation_bases):
        bucket.update(pair)
    flags.setdefault(left, []).append(right)
def parse_emoji_tag_sequence(spec: str) -> None:
    """Record the first codepoint of an RGI_Emoji_Tag_Sequence as a wide emoji."""
    base = int(spec.split()[0], 16)
    for bucket in (all_emoji, wide_emoji, emoji_presentation_bases):
        bucket.add(base)
def parse_emoji_modifier_sequence(spec: str) -> None:
    """Record the base of an RGI_Emoji_Modifier_Sequence as a wide emoji.

    The spec must hold exactly two hex codepoints. The modifier is parsed
    but not stored.
    """
    base_token, modifier_token = spec.split()
    base = int(base_token, 16)
    int(modifier_token, 16)  # parsed only so a malformed modifier still fails
    for bucket in (all_emoji, wide_emoji, emoji_presentation_bases):
        bucket.add(base)
def parse_emoji() -> None:
    """Read emoji-sequences.txt and hand each entry to the matching parser.

    Lines have the form ``<data> ; <type> ; ...``. Lines with fewer than two
    fields and types without a handler are ignored.
    """
    handlers = {
        'Basic_Emoji': parse_basic_emoji,
        'Emoji_Keycap_Sequence': parse_keycap_sequence,
        'RGI_Emoji_Flag_Sequence': parse_flag_emoji_sequence,
        'RGI_Emoji_Tag_Sequence': parse_emoji_tag_sequence,
        'RGI_Emoji_Modifier_Sequence': parse_emoji_modifier_sequence,
    }
    for entry in get_data('emoji-sequences.txt', 'emoji'):
        columns = [column.strip() for column in entry.split(';')]
        if len(columns) < 2:
            continue
        data, etype = columns[:2]
        handler = handlers.get(etype)
        if handler is not None:
            handler(data)
# Codepoints with East Asian Width W or F, plus fallback ranges (see parse_eaw)
doublewidth: set[int] = set()
# Codepoints with East Asian Width A
ambiguous: set[int] = set()
def parse_eaw() -> None:
    """Fill ``doublewidth`` and ``ambiguous`` from EastAsianWidth.txt.

    Codepoints marked ``A`` are ambiguous, those marked ``W`` or ``F`` are
    double width. Afterwards a fixed list of ranges is added to
    ``doublewidth``, minus any codepoint the file already classified as
    A, W or F.
    """
    global doublewidth, ambiguous
    classified: set[int] = set()
    for entry in get_data('ucd/EastAsianWidth.txt'):
        codepoints, width_class = split_two(entry)
        if width_class == 'A':
            ambiguous |= codepoints
        elif width_class in ('W', 'F'):
            doublewidth |= codepoints
        else:
            continue
        classified |= codepoints
    # Inclusive (first, last) ranges treated as wide unless classified above
    fallback_wide_ranges = (
        (0x3400, 0x4DBF),
        (0x4E00, 0x9FFF),
        (0xF900, 0xFAFF),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    )
    for first, last in fallback_wide_ranges:
        doublewidth |= set(range(first, last + 1)) - classified
def parse_grapheme_segmentation() -> None:
    """Load the tables used for grapheme segmentation.

    Reads GraphemeBreakProperty.txt, DerivedCoreProperties.txt (InCB entries
    only) and emoji-data.txt (Extended_Pictographic entries only), then
    derives the name <-> integer mappings from the insertion order of the
    category dictionaries. The order of the statements below therefore
    determines the integer assigned to each category.
    """
    global extended_pictographic, grapheme_break_as_int, incb_as_int, int_as_grapheme_break, int_as_incb
    global seg_props_from_int, seg_props_as_int
    # Merely indexing a defaultdict creates the key, reserving its position
    grapheme_segmentation_maps['AtStart']  # this is used by the segmentation algorithm, no character has it
    grapheme_segmentation_maps['None']  # this is used by the segmentation algorithm, no character has it
    for line in get_data('ucd/auxiliary/GraphemeBreakProperty.txt'):
        chars, category = split_two(line)
        grapheme_segmentation_maps[category] |= chars
    grapheme_segmentation_maps['Private_Expecting_RI']  # this is used by the segmentation algorithm, no character has it
    grapheme_break_as_int = {x: i for i, x in enumerate(grapheme_segmentation_maps)}
    int_as_grapheme_break = tuple(grapheme_break_as_int)
    incb_map['None']  # used by segmentation algorithm no character has it
    for line in get_data('ucd/DerivedCoreProperties.txt'):
        spec, rest = line.split(';', 1)
        category = rest.strip().split(' ', 1)[0].strip().rstrip(';')
        chars = parse_range_spec(spec.strip())
        if category == 'InCB':
            # Most InCB chars also have a GBP categorization, but not all,
            # there exist some InCB chars that do not have a GBP category
            subcat = rest.strip().split(';')[1].strip().split()[0].strip()
            incb_map[subcat] |= chars
    incb_as_int = {x: i for i, x in enumerate(incb_map)}
    int_as_incb = tuple(incb_as_int)
    for line in get_data('ucd/emoji/emoji-data.txt'):
        chars, category = split_two(line)
        # split_two cuts the value at the first space, so a '#' that directly
        # follows the property name stays attached to it
        if 'Extended_Pictographic#' == category:
            extended_pictographic |= chars
    seg_props_from_int = {'grapheme_break': int_as_grapheme_break, 'indic_conjunct_break': int_as_incb}
    seg_props_as_int = {'grapheme_break': grapheme_break_as_int, 'indic_conjunct_break': incb_as_int}
class GraphemeSegmentationTest(TypedDict):
    """One grapheme segmentation test case."""
    # The expected clusters; joining them gives the text to segment
    data: tuple[str, ...]
    # Human readable description of the case
    comment: str


# Filled by parse_test_data()
grapheme_segmentation_tests: list[GraphemeSegmentationTest] = []
def parse_test_data() -> None:
    """Fill grapheme_segmentation_tests from GraphemeBreakTest.txt.

    Each line is ``<spec> # <comment>`` where the spec is a series of hex
    codepoints separated by ``÷`` (break) and ``×`` (no break). Two extra
    hand-written cases are appended after the file's own.
    """
    for entry in get_data('ucd/auxiliary/GraphemeBreakTest.txt'):
        spec, comment = entry.split('#')
        spec = spec.lstrip('÷').strip().rstrip('÷').strip()
        clusters: list[list[str]] = [[]]
        for token in re.split(r'([÷×])', spec):
            token = token.strip()
            if token == '÷':
                # A break starts a new cluster
                clusters.append([])
            elif token and token != '×':
                clusters[-1].append(chr(int(token, 16)))
        expected = tuple(''.join(cluster) for cluster in clusters)
        grapheme_segmentation_tests.append({'data': expected, 'comment': comment.strip()})
    extra_cases: tuple[GraphemeSegmentationTest, ...] = (
        {
            'data': (' ', '\xad', ' '),
            'comment': '÷ [0.2] SPACE (Other) ÷ [0.4] SOFT HYPHEN ÷ [999.0] SPACE (Other) ÷ [0.3]'
        },
        {
            'data': ('\U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466',),
            'comment': '÷ [0.2] MAN × [9.0] ZERO WIDTH JOINER × [11.0] WOMAN × [9.0] ZERO WIDTH JOINER × [11.0] GIRL × [9.0] ZERO WIDTH JOINER × [11.0] BOY ÷ [0.3]'
        },
    )
    for case in extra_cases:
        grapheme_segmentation_tests.append(case)
- # }}}
def write_case(spec: Union[tuple[int, ...], int], p: Callable[..., None], for_go: bool = False) -> None:
    """Emit one ``case`` label through ``p``.

    An integer ``spec`` gives a single-value label. A tuple is treated as an
    inclusive (first, last) range: written as ``first ... last`` by default,
    or as a comma separated list of every value when ``for_go`` is true.
    """
    if not isinstance(spec, tuple):
        p(f'\t\tcase 0x{spec:x}:')
        return
    if for_go:
        values = ', '.join(f'0x{value:x}' for value in range(spec[0], spec[1] + 1))
        p(f'\t\tcase {values}:')
        return
    first, last = spec[0], spec[1]
    p(f'\t\tcase 0x{first:x} ... 0x{last:x}:')
@contextmanager
def create_header(path: str, include_data_types: bool = True) -> Generator[Callable[..., None], None, None]:
    """Open ``path`` for writing and yield a print-like function bound to it.

    A two-line banner is written first, followed by ``#pragma once`` for
    ``.h`` paths. With ``include_data_types`` the body is wrapped between the
    data-types include plus START_ALLOW_CASE_RANGE and END_ALLOW_CASE_RANGE.
    """
    with open(path, 'w') as out:
        emit = partial(print, file=out)
        version = '.'.join(str(part) for part in unicode_version())
        emit('// Unicode data, built from the Unicode Standard', version)
        emit(f'// Code generated by {os.path.basename(__file__)}, DO NOT EDIT.', end='\n\n')
        if path.endswith('.h'):
            emit('#pragma once')
        if include_data_types:
            emit('#include "data-types.h"\n')
            emit('START_ALLOW_CASE_RANGE')
        emit()
        yield emit
        # Footer, written once the caller's with-body has finished
        emit()
        if include_data_types:
            emit('END_ALLOW_CASE_RANGE')
def gen_names() -> None:
    """Write tools/unicode_names/names.txt.

    The first line holds the number of named codepoints and the number of
    search words. Each following line is the codepoint, the lower-cased
    words of its name and, after a tab, any sorted aliases that are not
    already words of the name.
    """
    aliases_by_codepoint: dict[int, set[str]] = defaultdict(set)
    for word, codepoints in word_search_map.items():
        for cp in codepoints:
            aliases_by_codepoint[cp].add(word)
    if len(name_map) > 0xffff:
        raise Exception('Too many named codepoints')
    with open('tools/unicode_names/names.txt', 'w') as out:
        print(len(name_map), len(word_search_map), file=out)
        for cp in sorted(name_map):
            words = name_map[cp].lower().split()
            extra = aliases_by_codepoint.get(cp, set()) - set(words)
            terminator = ('\t' + ' '.join(sorted(extra)) + '\n') if extra else '\n'
            print(cp, *words, end=terminator, file=out)
def gofmt(*files: str) -> None:
    """Run ``gofmt -w -s`` on the given files; raises if gofmt fails."""
    command = ['gofmt', '-w', '-s', *files]
    subprocess.check_call(command)
def gen_rowcolumn_diacritics() -> None:
    """Generate the C and Go row/column diacritic lookup code.

    Reads the diacritic codepoints from gen/rowcolumn-diacritics.txt, writes
    a C ``diacritic_to_num`` switch and a Go ``NumberToDiacritic`` array, and
    finally runs gofmt on the Go file.
    """
    # codes of all row/column diacritics
    codes = []
    with open("gen/rowcolumn-diacritics.txt") as file:
        for line in file.readlines():
            if line.startswith('#'):
                continue
            # The codepoint is the first ';'-separated field, in hex
            code = int(line.split(";")[0], 16)
            codes.append(code)

    go_file = 'tools/utils/images/rowcolumn_diacritics.go'
    with create_header('kitty/rowcolumn-diacritics.c') as p, create_header(go_file, include_data_types=False) as g:
        p('int diacritic_to_num(char_type code) {')
        p('\tswitch (code) {')
        g('package images')
        g(f'var NumberToDiacritic = [{len(codes)}]rune''{')
        g(', '.join(f'0x{x:x}' for x in codes) + ',')
        g('}')

        # Current run of consecutive codes is [range_start, range_end), and
        # range_start_num is the 1-based number of its first code
        range_start_num = 1
        range_start = 0
        range_end = 0

        def print_range() -> None:
            # Reads the enclosing run state at call time; empty runs emit nothing
            if range_start >= range_end:
                return
            # NOTE(review): range_end is one past the last code of the run,
            # but write_case emits an inclusive 'first ... last' label, so the
            # generated case appears to cover one extra codepoint -- confirm
            write_case((range_start, range_end), p)
            p('\t\treturn code - ' + hex(range_start) + ' + ' +
              str(range_start_num) + ';')

        for code in codes:
            if range_end == code:
                # Extends the current run
                range_end += 1
            else:
                # Flush the finished run and start a new one at this code
                print_range()
                range_start_num += range_end - range_start
                range_start = code
                range_end = code + 1
        print_range()

        p('\t}')
        p('\treturn 0;')
        p('}')
    gofmt(go_file)
def gen_test_data() -> None:
    """Dump grapheme_segmentation_tests to kitty_tests/GraphemeBreakTest.json."""
    with open('kitty_tests/GraphemeBreakTest.json', 'wb') as out:
        serialized = json.dumps(grapheme_segmentation_tests, indent=2, ensure_ascii=False)
        out.write(serialized.encode())
def getsize(data: Iterable[int]) -> Literal[1, 2, 4]:
    """Return the smallest integer width in bytes (1, 2 or 4) that can hold
    the largest value in ``data``."""
    largest = max(data)
    if largest >= 1 << 16:
        return 4
    if largest >= 1 << 8:
        return 2
    return 1
def mask_for(bits: int) -> int:
    """Return an integer with the lowest ``bits`` bits set."""
    return (1 << bits) - 1
- def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift: int = 0) -> tuple[list[int], list[int], list[T], int]:
- if use_fixed_shift:
- candidates = range(use_fixed_shift, use_fixed_shift + 1)
- else:
- n = len(t)-1 # last valid index
- maxshift = 0 # the most we can shift n and still have something left
- if n > 0:
- while n >> 1:
- n >>= 1
- maxshift += 1
- candidates = range(maxshift + 1)
- t3: list[T] = []
- tmap: dict[T, int] = {}
- seen = set()
- for x in t:
- if x not in seen:
- seen.add(x)
- tmap[x] = len(t3)
- t3.append(x)
- t_int = tuple(tmap[x] for x in t)
- bytesz = sys.maxsize
- def memsize() -> int:
- ans = len(t1)*getsize(t1)
- sz3 = len(t3)*property_size + len(t2)*getsize(t2)
- sz2 = len(t2) * property_size
- return ans + min(sz2, sz3)
- for shift in candidates:
- t1: list[int] = []
- t2: list[int] = []
- size = 2**shift
- bincache: dict[tuple[int, ...], int] = {}
- for i in range(0, len(t_int), size):
- bin = t_int[i:i+size]
- index = bincache.get(bin)
- if index is None:
- index = len(t2)
- bincache[bin] = index
- t2.extend(bin)
- t1.append(index >> shift)
- # determine memory size
- b = memsize()
- if b < bytesz:
- best = t1, t2, shift
- bytesz = b
- t1, t2, shift = best
- return t1, t2, t3, shift
class Property(Protocol):
    """Structural type of the entries passed as ``t3`` to gen_multistage_table."""

    @property
    def as_c(self) -> str:
        # Text emitted for this entry in the generated C array
        return ''

    @property
    def as_go(self) -> str:
        # Text emitted for this entry in the generated Go array
        return ''

    @classmethod
    def bitsize(cls) -> int:
        # Size of one entry in bits; gen_multistage_table divides it by 8
        return 0
def get_types(sz: int) -> tuple[str, str]:
    """Return the (C, Go) unsigned integer type names for a size in bytes."""
    bits = sz * 8
    c_type = f'uint{bits}_t'
    go_type = f'uint{bits}'
    return c_type, go_type
def gen_multistage_table(
    c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, input_max_val: int
) -> None:
    """Emit a multi-stage lookup table as C (via ``c``) and Go (via ``g``).

    ``t1``, ``t2``, ``t3`` and ``shift`` have the layout produced by
    splitbins(). The generated identifiers are prefixed with the class name
    of ``t3[0]`` (lower-cased for Go). ``input_max_val`` selects the Go
    integer type of the accessor's argument. A summary of the table size is
    printed to stdout.
    """
    t1_type_sz = getsize(t1)
    ctype_t1, gotype_t1 = get_types(t1_type_sz)
    mask = mask_for(shift)
    name = t3[0].__class__.__name__
    # Integer width needed to hold any index into t3
    t2_type_sz = getsize(tuple(range(len(t3))))
    ctype_t2, gotype_t2 = get_types(t2_type_sz)
    t3_type_sz = t3[0].bitsize() // 8
    lname = name.lower()
    input_type = get_types(getsize((input_max_val,)))[1]
    # Output t1
    c(f'static const char_type {name}_mask = {mask}u;')
    c(f'static const char_type {name}_shift = {shift}u;')
    c(f'static const {ctype_t1} {name}_t1[{len(t1)}] = ''{')
    c(f'\t{", ".join(map(str, t1))}')
    c('};')
    g(f'const {lname}_mask = {mask}')
    g(f'const {lname}_shift = {shift}')
    g(f'var {lname}_t1 = [{len(t1)}]{gotype_t1}''{')
    g(f'\t{", ".join(map(str, t1))},')
    g('}')
    bytesz = len(t1) * t1_type_sz
    if t3_type_sz > t2_type_sz:  # needs 3 levels
        # An entry is wider than an index, so keep t2 as indices into t3
        bytesz += len(t2) * t2_type_sz + len(t3) * t3_type_sz
        c(f'static const {ctype_t2} {name}_t2[{len(t2)}] = ''{')
        c(f'\t{", ".join(map(str, t2))}')
        c('};')
        items = '\n\t'.join(x.as_c + f', // {i}' for i, x in enumerate(t3))
        c(f'static const {name} {name}_t3[{len(t3)}] = ''{')
        c(f'\t{items}')
        c('};')
        g(f'var {lname}_t2 = [{len(t2)}]{gotype_t2}''{')
        g(f'\t{", ".join(map(str, t2))},')
        g('}')
        items = '\n\t'.join(x.as_go + f', // {i}' for i, x in enumerate(t3))
        g(f'var {lname}_t3 = [{len(t3)}]{name}''{')
        g(f'\t{items}')
        g('}')
        g(f'''
// Array accessor function that avoids bounds checking
func {lname}_for(x {input_type}) {name} {{
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
return *(*{name})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t3[0])) + t2*{t3_type_sz}))
}}
''')
    else:
        # An entry is no wider than an index: store the entries directly in
        # the second stage and drop the third one
        t3 = tuple(t3[i] for i in t2)
        bytesz += len(t3) * t3_type_sz
        items = '\n\t'.join(x.as_c + ',' for x in t3)
        c(f'static const {name} {name}_t2[{len(t3)}] = ''{')
        c(f'\t{items}')
        c('};')
        items = '\n\t'.join(x.as_go + ',' for x in t3)
        g(f'var {lname}_t2 = [{len(t3)}]{name}''{')
        g(f'\t{items}')
        g('}')
        g(f'''
// Array accessor function that avoids bounds checking
func {lname}_for(x {input_type}) {name} {{
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
return *(*{name})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t3_type_sz}))
}}
''')
    print(f'Size of {name} table: {ceil(bytesz/1024)}KB with {shift} bit shift')
width_shift = 4


def bitsize(maxval: int) -> int:
    """Return ``ceil(log2(maxval))``, computed exactly.

    This is the number of bits needed to tell ``maxval`` distinct values
    (``0 .. maxval - 1``) apart, not the number of bits needed to store the
    number ``maxval`` itself: ``bitsize(8)`` is 3.

    Integer arithmetic is used because ``math.log(x, 2)`` divides two
    floating point logarithms and can land just above an integer for exact
    powers of two, which ``ceil`` would then round up one bit too far.

    Raises ValueError for ``maxval < 1``, for which the logarithm is
    undefined.
    """
    if maxval < 1:
        raise ValueError('bitsize() requires a positive value')
    return (maxval - 1).bit_length()
def clamped_bitsize(val: int) -> int:
    """Round a bit count up to the next of 8, 16, 32 or 64.

    Raises ValueError when ``val`` exceeds 64.
    """
    for width in (8, 16, 32, 64):
        if val <= width:
            return width
    raise ValueError('Too many fields')
def bitfield_from_int(
    fields: dict[str, int], x: int, int_to_str: dict[str, tuple[str, ...]]
) -> dict[str, str | bool]:
    """Unpack the integer ``x`` into named field values.

    ``fields`` maps field name to width in bits. One-bit fields decode to a
    bool; wider fields are looked up in ``int_to_str[field]``.
    """
    # first field is least significant, last field is most significant
    decoded: dict[str, str | bool] = {}
    remaining = x
    for field_name, width in fields.items():
        raw = remaining & mask_for(width)
        decoded[field_name] = bool(raw) if width == 1 else int_to_str[field_name][raw]
        remaining >>= width
    return decoded
- def bitfield_as_int(
- fields: dict[str, int], vals: Sequence[bool | str], str_maps: dict[str, dict[str, int]]
- ) -> int:
- # first field is least significant, last field is most significant
- ans = shift = 0
- for i, (f, width) in enumerate(fields.items()):
- qval = vals[i]
- if isinstance(qval, str):
- val = str_maps[f][qval]
- else:
- val = int(qval)
- ans |= val << shift
- shift += width
- return ans
# Field name -> tuple of category names indexed by their integer encoding;
# assigned by parse_grapheme_segmentation()
seg_props_from_int: dict[str, tuple[str, ...]] = {}
# Field name -> {category name: integer encoding};
# assigned by parse_grapheme_segmentation()
seg_props_as_int: dict[str, dict[str, int]] = {}
class GraphemeSegmentationProps(NamedTuple):
    """Per-character properties consulted by the grapheme segmenter.

    The default value of each field doubles as that field's width in bits:
    fields() converts every default with ``int()``, so a bool default of
    ``True`` means one bit.
    """

    grapheme_break: str = ''  # set at runtime
    indic_conjunct_break: str = ''  # set at runtime
    is_extended_pictographic: bool = True

    @classmethod
    def fields(cls) -> dict[str, int]:
        # Field name -> width in bits, in declaration order
        defaults = cls._field_defaults
        return {field_name: int(defaults[field_name]) for field_name in cls._fields}

    @classmethod
    def used_bits(cls) -> int:
        # Total width of all fields
        return sum(cls.fields().values())

    @classmethod
    def bitsize(cls) -> int:
        # used_bits() rounded up to 8, 16, 32 or 64
        return clamped_bitsize(cls.used_bits())

    @classmethod
    def from_int(cls, x: int) -> 'GraphemeSegmentationProps':
        decoded = bitfield_from_int(cls.fields(), x, seg_props_from_int)
        return cls(**decoded)  # type: ignore

    def __int__(self) -> int:
        return bitfield_as_int(self.fields(), self, seg_props_as_int)
# Grapheme break categories checked by the "break before and after controls"
# rule in GraphemeSegmentationState.add_to_current_cell
control_grapheme_breaks = 'CR', 'LF', 'Control'
# Indic_Conjunct_Break values that continue a consonant sequence
linker_or_extend = 'Linker', 'Extend'
def bitfield_declaration_as_c(name: str, fields: dict[str, int], *alternate_fields: dict[str, int]) -> str:
    """Return C source declaring ``name`` as a union of packed bitfield structs.

    ``fields`` maps field name to width in bits and determines the size of
    the underlying unsigned integer type. Each mapping in
    ``alternate_fields`` adds another struct view over the same storage. The
    union also gets a ``val`` member of the underlying type, and a
    static_assert on the union's size is appended.
    """
    base_size = clamped_bitsize(sum(fields.values()))
    base_type = f'uint{base_size}_t'
    # The adjacent string literals build the opening '{{{' marker
    ans = [f'// {name}Declaration: uses {sum(fields.values())} bits {{''{{', f'typedef union {name} {{']

    def struct(fields: dict[str, int]) -> Iterator[str]:
        # Yields one anonymous struct; an empty mapping yields nothing
        if not fields:
            return
        # Unused bits are emitted as an unnamed padding bitfield
        empty = base_size - sum(fields.values())
        yield ' struct __attribute__((packed)) {'
        yield '#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__'
        # Big endian: fields in reverse order, padding last
        for f, width in reversed(fields.items()):
            yield f' uint{clamped_bitsize(width)}_t {f} : {width};'
        if empty:
            yield f' uint{clamped_bitsize(empty)}_t : {empty};'
        yield '#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__'
        # Little endian: padding first, fields in declaration order
        if empty:
            yield f' uint{clamped_bitsize(empty)}_t : {empty};'
        for f, width in fields.items():
            yield f' uint{clamped_bitsize(width)}_t {f} : {width};'
        yield '#else'
        yield '#error "Unsupported endianness"'
        yield '#endif'
        yield ' };'
    ans.extend(struct(fields))
    for fields in alternate_fields:
        ans.extend(struct(fields))
    ans.append(f' {base_type} val;')
    ans.append(f'}} {name};')
    ans.append(f'static_assert(sizeof({name}) == sizeof({base_type}), "Fix the ordering of {name}");')
    # The adjacent string literals build the closing '}}}' marker
    ans.append(f'// End{name}Declaration }}''}}')
    return '\n'.join(ans)
class GraphemeSegmentationState(NamedTuple):
    """State carried by the grapheme segmenter from one character to the next.

    As with GraphemeSegmentationProps, the default value of each field
    doubles as that field's width in bits (see fields()).
    """

    grapheme_break: str = ''  # set at runtime
    # True if the last character ends a sequence of Indic_Conjunct_Break values:  consonant {extend|linker}*
    incb_consonant_extended: bool = True
    # True if the last character ends a sequence of Indic_Conjunct_Break values:  consonant {extend|linker}* linker
    incb_consonant_extended_linker: bool = True
    # True if the last character ends a sequence of Indic_Conjunct_Break values:  consonant {extend|linker}* linker {extend|linker}*
    incb_consonant_extended_linker_extended: bool = True
    # True if the last character ends an emoji modifier sequence \p{Extended_Pictographic} Extend*
    emoji_modifier_sequence: bool = True
    # True if the last character was immediately preceded by an emoji modifier sequence   \p{Extended_Pictographic} Extend*
    emoji_modifier_sequence_before_last_char: bool = True

    @classmethod
    def make(cls) -> 'GraphemeSegmentationState':
        # Initial state: 'AtStart' with every flag cleared
        return GraphemeSegmentationState('AtStart', False, False, False, False, False)

    @classmethod
    def fields(cls) -> dict[str, int]:
        # Field name -> width in bits, in declaration order
        return {f: int(cls._field_defaults[f]) for f in cls._fields}

    @classmethod
    def from_int(cls, x: int) -> 'GraphemeSegmentationState':
        args = bitfield_from_int(cls.fields(), x, {'grapheme_break': int_as_grapheme_break})
        return cls(**args)  # type: ignore

    def __int__(self) -> int:
        return bitfield_as_int(self.fields(), self, seg_props_as_int)

    @classmethod
    def c_declaration(cls) -> str:
        return bitfield_declaration_as_c(cls.__name__, cls.fields())

    @classmethod
    def used_bits(cls) -> int:
        return sum(int(cls._field_defaults[f]) for f in cls._fields)

    @classmethod
    def bitsize(cls) -> int:
        return clamped_bitsize(cls.used_bits())

    def add_to_current_cell(self, p: GraphemeSegmentationProps) -> 'GraphemeSegmentationResult':
        """Advance the state by one character with properties ``p``.

        Returns the new state together with a flag that is True when the
        character belongs to the current grapheme cluster and False when a
        break precedes it. The first matching rule below wins.
        """
        prev = self.grapheme_break
        prop = p.grapheme_break
        incb = p.indic_conjunct_break
        add_to_cell = False
        if self.grapheme_break == 'AtStart':
            # The very first character always joins the (empty) current cell
            add_to_cell = True
            if prop == 'Regional_Indicator':
                prop = 'Private_Expecting_RI'
        else:
            # No break between CR and LF (GB3).
            if prev == 'CR' and prop == 'LF':
                add_to_cell = True
            # Break before and after controls (GB4, GB5).
            elif prev in control_grapheme_breaks or prop in control_grapheme_breaks:
                pass
            # No break between Hangul syllable sequences (GB6, GB7, GB8).
            elif (
                (prev == 'L' and prop in ('L', 'V', 'LV', 'LVT')) or
                (prev in ('LV', 'V') and prop in ('V', 'T')) or
                (prev in ('LVT', 'T') and prop == 'T')
            ):
                add_to_cell = True
            # No break before: extending characters or ZWJ (GB9), SpacingMarks (GB9a), Prepend characters (GB9b).
            elif prop in ('Extend', 'ZWJ', 'SpacingMark') or prev == 'Prepend':
                add_to_cell = True
            # No break within certain combinations of Indic_Conjunct_Break values
            # Between consonant {extend|linker}* linker {extend|linker}* and consonant (GB9c).
            elif self.incb_consonant_extended_linker_extended and incb == 'Consonant':
                add_to_cell = True
            # No break within emoji modifier sequences or emoji zwj sequences (GB11).
            elif prev == 'ZWJ' and self.emoji_modifier_sequence_before_last_char and p.is_extended_pictographic:
                add_to_cell = True
            # No break between RI if there is an odd number of RI characters before (GB12, GB13).
            elif prop == 'Regional_Indicator':
                if prev == 'Private_Expecting_RI':
                    add_to_cell = True
                else:
                    # First indicator of a pair: remember that a partner is expected
                    prop = 'Private_Expecting_RI'
            # Break everywhere else GB999

        # Update the flags from the old state and this character's properties
        incb_consonant_extended_linker = self.incb_consonant_extended and incb == 'Linker'
        incb_consonant_extended_linker_extended = incb_consonant_extended_linker or (
                self.incb_consonant_extended_linker_extended and incb in linker_or_extend)
        incb_consonant_extended = incb == 'Consonant' or (
            self.incb_consonant_extended and incb in linker_or_extend)
        emoji_modifier_sequence_before_last_char = self.emoji_modifier_sequence
        emoji_modifier_sequence = (self.emoji_modifier_sequence and prop == 'Extend') or p.is_extended_pictographic

        return GraphemeSegmentationResult(GraphemeSegmentationState(
            grapheme_break=prop, incb_consonant_extended=incb_consonant_extended,
            incb_consonant_extended_linker=incb_consonant_extended_linker,
            incb_consonant_extended_linker_extended=incb_consonant_extended_linker_extended,
            emoji_modifier_sequence=emoji_modifier_sequence, emoji_modifier_sequence_before_last_char=emoji_modifier_sequence_before_last_char
        ), add_to_cell)
def split_into_graphemes(props: Sequence[GraphemeSegmentationProps], text: str) -> Iterator[str]:
    """Yield the grapheme clusters of ``text``.

    ``props`` is indexed by codepoint. The state machine is stepped once per
    character; whenever it reports that the character does not join the
    current cluster, the text accumulated so far is yielded.
    """
    state = GraphemeSegmentationState.make()
    cluster_start = 0
    for index, ch in enumerate(text):
        state, joins_current = state.add_to_current_cell(props[ord(ch)])
        if joins_current:
            continue
        yield text[cluster_start:index]
        cluster_start = index
    # Whatever remains after the last break is the final cluster
    if cluster_start < len(text):
        yield text[cluster_start:]
def split_into_graphemes_with_table(
    props: Sequence['GraphemeSegmentationProps'], table: Sequence['GraphemeSegmentationResult'], text: str,
) -> Iterator[str]:
    """Yield the grapheme clusters of ``text`` using a precomputed table.

    Each step looks up ``table`` at the integer form of the
    (current state, character properties) key instead of evaluating the
    segmentation rules.
    """
    current = GraphemeSegmentationResult.make()
    cluster_start = 0
    for index, ch in enumerate(text):
        key = GraphemeSegmentationKey(current.new_state, props[ord(ch)])
        current = table[int(key)]
        if current.add_to_current_cell:
            continue
        yield text[cluster_start:index]
        cluster_start = index
    # Whatever remains after the last break is the final cluster
    if cluster_start < len(text):
        yield text[cluster_start:]
def test_grapheme_segmentation(split_into_graphemes: Callable[[str], Iterator[str]]) -> None:
    """Check a splitter against every case in grapheme_segmentation_tests.

    Exits via SystemExit with a description of the first failing case.
    """

    def as_codepoints(text: str) -> str:
        # Space separated hex codepoints, without the 0x prefix
        return ' '.join(f'{ord(ch):x}' for ch in text)

    for case in grapheme_segmentation_tests:
        expected = case['data']
        actual = tuple(split_into_graphemes(''.join(case['data'])))
        if expected == actual:
            continue
        qe = tuple(as_codepoints(cluster) for cluster in expected)
        qa = tuple(as_codepoints(cluster) for cluster in actual)
        raise SystemExit(f'Failed to split graphemes for: {case["comment"]}\n{expected!r} {qe} != {actual!r} {qa}')
class GraphemeSegmentationKey(NamedTuple):
    """A (state, character properties) pair with an integer form.

    In the integer form the character properties occupy the low
    ``GraphemeSegmentationProps.used_bits()`` bits and the state sits above
    them.
    """

    state: GraphemeSegmentationState
    char: GraphemeSegmentationProps

    @classmethod
    def from_int(cls, x: int) -> 'GraphemeSegmentationKey':
        props_bits = GraphemeSegmentationProps.used_bits()
        state_part = x >> props_bits
        props_part = x & mask_for(props_bits)
        return GraphemeSegmentationKey(
            GraphemeSegmentationState.from_int(state_part),
            GraphemeSegmentationProps.from_int(props_part),
        )

    def __int__(self) -> int:
        props_bits = GraphemeSegmentationProps.used_bits()
        return (int(self.state) << props_bits) | int(self.char)

    def result(self) -> 'GraphemeSegmentationResult':
        # Evaluate the segmentation rules for this key
        return self.state.add_to_current_cell(self.char)

    @classmethod
    def code_to_convert_to_int(cls, for_go: bool = False) -> str:
        """Return source for a helper computing the integer key, in Go when
        ``for_go`` is true and in C otherwise."""
        shift = GraphemeSegmentationProps.used_bits()
        if for_go:
            base_type = f'uint{GraphemeSegmentationState.bitsize()}'
            signature = f'func grapheme_segmentation_key(r GraphemeSegmentationResult, ch CharProps) ({base_type}) ''{'
            body = f'\treturn (r.State() << {shift}) | ch.GraphemeSegmentationProperty()'
        else:
            base_type = f'uint{GraphemeSegmentationState.bitsize()}_t'
            signature = f'static inline {base_type} {cls.__name__}(GraphemeSegmentationResult r, CharProps ch)' '{'
            body = f'\treturn (r.state << {shift}) | ch.grapheme_segmentation_property;'
        return '\n'.join((signature, body, '}'))
class GraphemeSegmentationResult(NamedTuple):
    """Outcome of one segmentation step: the next state and whether the character joins the current cell."""

    new_state: GraphemeSegmentationState = GraphemeSegmentationState()
    add_to_current_cell: bool = True

    @classmethod
    def used_bits(cls) -> int:
        """Return the bits used by all state fields plus one for ``add_to_current_cell``."""
        widths = GraphemeSegmentationState._field_defaults
        state_bits = sum(int(widths[name]) for name in GraphemeSegmentationState._fields)
        return state_bits + 1

    @classmethod
    def bitsize(cls) -> int:
        """Return ``used_bits()`` passed through ``clamped_bitsize``."""
        return clamped_bitsize(cls.used_bits())

    @classmethod
    def make(cls) -> 'GraphemeSegmentationResult':
        """Return the starting result: a fresh state with ``add_to_current_cell`` False."""
        return GraphemeSegmentationResult(GraphemeSegmentationState.make(), False)

    @classmethod
    def go_fields(cls) -> Sequence[str]:
        """Return ``'name width'`` specs: ``add_to_current_cell`` first, then the state fields in reverse order."""
        state_fields = reversed(GraphemeSegmentationState.fields().items())
        return ('add_to_current_cell 1', *(f'{name} {width}' for name, width in state_fields))

    @property
    def as_go(self) -> str:
        """Go expression that ORs every field, masked to its width, into its bit position."""
        shift = 0
        terms = []
        # go_fields() is walked backwards, so its last entry lands at shift 0.
        for spec in reversed(GraphemeSegmentationResult.go_fields()):
            name, _, width = spec.partition(' ')
            bits = int(width)
            if name == 'add_to_current_cell':
                value = int(self.add_to_current_cell)
            elif name == 'grapheme_break':
                # Emitted as a named Go constant rather than a number.
                value = f'GraphemeSegmentationResult(GBP_{getattr(self.new_state, name)})'
            else:
                value = int(getattr(self.new_state, name))
            mask = '0b' + '1' * bits
            terms.append(f'(({value} & {mask}) << {shift})')
            shift += bits
        return ' | '.join(terms)

    @classmethod
    def go_extra(cls) -> str:
        """Return Go source for a ``State()`` accessor that masks out the state bits."""
        bits = GraphemeSegmentationState.used_bits()
        base_type = f'uint{GraphemeSegmentationState.bitsize()}'
        # NOTE(review): layout inside the template is cosmetic; gen_char_props runs gofmt on the generated Go file.
        return f'''
func (r GraphemeSegmentationResult) State() (ans {base_type}) {{
return {base_type}(r) & {mask_for(bits)}
}}
'''

    @property
    def as_c(self) -> str:
        """C designated-initializer text for this result."""
        state = self.new_state
        parts = []
        for name in GraphemeSegmentationState._fields:
            raw = getattr(state, name)
            rendered = f'GBP_{raw}' if name == 'grapheme_break' else int(raw)
            parts.append(f'.{name}={rendered}')
        parts.append(f'.add_to_current_cell={int(self.add_to_current_cell)}')
        return '{' + ', '.join(parts) + '}'

    @classmethod
    def c_declaration(cls) -> str:
        """Return the C bitfield declaration, with an alternate ``state`` member spanning all state bits."""
        state_fields = GraphemeSegmentationState.fields()
        fields = {'add_to_current_cell': 1, **state_fields}
        state_bits = sum(state_fields.values())
        # Unverified: whether the alternate state access works on big endian.
        return bitfield_declaration_as_c('GraphemeSegmentationResult', fields, {'state': state_bits})
class CharProps(NamedTuple):
    """Properties of one code point.

    On the class itself each field default is that field's width in bits
    (the string-valued ones are filled in by gen_char_props); instances hold
    the real property values.
    """

    width: int = 3
    is_emoji: bool = True
    category: str = ''  # set at runtime
    is_emoji_presentation_base: bool = True

    # derived properties for fast lookup
    is_invalid: bool = True
    is_non_rendered: bool = True
    is_symbol: bool = True
    is_combining_char: bool = True
    is_word_char: bool = True
    is_punctuation: bool = True

    # needed for grapheme segmentation set as LSB bits for easy conversion to GraphemeSegmentationProps
    grapheme_break: str = ''  # set at runtime
    indic_conjunct_break: str = ''  # set at runtime
    is_extended_pictographic: bool = True

    @classmethod
    def bitsize(cls) -> int:
        """Return the total declared field width passed through ``clamped_bitsize``."""
        total_bits = sum(int(cls._field_defaults[name]) for name in cls._fields)
        return clamped_bitsize(total_bits)

    @classmethod
    def go_fields(cls) -> Sequence[str]:
        """Return ``'name width'`` specs in declaration order, with ``width`` exposed as ``shifted_width``."""
        return tuple(f'{name} {bits}' for name, bits in cls.fields().items())

    @property
    def as_go(self) -> str:
        """Go expression that ORs every field, masked to its width, into its bit position."""
        enum_prefix = {'grapheme_break': 'GBP_', 'indic_conjunct_break': 'ICB_', 'category': 'UC_'}
        shift = 0
        terms = []
        # Fields are walked last-to-first, so the last declared field lands at shift 0.
        for spec in reversed(self.go_fields()):
            go_name, _, width = spec.partition(' ')
            name = 'width' if go_name == 'shifted_width' else go_name
            value = getattr(self, name)
            if name == 'width':
                value += width_shift
            elif name in enum_prefix:
                value = f'CharProps({enum_prefix[name]}{value})'
            else:
                value = int(value)
            bits = int(width)
            mask = '0b' + '1' * bits
            terms.append(f'(({value} & {mask}) << {shift})')
            shift += bits
        return ' | '.join(terms)

    @classmethod
    def go_extra(cls) -> str:
        """Return Go source for the ``Width()`` and ``GraphemeSegmentationProperty()`` accessors."""
        base_type = f'uint{GraphemeSegmentationState.bitsize()}'
        seg_fields = GraphemeSegmentationProps.fields()
        gb_bits = seg_fields['grapheme_break']
        pictographic_shift = gb_bits + seg_fields['indic_conjunct_break']
        # NOTE(review): layout inside the template is cosmetic; gen_char_props runs gofmt on the generated Go file.
        return f'''
func (s CharProps) Width() int {{
return int(s.Shifted_width()) - {width_shift}
}}
func (s CharProps) GraphemeSegmentationProperty() {base_type} {{
return {base_type}(s.Grapheme_break() | (s.Indic_conjunct_break() << {gb_bits}) | (s.Is_extended_pictographic() << {pictographic_shift}))
}}
'''

    @property
    def as_c(self) -> str:
        """C designated-initializer text for this record."""
        enum_prefix = {'grapheme_break': 'GBP_', 'indic_conjunct_break': 'ICB_', 'category': 'UC_'}
        parts = []
        for name in self._fields:
            value = getattr(self, name)
            if name == 'width':
                # Stored offset by width_shift under the name shifted_width.
                name, value = 'shifted_width', value + width_shift
            elif name in enum_prefix:
                value = f'{enum_prefix[name]}{value}'
            else:
                value = int(value)
            parts.append(f'.{name}={value}')
        return '{' + ', '.join(parts) + '}'

    @classmethod
    def fields(cls) -> dict[str, int]:
        """Return field name -> bit width in declaration order, with ``width`` renamed to ``shifted_width``."""
        ans: dict[str, int] = {}
        for name in cls._fields:
            key = 'shifted_width' if name == 'width' else name
            ans[key] = int(cls._field_defaults[name])
        return ans

    @classmethod
    def c_declaration(cls) -> str:
        """Return the C bitfield declaration, with an alternate member spanning the segmentation fields."""
        # Unverified: whether grapheme_segmentation_property in the alternate view works on big endian.
        defaults = cls._field_defaults
        segmentation_bits = sum(int(defaults[name]) for name in GraphemeSegmentationProps._fields)
        alternate = {'grapheme_segmentation_property': segmentation_bits}
        return bitfield_declaration_as_c(cls.__name__, cls.fields(), alternate)
def generate_enum(p: Callable[..., None], gp: Callable[..., None], name: str, *items: str, prefix: str = '') -> None:
    """Emit one enumeration twice: as a C ``typedef enum`` through ``p`` and as a Go const block through ``gp``.

    Every member name is ``prefix`` followed by the item. Both sinks receive
    a trailing empty line.
    """
    p(f'typedef enum {name} {{')  # }}
    gp(f'type {name} uint8\n')
    gp('const (')  # )
    for position, item in enumerate(items):
        member = prefix + item
        p(f'\t{member},')
        # Only the first Go constant spells out the type and iota.
        gp(member if position else f'{member} {name} = iota')
    p(f'}} {name};')
    gp(')')
    p('')
    gp('')
def category_set(predicate: Callable[[str], bool]) -> set[int]:
    """Return the union of the ``class_maps`` code point sets whose category name satisfies ``predicate``."""
    selected: set[int] = set()
    for category, code_points in class_maps.items():
        if not predicate(category):
            continue
        selected |= code_points
    return selected
def top_level_category(q: str) -> set[int]:
    """Return all code points whose category name starts with one of the letters in ``q``."""
    def starts_with_wanted_letter(category: str) -> bool:
        return category[0] in q

    return category_set(starts_with_wanted_letter)
def patch_declaration(name: str, decl: str, raw: str) -> str:
    """Replace the marked ``{name}Declaration`` section of ``raw`` with ``decl``.

    The section runs from the text ``// {name}Declaration`` up to and
    including the matching ``// End{name}Declaration`` marker followed by
    three closing braces. Every such section is replaced by ``decl`` with its
    trailing whitespace removed; ``raw`` is returned unchanged when no section
    is found.

    The markers are matched literally and ``decl`` is inserted verbatim, so
    backslashes in the declaration are never treated as regex escapes or
    group references.
    """
    begin = re.escape(f'// {name}Declaration')
    # Kept as two adjacent literals, presumably so this source line does not itself contain the end marker.
    end = re.escape(f'// End{name}Declaration }}' '}}')
    replacement = decl.rstrip()
    # A callable replacement bypasses re.sub's template processing of backslashes.
    return re.sub(f'{begin}.+?{end}', lambda _match: replacement, raw, flags=re.DOTALL)
def gen_char_props() -> None:
    """Build the per-code-point property tables and write the generated C and Go sources.

    In order: store the bit widths of the runtime-sized fields, build a
    CharProps record for every code point, check both grapheme splitters
    against the test data, split the tables with splitbins and emit them to
    kitty/char-props-data.h and tools/wcswidth/char-props-data.go, then patch
    the marked declaration sections of kitty/char-props.h in place.
    """
    # Widths of the enum-valued fields are derived from the number of enum members and stored as field defaults.
    # category gets one extra member because the UnicodeCategory enum emitted below starts with an added 'Cn'.
    CharProps._field_defaults['grapheme_break'] = str(bitsize(len(grapheme_segmentation_maps)))
    CharProps._field_defaults['indic_conjunct_break'] = str(bitsize(len(incb_map)))
    CharProps._field_defaults['category'] = str(bitsize(len(class_maps) + 1))
    # Keep the segmentation tuples in agreement with CharProps about these widths.
    GraphemeSegmentationProps._field_defaults['grapheme_break'] = CharProps._field_defaults['grapheme_break']
    GraphemeSegmentationProps._field_defaults['indic_conjunct_break'] = CharProps._field_defaults['indic_conjunct_break']
    GraphemeSegmentationState._field_defaults['grapheme_break'] = GraphemeSegmentationProps._field_defaults['grapheme_break']
    invalid = class_maps['Cc'] | class_maps['Cs'] | non_characters
    non_printing = invalid | class_maps['Cf']
    # U+FE00..U+FE0F (the variation selectors) are added to the non-rendered set explicitly.
    non_rendered = non_printing | property_maps['Other_Default_Ignorable_Code_Point'] | set(range(0xfe00, 0xfe0f + 1))
    is_word_char = top_level_category('LN')
    is_punctuation = top_level_category('P')
    width_map: dict[int, int] = {}
    cat_map: dict[int, str] = {}
    for cat, chs in class_maps.items():
        for ch in chs:
            cat_map[ch] = cat

    def aw(s: Iterable[int], width: int) -> None:
        # Assign width to the code points in s without overriding earlier assignments:
        # the existing map is re-applied on top of the new entries.
        nonlocal width_map
        d = dict.fromkeys(s, width)
        d.update(width_map)
        width_map = d

    # Order matters: a code point keeps the width from the first call that mentions it.
    aw(flag_codepoints, 2)
    aw(doublewidth, 2)
    aw(wide_emoji, 2)
    aw(marks | {0}, 0)
    # Negative widths tag special classes rather than cell counts.
    aw(non_printing, -1)
    aw(ambiguous, -2)
    aw(class_maps['Co'], -3) # Private use
    aw(not_assigned, -4)
    # Invert the name -> code points maps into code point -> name lookups.
    gs_map: dict[int, str] = {}
    icb_map: dict[int, str] = {}
    for name, cps in grapheme_segmentation_maps.items():
        gs_map.update(dict.fromkeys(cps, name))
    for name, cps in incb_map.items():
        icb_map.update(dict.fromkeys(cps, name))
    # One record per code point; anything absent from the maps gets width 1, 'None' break classes and category 'Cn'.
    prop_array = tuple(
        CharProps(
            width=width_map.get(ch, 1), grapheme_break=gs_map.get(ch, 'None'), indic_conjunct_break=icb_map.get(ch, 'None'),
            is_invalid=ch in invalid, is_non_rendered=ch in non_rendered, is_emoji=ch in all_emoji, is_symbol=ch in all_symbols,
            is_extended_pictographic=ch in extended_pictographic, is_emoji_presentation_base=ch in emoji_presentation_bases,
            is_combining_char=ch in marks, category=cat_map.get(ch, 'Cn'), is_word_char=ch in is_word_char,
            is_punctuation=ch in is_punctuation,
        ) for ch in range(sys.maxunicode + 1))
    gsprops = tuple(GraphemeSegmentationProps(
        grapheme_break=x.grapheme_break, indic_conjunct_break=x.indic_conjunct_break,
        is_extended_pictographic=x.is_extended_pictographic) for x in prop_array)
    # Validate the direct state machine before generating anything from it.
    test_grapheme_segmentation(partial(split_into_graphemes, gsprops))
    # Precompute the result for every 16-bit key, then validate the table-driven splitter as well.
    gseg_results = tuple(GraphemeSegmentationKey.from_int(i).result() for i in range(1 << 16))
    test_grapheme_segmentation(partial(split_into_graphemes_with_table, gsprops, gseg_results))
    # splitbins is given the element size in bytes.
    t1, t2, t3, t_shift = splitbins(prop_array, CharProps.bitsize() // 8)
    g1, g2, g3, g_shift = splitbins(gseg_results, GraphemeSegmentationResult.bitsize() // 8)

    from .bitfields import make_bitfield
    # cen() collects the C enum text in memory; it is spliced into kitty/char-props.h at the end.
    buf = StringIO()
    cen = partial(print, file=buf)
    with create_header('kitty/char-props-data.h', include_data_types=False) as c, open('tools/wcswidth/char-props-data.go', 'w') as gof:
        gp = partial(print, file=gof)
        gp('package wcswidth')
        gp('import "unsafe"')
        gp(f'const MAX_UNICODE = {sys.maxunicode}')
        gp(f'const UNICODE_LIMIT = {sys.maxunicode + 1}')
        cen('// UCBDeclaration {{''{')
        cen(f'#define MAX_UNICODE ({sys.maxunicode}u)')
        generate_enum(cen, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
        # NOTE(review): this enum's C side goes to the data header (c), not the UCB buffer (cen) like its neighbours - confirm intended.
        generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
        generate_enum(cen, gp, 'UnicodeCategory', 'Cn', *class_maps, prefix='UC_')
        cen('// EndUCBDeclaration }}''}')
        gp(make_bitfield('tools/wcswidth', 'CharProps', *CharProps.go_fields(), add_package=False)[1])
        gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
        gp(CharProps.go_extra())
        gp(GraphemeSegmentationResult.go_extra())
        gen_multistage_table(c, gp, t1, t2, t3, t_shift, len(prop_array)-1)
        gen_multistage_table(c, gp, g1, g2, g3, g_shift, len(gseg_results)-1)
        c(GraphemeSegmentationKey.code_to_convert_to_int())
        c(GraphemeSegmentationState.c_declaration())
        gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
    # Format the Go file once it has been written out and closed.
    gofmt(gof.name)
    with open('kitty/char-props.h', 'r+') as f:
        raw = f.read()
        # Update the width_shift constant and the three marked declaration sections.
        nraw = re.sub(r'\d+/\*=width_shift\*/', f'{width_shift}/*=width_shift*/', raw)
        nraw = patch_declaration('CharProps', CharProps.c_declaration(), nraw)
        nraw = patch_declaration('GraphemeSegmentationResult', GraphemeSegmentationResult.c_declaration(), nraw)
        nraw = patch_declaration('UCB', buf.getvalue(), nraw)
        # Rewrite the header only when its content actually changed.
        if nraw != raw:
            f.seek(0)
            f.truncate()
            f.write(nraw)
def main(args: list[str]=sys.argv) -> None:
    """Run every parse step and then every generation step, in a fixed order.

    ``args`` is accepted but not used by this function.
    """
    parse_steps = (
        parse_ucd,
        parse_prop_list,
        parse_emoji,
        parse_eaw,
        parse_grapheme_segmentation,
        parse_test_data,
    )
    generate_steps = (
        gen_names,
        gen_rowcolumn_diacritics,
        gen_test_data,
        gen_char_props,
    )
    for step in parse_steps + generate_steps:
        step()
if __name__ == '__main__':
    import runpy

    # Execute the directory containing this file as a program (runpy runs its
    # __main__.py) and call the ``main`` found in the resulting namespace.
    here = os.path.dirname(os.path.abspath(__file__))
    m = runpy.run_path(here)
    entry_point = m['main']
    entry_point([sys.executable, 'wcwidth'])
|