123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- #!/usr/bin/env python3
- """
- Simple parser for spec compliant toml files
- A simple toml parser for files that comply with the spec.
- Should only be used to parse `pyproject.toml` for `install_deps.py`.
- IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
- """
- from __future__ import annotations
- import datetime as dt
- import json
- import re
# --- Lexical building blocks -------------------------------------------------

# Zero or more spaces/tabs (never a newline); spliced into the patterns below.
WS = r'(?:[\ \t]*)'

# A single-line string: double-quoted with backslash escapes, or single-quoted
# with no escapes at all.
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')

# One key segment: either a quoted string or a bare key.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')

# A full key: one or more segments joined by dots, with optional blanks
# around every segment.
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')

# The `=` of a key/value pair together with the blanks that follow it.
EQUALS_RE = re.compile(rf'={WS}')

# Blanks on their own, as a compiled pattern.
WS_RE = re.compile(WS)

# --- Top-level expressions ---------------------------------------------------

# A table header at the start of a line: `[path]`, or `[[path]]` when the
# optional `is_list` group matched.
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'

# The start of the next expression on a line: a table header or `key =`.
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# --- Values ------------------------------------------------------------------

# Filler allowed between array items: blanks, newlines and `#` comments.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')

# Everything up to the next `,`, `}`, `]`, tab, newline or `#`; used for
# values that are neither strings, arrays nor inline tables.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
def parse_key(value: str):
    """Yield the individual segments of a (possibly dotted) key.

    Every match of ``SINGLE_KEY_RE`` in *value* produces one segment:
    double-quoted segments are decoded with ``json.loads``, single-quoted
    segments have their quotes stripped, and bare segments are yielded
    unchanged.
    """
    for segment in SINGLE_KEY_RE.finditer(value):
        token = segment.group(0)
        opening = token[0]
        if opening == '"':
            # Decode the escape sequences via the JSON string decoder.
            yield json.loads(token)
        elif opening == '\'':
            # Single-quoted: take the content verbatim.
            yield token[1:-1]
        else:
            yield token
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk *paths* down from *root*, creating missing levels, and return the
    dict found at the end.

    Missing keys are created as empty dicts. When *is_list* is true, the last
    path element is treated as a list of dicts: the list is created if
    missing and a new empty dict is appended to it and returned. When an
    intermediate element is a list, the walk continues in its last item.
    """
    current = root

    for position, name in enumerate(paths, 1):
        # Only the final path element may be the list itself.
        wants_list = is_list and position == len(paths)

        entry = current.get(name)
        if entry is None:
            entry = current[name] = [] if wants_list else {}

        if isinstance(entry, dict):
            current = entry
            continue

        if wants_list:
            # Start a fresh item at the end of the list.
            current = {}
            entry.append(current)
        else:
            # Descend into the most recently added item.
            current = entry[-1]

    assert isinstance(current, dict)
    return current
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Generator that steps through the items of a bracketed construct.

    *index* points at the opening bracket. For every item the generator
    yields ``(True, start)`` and expects the caller to ``send()`` back the
    index just past that item. Once the *end* character is reached it yields
    ``(False, index_after_end)``.

    *ws_re* is matched before and after each item and after an optional
    comma to skip filler.
    """
    def skip_filler(position):
        # Advance past whatever *ws_re* matches at *position*, if anything.
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket and any filler behind it.
    cursor = skip_filler(index + 1)

    while data[cursor] != end:
        # Hand the item start to the caller; resume where the item ended.
        cursor = yield True, cursor

        cursor = skip_filler(cursor)
        if data[cursor] == ',':
            cursor += 1
        cursor = skip_filler(cursor)

    assert data[cursor] == end
    yield False, cursor + 1
def parse_value(data: str, index: int):
    """Parse the value that starts at ``data[index]``.

    Returns a ``(end_index, value)`` tuple, where *end_index* is the position
    just past the consumed text.

    * ``[`` starts an array; its items are parsed recursively.
    * ``{`` starts an inline table; its entries are parsed with
      ``parse_kv_pair``.
    * A match of ``STRING_RE`` is a string; double-quoted strings are decoded
      with ``json.loads``, single-quoted ones are taken verbatim.
    * Anything else is stripped and tried, in order, as ``int``, ``float``,
      time, date, datetime and finally as ``true``/``false``. The last step
      is a dict lookup, so text that fits none of these becomes ``None``.
    """
    if data[index] == '[':
        result = []

        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            # parse_kv_pair stores the entry in `result` and returns the
            # index just past it, which is what the generator expects back.
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    for func in (
        int,
        float,
        dt.time.fromisoformat,
        dt.date.fromisoformat,
        dt.datetime.fromisoformat,
        # Never raises: returns None for anything but 'true'/'false'.
        {'true': True, 'false': False}.get,
    ):
        try:
            value = func(value)
        except ValueError:
            # Not this type; try the next converter. Only ValueError is
            # expected for `str` input, so other errors are not swallowed.
            continue
        break

    return match.end(), value
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair starting at *index* and store it.

    The key may be dotted; all but its last segment are resolved (and created
    where missing) below *target* via ``get_target``, and the value is stored
    under the last segment.

    Returns the index just past the value, or ``None`` when no key matches
    at *index*.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    # Split the dotted key into its parent path and the final segment.
    *parents, leaf = parse_key(key_match.group(0))

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    end, parsed = parse_value(data, equals.end())
    container = get_target(target, parents)
    container[leaf] = parsed
    return end
def parse_toml(data: str):
    """Parse TOML text *data* and return it as nested dicts and lists.

    The text is scanned for successive ``EXPRESSION_RE`` matches. A table
    header switches the table that subsequent pairs are written to; any
    other match is a key/value pair stored in the current table.
    """
    document = {}
    current = document
    position = 0

    while (found := EXPRESSION_RE.search(data, position)) is not None:
        if found.group('subtable'):
            # Table header: later pairs go into this (possibly new) table.
            position = found.end()
            header, list_marker = found.group('path', 'is_list')
            current = get_target(document, list(parse_key(header)), bool(list_marker))
        else:
            # Key/value pair: re-parse from the start of the key.
            position = parse_kv_pair(data, found.start(), current)
            assert position is not None

    return document
def main():
    """Command-line entry point: read a TOML file and print it as JSON.

    Takes one positional argument, ``infile``, reads it as UTF-8, parses it
    with ``parse_toml`` and prints the result as a single JSON document.
    Date, time and datetime values are emitted as ISO 8601 strings.
    """
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    args = parser.parse_args()

    data = args.infile.read_text(encoding='utf-8')

    def default(obj):
        # json.dumps calls this for objects it cannot serialise itself.
        if isinstance(obj, (dt.date, dt.time, dt.datetime)):
            return obj.isoformat()
        # The hook must raise for unsupported types; falling off the end
        # would return None and emit `null` for them instead.
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    print(json.dumps(parse_toml(data), default=default))


if __name__ == '__main__':
    main()
|