tomlparse.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. #!/usr/bin/env python3
  2. """
  3. Simple parser for spec compliant toml files
  4. A simple toml parser for files that comply with the spec.
  5. Should only be used to parse `pyproject.toml` for `install_deps.py`.
  6. IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
  7. """
  8. from __future__ import annotations
  9. import datetime as dt
  10. import json
  11. import re
# Optional run of spaces and tabs (never newlines); reused inside the patterns below.
WS = r'(?:[\ \t]*)'
# A single-line string: either double-quoted with backslash escapes,
# or single-quoted with no escapes at all.
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key segment: a quoted string or a bare key made of letters, digits, `_` and `-`.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# A (possibly dotted) key: one or more segments joined by `.`,
# with optional spaces/tabs around every segment.
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` of a key/value pair plus any spaces/tabs that follow it.
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of WS; it can match the empty string, so a match always succeeds.
WS_RE = re.compile(WS)
# A table header at the start of a line: `[path]` or `[[path]]`.
# Group `is_list` is set when the header opens with `[[`; group `path` holds the dotted key.
# NOTE: the second closing bracket is optional regardless of how the header was opened.
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# The start of the next top-level expression: a table header, or a key followed by `=`.
# MULTILINE makes `^` match at the beginning of every line.
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)
# Whitespace allowed between array items: spaces/tabs, newlines and `#` comments
# running to the end of a line.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# A bare (unquoted) value: everything up to the next `,`, `}`, `]`, tab, newline or `#`.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
  22. def parse_key(value: str):
  23. for match in SINGLE_KEY_RE.finditer(value):
  24. if match[0][0] == '"':
  25. yield json.loads(match[0])
  26. elif match[0][0] == '\'':
  27. yield match[0][1:-1]
  28. else:
  29. yield match[0]
  30. def get_target(root: dict, paths: list[str], is_list=False):
  31. target = root
  32. for index, key in enumerate(paths, 1):
  33. use_list = is_list and index == len(paths)
  34. result = target.get(key)
  35. if result is None:
  36. result = [] if use_list else {}
  37. target[key] = result
  38. if isinstance(result, dict):
  39. target = result
  40. elif use_list:
  41. target = {}
  42. result.append(target)
  43. else:
  44. target = result[-1]
  45. assert isinstance(target, dict)
  46. return target
  47. def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
  48. index += 1
  49. if match := ws_re.match(data, index):
  50. index = match.end()
  51. while data[index] != end:
  52. index = yield True, index
  53. if match := ws_re.match(data, index):
  54. index = match.end()
  55. if data[index] == ',':
  56. index += 1
  57. if match := ws_re.match(data, index):
  58. index = match.end()
  59. assert data[index] == end
  60. yield False, index + 1
  61. def parse_value(data: str, index: int):
  62. if data[index] == '[':
  63. result = []
  64. indices = parse_enclosed(data, index, ']', LIST_WS_RE)
  65. valid, index = next(indices)
  66. while valid:
  67. index, value = parse_value(data, index)
  68. result.append(value)
  69. valid, index = indices.send(index)
  70. return index, result
  71. if data[index] == '{':
  72. result = {}
  73. indices = parse_enclosed(data, index, '}', WS_RE)
  74. valid, index = next(indices)
  75. while valid:
  76. valid, index = indices.send(parse_kv_pair(data, index, result))
  77. return index, result
  78. if match := STRING_RE.match(data, index):
  79. return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]
  80. match = LEFTOVER_VALUE_RE.match(data, index)
  81. assert match
  82. value = match[0].strip()
  83. for func in [
  84. int,
  85. float,
  86. dt.time.fromisoformat,
  87. dt.date.fromisoformat,
  88. dt.datetime.fromisoformat,
  89. {'true': True, 'false': False}.get,
  90. ]:
  91. try:
  92. value = func(value)
  93. break
  94. except Exception:
  95. pass
  96. return match.end(), value
  97. def parse_kv_pair(data: str, index: int, target: dict):
  98. match = KEY_RE.match(data, index)
  99. if not match:
  100. return None
  101. *keys, key = parse_key(match[0])
  102. match = EQUALS_RE.match(data, match.end())
  103. assert match
  104. index = match.end()
  105. index, value = parse_value(data, index)
  106. get_target(target, keys)[key] = value
  107. return index
  108. def parse_toml(data: str):
  109. root = {}
  110. target = root
  111. index = 0
  112. while True:
  113. match = EXPRESSION_RE.search(data, index)
  114. if not match:
  115. break
  116. if match.group('subtable'):
  117. index = match.end()
  118. path, is_list = match.group('path', 'is_list')
  119. target = get_target(root, list(parse_key(path)), bool(is_list))
  120. continue
  121. index = parse_kv_pair(data, match.start(), target)
  122. assert index is not None
  123. return root
  124. def main():
  125. import argparse
  126. from pathlib import Path
  127. parser = argparse.ArgumentParser()
  128. parser.add_argument('infile', type=Path, help='The TOML file to read as input')
  129. args = parser.parse_args()
  130. with args.infile.open('r', encoding='utf-8') as file:
  131. data = file.read()
  132. def default(obj):
  133. if isinstance(obj, (dt.date, dt.time, dt.datetime)):
  134. return obj.isoformat()
  135. print(json.dumps(parse_toml(data), default=default))
  136. if __name__ == '__main__':
  137. main()