mparser.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. # Copyright 2014-2017 The Meson development team
  2. # Licensed under the Apache License, Version 2.0 (the "License");
  3. # you may not use this file except in compliance with the License.
  4. # You may obtain a copy of the License at
  5. # http://www.apache.org/licenses/LICENSE-2.0
  6. # Unless required by applicable law or agreed to in writing, software
  7. # distributed under the License is distributed on an "AS IS" BASIS,
  8. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. # See the License for the specific language governing permissions and
  10. # limitations under the License.
  11. import re
  12. from .mesonlib import MesonException
  13. from . import mlog
  14. class ParseException(MesonException):
  15. def __init__(self, text, line, lineno, colno):
  16. # Format as error message, followed by the line with the error, followed by a caret to show the error column.
  17. super().__init__("%s\n%s\n%s" % (text, line, '%s^' % (' ' * colno)))
  18. self.lineno = lineno
  19. self.colno = colno
  20. class BlockParseException(MesonException):
  21. def __init__(self, text, line, lineno, colno, start_line, start_lineno, start_colno):
  22. # This can be formatted in two ways - one if the block start and end are on the same line, and a different way if they are on different lines.
  23. if lineno == start_lineno:
  24. # If block start and end are on the same line, it is formatted as:
  25. # Error message
  26. # Followed by the line with the error
  27. # Followed by a caret to show the block start
  28. # Followed by underscores
  29. # Followed by a caret to show the block end.
  30. super().__init__("%s\n%s\n%s" % (text, line, '%s^%s^' % (' ' * start_colno, '_' * (colno - start_colno - 1))))
  31. else:
  32. # If block start and end are on different lines, it is formatted as:
  33. # Error message
  34. # Followed by the line with the error
  35. # Followed by a caret to show the error column.
  36. # Followed by a message saying where the block started.
  37. # Followed by the line of the block start.
  38. # Followed by a caret for the block start.
  39. super().__init__("%s\n%s\n%s\nFor a block that started at %d,%d\n%s\n%s" % (text, line, '%s^' % (' ' * colno), start_lineno, start_colno, start_line, "%s^" % (' ' * start_colno)))
  40. self.lineno = lineno
  41. self.colno = colno
  42. class Token:
  43. def __init__(self, tid, subdir, line_start, lineno, colno, bytespan, value):
  44. self.tid = tid
  45. self.subdir = subdir
  46. self.line_start = line_start
  47. self.lineno = lineno
  48. self.colno = colno
  49. self.bytespan = bytespan
  50. self.value = value
  51. def __eq__(self, other):
  52. if isinstance(other, str):
  53. return self.tid == other
  54. return self.tid == other.tid
  55. class Lexer:
  56. def __init__(self, code):
  57. self.code = code
  58. self.keywords = {'true', 'false', 'if', 'else', 'elif',
  59. 'endif', 'and', 'or', 'not', 'foreach', 'endforeach'}
  60. self.token_specification = [
  61. # Need to be sorted longest to shortest.
  62. ('ignore', re.compile(r'[ \t]')),
  63. ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
  64. ('number', re.compile(r'\d+')),
  65. ('eol_cont', re.compile(r'\\\n')),
  66. ('eol', re.compile(r'\n')),
  67. ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
  68. ('comment', re.compile(r'#.*')),
  69. ('lparen', re.compile(r'\(')),
  70. ('rparen', re.compile(r'\)')),
  71. ('lbracket', re.compile(r'\[')),
  72. ('rbracket', re.compile(r'\]')),
  73. ('dblquote', re.compile(r'"')),
  74. ('string', re.compile(r"'([^'\\]|(\\.))*'")),
  75. ('comma', re.compile(r',')),
  76. ('plusassign', re.compile(r'\+=')),
  77. ('dot', re.compile(r'\.')),
  78. ('plus', re.compile(r'\+')),
  79. ('dash', re.compile(r'-')),
  80. ('star', re.compile(r'\*')),
  81. ('percent', re.compile(r'%')),
  82. ('fslash', re.compile(r'/')),
  83. ('colon', re.compile(r':')),
  84. ('equal', re.compile(r'==')),
  85. ('nequal', re.compile(r'!=')),
  86. ('assign', re.compile(r'=')),
  87. ('le', re.compile(r'<=')),
  88. ('lt', re.compile(r'<')),
  89. ('ge', re.compile(r'>=')),
  90. ('gt', re.compile(r'>')),
  91. ('questionmark', re.compile(r'\?')),
  92. ]
  93. def getline(self, line_start):
  94. return self.code[line_start:self.code.find('\n', line_start)]
  95. def lex(self, subdir):
  96. line_start = 0
  97. lineno = 1
  98. loc = 0
  99. par_count = 0
  100. bracket_count = 0
  101. col = 0
  102. while loc < len(self.code):
  103. matched = False
  104. value = None
  105. for (tid, reg) in self.token_specification:
  106. mo = reg.match(self.code, loc)
  107. if mo:
  108. curline = lineno
  109. curline_start = line_start
  110. col = mo.start() - line_start
  111. matched = True
  112. span_start = loc
  113. loc = mo.end()
  114. span_end = loc
  115. bytespan = (span_start, span_end)
  116. match_text = mo.group()
  117. if tid == 'ignore' or tid == 'comment':
  118. break
  119. elif tid == 'lparen':
  120. par_count += 1
  121. elif tid == 'rparen':
  122. par_count -= 1
  123. elif tid == 'lbracket':
  124. bracket_count += 1
  125. elif tid == 'rbracket':
  126. bracket_count -= 1
  127. elif tid == 'dblquote':
  128. raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
  129. elif tid == 'string':
  130. value = match_text[1:-1]\
  131. .replace(r"\'", "'")\
  132. .replace(r" \\ ".strip(), r" \ ".strip())\
  133. .replace("\\n", "\n")
  134. elif tid == 'multiline_string':
  135. tid = 'string'
  136. value = match_text[3:-3]
  137. lines = match_text.split('\n')
  138. if len(lines) > 1:
  139. lineno += len(lines) - 1
  140. line_start = mo.end() - len(lines[-1])
  141. elif tid == 'number':
  142. value = int(match_text)
  143. elif tid == 'eol' or tid == 'eol_cont':
  144. lineno += 1
  145. line_start = loc
  146. if par_count > 0 or bracket_count > 0:
  147. break
  148. elif tid == 'id':
  149. if match_text in self.keywords:
  150. tid = match_text
  151. else:
  152. value = match_text
  153. yield Token(tid, subdir, curline_start, curline, col, bytespan, value)
  154. break
  155. if not matched:
  156. raise ParseException('lexer', self.getline(line_start), lineno, col)
  157. class ElementaryNode:
  158. def __init__(self, token):
  159. self.lineno = token.lineno
  160. self.subdir = token.subdir
  161. self.colno = token.colno
  162. self.value = token.value
  163. self.bytespan = token.bytespan
  164. class BooleanNode(ElementaryNode):
  165. def __init__(self, token, value):
  166. super().__init__(token)
  167. self.value = value
  168. assert(isinstance(self.value, bool))
  169. class IdNode(ElementaryNode):
  170. def __init__(self, token):
  171. super().__init__(token)
  172. assert(isinstance(self.value, str))
  173. def __str__(self):
  174. return "Id node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
  175. class NumberNode(ElementaryNode):
  176. def __init__(self, token):
  177. super().__init__(token)
  178. assert(isinstance(self.value, int))
  179. class StringNode(ElementaryNode):
  180. def __init__(self, token):
  181. super().__init__(token)
  182. assert(isinstance(self.value, str))
  183. def __str__(self):
  184. return "String node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
  185. class ArrayNode:
  186. def __init__(self, args):
  187. self.subdir = args.subdir
  188. self.lineno = args.lineno
  189. self.colno = args.colno
  190. self.args = args
  191. class EmptyNode:
  192. def __init__(self, lineno, colno):
  193. self.subdir = ''
  194. self.lineno = lineno
  195. self.colno = colno
  196. self.value = None
  197. class OrNode:
  198. def __init__(self, left, right):
  199. self.subdir = left.subdir
  200. self.lineno = left.lineno
  201. self.colno = left.colno
  202. self.left = left
  203. self.right = right
  204. class AndNode:
  205. def __init__(self, left, right):
  206. self.subdir = left.subdir
  207. self.lineno = left.lineno
  208. self.colno = left.colno
  209. self.left = left
  210. self.right = right
  211. class ComparisonNode:
  212. def __init__(self, ctype, left, right):
  213. self.lineno = left.lineno
  214. self.colno = left.colno
  215. self.subdir = left.subdir
  216. self.left = left
  217. self.right = right
  218. self.ctype = ctype
  219. class ArithmeticNode:
  220. def __init__(self, operation, left, right):
  221. self.subdir = left.subdir
  222. self.lineno = left.lineno
  223. self.colno = left.colno
  224. self.left = left
  225. self.right = right
  226. self.operation = operation
  227. class NotNode:
  228. def __init__(self, location_node, value):
  229. self.subdir = location_node.subdir
  230. self.lineno = location_node.lineno
  231. self.colno = location_node.colno
  232. self.value = value
  233. class CodeBlockNode:
  234. def __init__(self, location_node):
  235. self.subdir = location_node.subdir
  236. self.lineno = location_node.lineno
  237. self.colno = location_node.colno
  238. self.lines = []
  239. class IndexNode:
  240. def __init__(self, iobject, index):
  241. self.iobject = iobject
  242. self.index = index
  243. self.subdir = iobject.subdir
  244. self.lineno = iobject.lineno
  245. self.colno = iobject.colno
  246. class MethodNode:
  247. def __init__(self, subdir, lineno, colno, source_object, name, args):
  248. self.subdir = subdir
  249. self.lineno = lineno
  250. self.colno = colno
  251. self.source_object = source_object
  252. self.name = name
  253. assert(isinstance(self.name, str))
  254. self.args = args
  255. class FunctionNode:
  256. def __init__(self, subdir, lineno, colno, func_name, args):
  257. self.subdir = subdir
  258. self.lineno = lineno
  259. self.colno = colno
  260. self.func_name = func_name
  261. assert(isinstance(func_name, str))
  262. self.args = args
  263. class AssignmentNode:
  264. def __init__(self, lineno, colno, var_name, value):
  265. self.lineno = lineno
  266. self.colno = colno
  267. self.var_name = var_name
  268. assert(isinstance(var_name, str))
  269. self.value = value
  270. class PlusAssignmentNode:
  271. def __init__(self, lineno, colno, var_name, value):
  272. self.lineno = lineno
  273. self.colno = colno
  274. self.var_name = var_name
  275. assert(isinstance(var_name, str))
  276. self.value = value
  277. class ForeachClauseNode:
  278. def __init__(self, lineno, colno, varname, items, block):
  279. self.lineno = lineno
  280. self.colno = colno
  281. self.varname = varname
  282. self.items = items
  283. self.block = block
  284. class IfClauseNode:
  285. def __init__(self, lineno, colno):
  286. self.lineno = lineno
  287. self.colno = colno
  288. self.ifs = []
  289. self.elseblock = EmptyNode(lineno, colno)
  290. class UMinusNode:
  291. def __init__(self, current_location, value):
  292. self.subdir = current_location.subdir
  293. self.lineno = current_location.lineno
  294. self.colno = current_location.colno
  295. self.value = value
  296. class IfNode:
  297. def __init__(self, lineno, colno, condition, block):
  298. self.lineno = lineno
  299. self.colno = colno
  300. self.condition = condition
  301. self.block = block
  302. class TernaryNode:
  303. def __init__(self, lineno, colno, condition, trueblock, falseblock):
  304. self.lineno = lineno
  305. self.colno = colno
  306. self.condition = condition
  307. self.trueblock = trueblock
  308. self.falseblock = falseblock
  309. class ArgumentNode:
  310. def __init__(self, token):
  311. self.lineno = token.lineno
  312. self.colno = token.colno
  313. self.subdir = token.subdir
  314. self.arguments = []
  315. self.commas = []
  316. self.kwargs = {}
  317. self.order_error = False
  318. def prepend(self, statement):
  319. if self.num_kwargs() > 0:
  320. self.order_error = True
  321. if not isinstance(statement, EmptyNode):
  322. self.arguments = [statement] + self.arguments
  323. def append(self, statement):
  324. if self.num_kwargs() > 0:
  325. self.order_error = True
  326. if not isinstance(statement, EmptyNode):
  327. self.arguments += [statement]
  328. def set_kwarg(self, name, value):
  329. if name in self.kwargs:
  330. mlog.warning('Keyword argument "%s" defined multiple times. This will be a an error in future Meson releases.' % name)
  331. self.kwargs[name] = value
  332. def num_args(self):
  333. return len(self.arguments)
  334. def num_kwargs(self):
  335. return len(self.kwargs)
  336. def incorrect_order(self):
  337. return self.order_error
  338. def __len__(self):
  339. return self.num_args() # Fixme
  340. comparison_map = {'equal': '==',
  341. 'nequal': '!=',
  342. 'lt': '<',
  343. 'le': '<=',
  344. 'gt': '>',
  345. 'ge': '>='
  346. }
  347. # Recursive descent parser for Meson's definition language.
  348. # Very basic apart from the fact that we have many precedence
  349. # levels so there are not enough words to describe them all.
  350. # Enter numbering:
  351. #
  352. # 1 assignment
  353. # 2 or
  354. # 3 and
  355. # 4 comparison
  356. # 5 arithmetic
  357. # 6 negation
  358. # 7 funcall, method call
  359. # 8 parentheses
  360. # 9 plain token
  361. class Parser:
  362. def __init__(self, code, subdir):
  363. self.lexer = Lexer(code)
  364. self.stream = self.lexer.lex(subdir)
  365. self.current = Token('eof', '', 0, 0, 0, (0, 0), None)
  366. self.getsym()
  367. self.in_ternary = False
  368. def getsym(self):
  369. try:
  370. self.current = next(self.stream)
  371. except StopIteration:
  372. self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)
  373. def getline(self):
  374. return self.lexer.getline(self.current.line_start)
  375. def accept(self, s):
  376. if self.current.tid == s:
  377. self.getsym()
  378. return True
  379. return False
  380. def expect(self, s):
  381. if self.accept(s):
  382. return True
  383. raise ParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno)
  384. def block_expect(self, s, block_start):
  385. if self.accept(s):
  386. return True
  387. raise BlockParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno)
  388. def parse(self):
  389. block = self.codeblock()
  390. self.expect('eof')
  391. return block
  392. def statement(self):
  393. return self.e1()
  394. def e1(self):
  395. left = self.e2()
  396. if self.accept('plusassign'):
  397. value = self.e1()
  398. if not isinstance(left, IdNode):
  399. raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno)
  400. return PlusAssignmentNode(left.lineno, left.colno, left.value, value)
  401. elif self.accept('assign'):
  402. value = self.e1()
  403. if not isinstance(left, IdNode):
  404. raise ParseException('Assignment target must be an id.',
  405. self.getline(), left.lineno, left.colno)
  406. return AssignmentNode(left.lineno, left.colno, left.value, value)
  407. elif self.accept('questionmark'):
  408. if self.in_ternary:
  409. raise ParseException('Nested ternary operators are not allowed.',
  410. self.getline(), left.lineno, left.colno)
  411. self.in_ternary = True
  412. trueblock = self.e1()
  413. self.expect('colon')
  414. falseblock = self.e1()
  415. self.in_ternary = False
  416. return TernaryNode(left.lineno, left.colno, left, trueblock, falseblock)
  417. return left
  418. def e2(self):
  419. left = self.e3()
  420. while self.accept('or'):
  421. if isinstance(left, EmptyNode):
  422. raise ParseException('Invalid or clause.',
  423. self.getline(), left.lineno, left.colno)
  424. left = OrNode(left, self.e3())
  425. return left
  426. def e3(self):
  427. left = self.e4()
  428. while self.accept('and'):
  429. if isinstance(left, EmptyNode):
  430. raise ParseException('Invalid and clause.',
  431. self.getline(), left.lineno, left.colno)
  432. left = AndNode(left, self.e4())
  433. return left
  434. def e4(self):
  435. left = self.e5()
  436. for nodename, operator_type in comparison_map.items():
  437. if self.accept(nodename):
  438. return ComparisonNode(operator_type, left, self.e5())
  439. return left
  440. def e5(self):
  441. return self.e5add()
  442. def e5add(self):
  443. left = self.e5sub()
  444. if self.accept('plus'):
  445. return ArithmeticNode('add', left, self.e5add())
  446. return left
  447. def e5sub(self):
  448. left = self.e5mod()
  449. if self.accept('dash'):
  450. return ArithmeticNode('sub', left, self.e5sub())
  451. return left
  452. def e5mod(self):
  453. left = self.e5mul()
  454. if self.accept('percent'):
  455. return ArithmeticNode('mod', left, self.e5mod())
  456. return left
  457. def e5mul(self):
  458. left = self.e5div()
  459. if self.accept('star'):
  460. return ArithmeticNode('mul', left, self.e5mul())
  461. return left
  462. def e5div(self):
  463. left = self.e6()
  464. if self.accept('fslash'):
  465. return ArithmeticNode('div', left, self.e5div())
  466. return left
  467. def e6(self):
  468. if self.accept('not'):
  469. return NotNode(self.current, self.e7())
  470. if self.accept('dash'):
  471. return UMinusNode(self.current, self.e7())
  472. return self.e7()
  473. def e7(self):
  474. left = self.e8()
  475. block_start = self.current
  476. if self.accept('lparen'):
  477. args = self.args()
  478. self.block_expect('rparen', block_start)
  479. if not isinstance(left, IdNode):
  480. raise ParseException('Function call must be applied to plain id',
  481. self.getline(), left.lineno, left.colno)
  482. left = FunctionNode(left.subdir, left.lineno, left.colno, left.value, args)
  483. go_again = True
  484. while go_again:
  485. go_again = False
  486. if self.accept('dot'):
  487. go_again = True
  488. left = self.method_call(left)
  489. if self.accept('lbracket'):
  490. go_again = True
  491. left = self.index_call(left)
  492. return left
  493. def e8(self):
  494. block_start = self.current
  495. if self.accept('lparen'):
  496. e = self.statement()
  497. self.block_expect('rparen', block_start)
  498. return e
  499. elif self.accept('lbracket'):
  500. args = self.args()
  501. self.block_expect('rbracket', block_start)
  502. return ArrayNode(args)
  503. else:
  504. return self.e9()
  505. def e9(self):
  506. t = self.current
  507. if self.accept('true'):
  508. return BooleanNode(t, True)
  509. if self.accept('false'):
  510. return BooleanNode(t, False)
  511. if self.accept('id'):
  512. return IdNode(t)
  513. if self.accept('number'):
  514. return NumberNode(t)
  515. if self.accept('string'):
  516. return StringNode(t)
  517. return EmptyNode(self.current.lineno, self.current.colno)
  518. def args(self):
  519. s = self.statement()
  520. a = ArgumentNode(s)
  521. while not isinstance(s, EmptyNode):
  522. potential = self.current
  523. if self.accept('comma'):
  524. a.commas.append(potential)
  525. a.append(s)
  526. elif self.accept('colon'):
  527. if not isinstance(s, IdNode):
  528. raise ParseException('Keyword argument must be a plain identifier.',
  529. self.getline(), s.lineno, s.colno)
  530. a.set_kwarg(s.value, self.statement())
  531. potential = self.current
  532. if not self.accept('comma'):
  533. return a
  534. a.commas.append(potential)
  535. else:
  536. a.append(s)
  537. return a
  538. s = self.statement()
  539. return a
  540. def method_call(self, source_object):
  541. methodname = self.e9()
  542. if not(isinstance(methodname, IdNode)):
  543. raise ParseException('Method name must be plain id',
  544. self.getline(), self.current.lineno, self.current.colno)
  545. self.expect('lparen')
  546. args = self.args()
  547. self.expect('rparen')
  548. method = MethodNode(methodname.subdir, methodname.lineno, methodname.colno, source_object, methodname.value, args)
  549. if self.accept('dot'):
  550. return self.method_call(method)
  551. return method
  552. def index_call(self, source_object):
  553. index_statement = self.statement()
  554. self.expect('rbracket')
  555. return IndexNode(source_object, index_statement)
  556. def foreachblock(self):
  557. t = self.current
  558. self.expect('id')
  559. varname = t
  560. self.expect('colon')
  561. items = self.statement()
  562. block = self.codeblock()
  563. return ForeachClauseNode(varname.lineno, varname.colno, varname, items, block)
  564. def ifblock(self):
  565. condition = self.statement()
  566. clause = IfClauseNode(condition.lineno, condition.colno)
  567. self.expect('eol')
  568. block = self.codeblock()
  569. clause.ifs.append(IfNode(clause.lineno, clause.colno, condition, block))
  570. self.elseifblock(clause)
  571. clause.elseblock = self.elseblock()
  572. return clause
  573. def elseifblock(self, clause):
  574. while self.accept('elif'):
  575. s = self.statement()
  576. self.expect('eol')
  577. b = self.codeblock()
  578. clause.ifs.append(IfNode(s.lineno, s.colno, s, b))
  579. def elseblock(self):
  580. if self.accept('else'):
  581. self.expect('eol')
  582. return self.codeblock()
  583. def line(self):
  584. block_start = self.current
  585. if self.current == 'eol':
  586. return EmptyNode(self.current.lineno, self.current.colno)
  587. if self.accept('if'):
  588. block = self.ifblock()
  589. self.block_expect('endif', block_start)
  590. return block
  591. if self.accept('foreach'):
  592. block = self.foreachblock()
  593. self.block_expect('endforeach', block_start)
  594. return block
  595. return self.statement()
  596. def codeblock(self):
  597. block = CodeBlockNode(self.current)
  598. cond = True
  599. while cond:
  600. curline = self.line()
  601. if not isinstance(curline, EmptyNode):
  602. block.lines.append(curline)
  603. cond = self.accept('eol')
  604. return block