123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771 |
- # Copyright 2014-2017 The Meson development team
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- # http://www.apache.org/licenses/LICENSE-2.0
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import re
- import codecs
- from .mesonlib import MesonException
- from . import mlog
- # This is the regex for the supported escape sequences of a regular string
- # literal, like 'abc\x00'
- ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
- ( \\U........ # 8-digit hex escapes
- | \\u.... # 4-digit hex escapes
- | \\x.. # 2-digit hex escapes
- | \\[0-7]{1,3} # Octal escapes
- | \\N\{[^}]+\} # Unicode characters by name
- | \\[\\'abfnrtv] # Single-character escapes
- )''', re.UNICODE | re.VERBOSE)
- class MesonUnicodeDecodeError(MesonException):
- def __init__(self, match):
- super().__init__("%s" % match)
- self.match = match
- def decode_match(match):
- try:
- return codecs.decode(match.group(0), 'unicode_escape')
- except UnicodeDecodeError as err:
- raise MesonUnicodeDecodeError(match.group(0))
- class ParseException(MesonException):
- def __init__(self, text, line, lineno, colno):
- # Format as error message, followed by the line with the error, followed by a caret to show the error column.
- super().__init__("%s\n%s\n%s" % (text, line, '%s^' % (' ' * colno)))
- self.lineno = lineno
- self.colno = colno
- class BlockParseException(MesonException):
- def __init__(self, text, line, lineno, colno, start_line, start_lineno, start_colno):
- # This can be formatted in two ways - one if the block start and end are on the same line, and a different way if they are on different lines.
- if lineno == start_lineno:
- # If block start and end are on the same line, it is formatted as:
- # Error message
- # Followed by the line with the error
- # Followed by a caret to show the block start
- # Followed by underscores
- # Followed by a caret to show the block end.
- super().__init__("%s\n%s\n%s" % (text, line, '%s^%s^' % (' ' * start_colno, '_' * (colno - start_colno - 1))))
- else:
- # If block start and end are on different lines, it is formatted as:
- # Error message
- # Followed by the line with the error
- # Followed by a caret to show the error column.
- # Followed by a message saying where the block started.
- # Followed by the line of the block start.
- # Followed by a caret for the block start.
- super().__init__("%s\n%s\n%s\nFor a block that started at %d,%d\n%s\n%s" % (text, line, '%s^' % (' ' * colno), start_lineno, start_colno, start_line, "%s^" % (' ' * start_colno)))
- self.lineno = lineno
- self.colno = colno
- class Token:
- def __init__(self, tid, subdir, line_start, lineno, colno, bytespan, value):
- self.tid = tid
- self.subdir = subdir
- self.line_start = line_start
- self.lineno = lineno
- self.colno = colno
- self.bytespan = bytespan
- self.value = value
- def __eq__(self, other):
- if isinstance(other, str):
- return self.tid == other
- return self.tid == other.tid
- class Lexer:
- def __init__(self, code):
- self.code = code
- self.keywords = {'true', 'false', 'if', 'else', 'elif',
- 'endif', 'and', 'or', 'not', 'foreach', 'endforeach'}
- self.token_specification = [
- # Need to be sorted longest to shortest.
- ('ignore', re.compile(r'[ \t]')),
- ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
- ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')),
- ('eol_cont', re.compile(r'\\\n')),
- ('eol', re.compile(r'\n')),
- ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
- ('comment', re.compile(r'#.*')),
- ('lparen', re.compile(r'\(')),
- ('rparen', re.compile(r'\)')),
- ('lbracket', re.compile(r'\[')),
- ('rbracket', re.compile(r'\]')),
- ('lcurl', re.compile(r'\{')),
- ('rcurl', re.compile(r'\}')),
- ('dblquote', re.compile(r'"')),
- ('string', re.compile(r"'([^'\\]|(\\.))*'")),
- ('comma', re.compile(r',')),
- ('plusassign', re.compile(r'\+=')),
- ('dot', re.compile(r'\.')),
- ('plus', re.compile(r'\+')),
- ('dash', re.compile(r'-')),
- ('star', re.compile(r'\*')),
- ('percent', re.compile(r'%')),
- ('fslash', re.compile(r'/')),
- ('colon', re.compile(r':')),
- ('equal', re.compile(r'==')),
- ('nequal', re.compile(r'!=')),
- ('assign', re.compile(r'=')),
- ('le', re.compile(r'<=')),
- ('lt', re.compile(r'<')),
- ('ge', re.compile(r'>=')),
- ('gt', re.compile(r'>')),
- ('questionmark', re.compile(r'\?')),
- ]
- def getline(self, line_start):
- return self.code[line_start:self.code.find('\n', line_start)]
- def lex(self, subdir):
- line_start = 0
- lineno = 1
- loc = 0
- par_count = 0
- bracket_count = 0
- curl_count = 0
- col = 0
- while loc < len(self.code):
- matched = False
- value = None
- for (tid, reg) in self.token_specification:
- mo = reg.match(self.code, loc)
- if mo:
- curline = lineno
- curline_start = line_start
- col = mo.start() - line_start
- matched = True
- span_start = loc
- loc = mo.end()
- span_end = loc
- bytespan = (span_start, span_end)
- match_text = mo.group()
- if tid == 'ignore' or tid == 'comment':
- break
- elif tid == 'lparen':
- par_count += 1
- elif tid == 'rparen':
- par_count -= 1
- elif tid == 'lbracket':
- bracket_count += 1
- elif tid == 'rbracket':
- bracket_count -= 1
- elif tid == 'lcurl':
- curl_count += 1
- elif tid == 'rcurl':
- curl_count -= 1
- elif tid == 'dblquote':
- raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
- elif tid == 'string':
- # Handle here and not on the regexp to give a better error message.
- if match_text.find("\n") != -1:
- mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
- This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
- value = match_text[1:-1]
- try:
- value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
- except MesonUnicodeDecodeError as err:
- raise MesonException("Failed to parse escape sequence: '{}' in string:\n {}".format(err.match, match_text))
- elif tid == 'multiline_string':
- tid = 'string'
- value = match_text[3:-3]
- lines = match_text.split('\n')
- if len(lines) > 1:
- lineno += len(lines) - 1
- line_start = mo.end() - len(lines[-1])
- elif tid == 'number':
- value = int(match_text, base=0)
- elif tid == 'eol' or tid == 'eol_cont':
- lineno += 1
- line_start = loc
- if par_count > 0 or bracket_count > 0 or curl_count > 0:
- break
- elif tid == 'id':
- if match_text in self.keywords:
- tid = match_text
- else:
- value = match_text
- yield Token(tid, subdir, curline_start, curline, col, bytespan, value)
- break
- if not matched:
- raise ParseException('lexer', self.getline(line_start), lineno, col)
- class ElementaryNode:
- def __init__(self, token):
- self.lineno = token.lineno
- self.subdir = token.subdir
- self.colno = token.colno
- self.value = token.value
- self.bytespan = token.bytespan
- class BooleanNode(ElementaryNode):
- def __init__(self, token, value):
- super().__init__(token)
- self.value = value
- assert(isinstance(self.value, bool))
- class IdNode(ElementaryNode):
- def __init__(self, token):
- super().__init__(token)
- assert(isinstance(self.value, str))
- def __str__(self):
- return "Id node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
- class NumberNode(ElementaryNode):
- def __init__(self, token):
- super().__init__(token)
- assert(isinstance(self.value, int))
- class StringNode(ElementaryNode):
- def __init__(self, token):
- super().__init__(token)
- assert(isinstance(self.value, str))
- def __str__(self):
- return "String node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno)
- class ArrayNode:
- def __init__(self, args):
- self.subdir = args.subdir
- self.lineno = args.lineno
- self.colno = args.colno
- self.args = args
- class DictNode:
- def __init__(self, args):
- self.subdir = args.subdir
- self.lineno = args.lineno
- self.colno = args.colno
- self.args = args
- class EmptyNode:
- def __init__(self, lineno, colno):
- self.subdir = ''
- self.lineno = lineno
- self.colno = colno
- self.value = None
- class OrNode:
- def __init__(self, left, right):
- self.subdir = left.subdir
- self.lineno = left.lineno
- self.colno = left.colno
- self.left = left
- self.right = right
- class AndNode:
- def __init__(self, left, right):
- self.subdir = left.subdir
- self.lineno = left.lineno
- self.colno = left.colno
- self.left = left
- self.right = right
- class ComparisonNode:
- def __init__(self, ctype, left, right):
- self.lineno = left.lineno
- self.colno = left.colno
- self.subdir = left.subdir
- self.left = left
- self.right = right
- self.ctype = ctype
- class ArithmeticNode:
- def __init__(self, operation, left, right):
- self.subdir = left.subdir
- self.lineno = left.lineno
- self.colno = left.colno
- self.left = left
- self.right = right
- self.operation = operation
- class NotNode:
- def __init__(self, location_node, value):
- self.subdir = location_node.subdir
- self.lineno = location_node.lineno
- self.colno = location_node.colno
- self.value = value
- class CodeBlockNode:
- def __init__(self, location_node):
- self.subdir = location_node.subdir
- self.lineno = location_node.lineno
- self.colno = location_node.colno
- self.lines = []
- class IndexNode:
- def __init__(self, iobject, index):
- self.iobject = iobject
- self.index = index
- self.subdir = iobject.subdir
- self.lineno = iobject.lineno
- self.colno = iobject.colno
- class MethodNode:
- def __init__(self, subdir, lineno, colno, source_object, name, args):
- self.subdir = subdir
- self.lineno = lineno
- self.colno = colno
- self.source_object = source_object
- self.name = name
- assert(isinstance(self.name, str))
- self.args = args
- class FunctionNode:
- def __init__(self, subdir, lineno, colno, func_name, args):
- self.subdir = subdir
- self.lineno = lineno
- self.colno = colno
- self.func_name = func_name
- assert(isinstance(func_name, str))
- self.args = args
- class AssignmentNode:
- def __init__(self, lineno, colno, var_name, value):
- self.lineno = lineno
- self.colno = colno
- self.var_name = var_name
- assert(isinstance(var_name, str))
- self.value = value
- class PlusAssignmentNode:
- def __init__(self, lineno, colno, var_name, value):
- self.lineno = lineno
- self.colno = colno
- self.var_name = var_name
- assert(isinstance(var_name, str))
- self.value = value
- class ForeachClauseNode:
- def __init__(self, lineno, colno, varnames, items, block):
- self.lineno = lineno
- self.colno = colno
- self.varnames = varnames
- self.items = items
- self.block = block
- class IfClauseNode:
- def __init__(self, lineno, colno):
- self.lineno = lineno
- self.colno = colno
- self.ifs = []
- self.elseblock = EmptyNode(lineno, colno)
- class UMinusNode:
- def __init__(self, current_location, value):
- self.subdir = current_location.subdir
- self.lineno = current_location.lineno
- self.colno = current_location.colno
- self.value = value
- class IfNode:
- def __init__(self, lineno, colno, condition, block):
- self.lineno = lineno
- self.colno = colno
- self.condition = condition
- self.block = block
- class TernaryNode:
- def __init__(self, lineno, colno, condition, trueblock, falseblock):
- self.lineno = lineno
- self.colno = colno
- self.condition = condition
- self.trueblock = trueblock
- self.falseblock = falseblock
- class ArgumentNode:
- def __init__(self, token):
- self.lineno = token.lineno
- self.colno = token.colno
- self.subdir = token.subdir
- self.arguments = []
- self.commas = []
- self.kwargs = {}
- self.kwargs_dict = None
- self.order_error = False
- def prepend(self, statement):
- if self.num_kwargs() > 0:
- self.order_error = True
- if not isinstance(statement, EmptyNode):
- self.arguments = [statement] + self.arguments
- def append(self, statement):
- if self.num_kwargs() > 0:
- self.order_error = True
- if not isinstance(statement, EmptyNode):
- self.arguments += [statement]
- def set_kwarg(self, name, value):
- if name in self.kwargs:
- mlog.warning('Keyword argument "{}" defined multiple times.'.format(name), location=self)
- mlog.warning('This will be an error in future Meson releases.')
- self.kwargs[name] = value
- def num_args(self):
- return len(self.arguments)
- def num_kwargs(self):
- return len(self.kwargs)
- def incorrect_order(self):
- return self.order_error
- def __len__(self):
- return self.num_args() # Fixme
- comparison_map = {'equal': '==',
- 'nequal': '!=',
- 'lt': '<',
- 'le': '<=',
- 'gt': '>',
- 'ge': '>='
- }
- # Recursive descent parser for Meson's definition language.
- # Very basic apart from the fact that we have many precedence
- # levels so there are not enough words to describe them all.
- # Enter numbering:
- #
- # 1 assignment
- # 2 or
- # 3 and
- # 4 comparison
- # 5 arithmetic
- # 6 negation
- # 7 funcall, method call
- # 8 parentheses
- # 9 plain token
- class Parser:
- def __init__(self, code, subdir):
- self.lexer = Lexer(code)
- self.stream = self.lexer.lex(subdir)
- self.current = Token('eof', '', 0, 0, 0, (0, 0), None)
- self.getsym()
- self.in_ternary = False
- def getsym(self):
- try:
- self.current = next(self.stream)
- except StopIteration:
- self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)
- def getline(self):
- return self.lexer.getline(self.current.line_start)
- def accept(self, s):
- if self.current.tid == s:
- self.getsym()
- return True
- return False
- def expect(self, s):
- if self.accept(s):
- return True
- raise ParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno)
- def block_expect(self, s, block_start):
- if self.accept(s):
- return True
- raise BlockParseException('Expecting %s got %s.' % (s, self.current.tid), self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno)
- def parse(self):
- block = self.codeblock()
- self.expect('eof')
- return block
- def statement(self):
- return self.e1()
- def e1(self):
- left = self.e2()
- if self.accept('plusassign'):
- value = self.e1()
- if not isinstance(left, IdNode):
- raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno)
- return PlusAssignmentNode(left.lineno, left.colno, left.value, value)
- elif self.accept('assign'):
- value = self.e1()
- if not isinstance(left, IdNode):
- raise ParseException('Assignment target must be an id.',
- self.getline(), left.lineno, left.colno)
- return AssignmentNode(left.lineno, left.colno, left.value, value)
- elif self.accept('questionmark'):
- if self.in_ternary:
- raise ParseException('Nested ternary operators are not allowed.',
- self.getline(), left.lineno, left.colno)
- self.in_ternary = True
- trueblock = self.e1()
- self.expect('colon')
- falseblock = self.e1()
- self.in_ternary = False
- return TernaryNode(left.lineno, left.colno, left, trueblock, falseblock)
- return left
- def e2(self):
- left = self.e3()
- while self.accept('or'):
- if isinstance(left, EmptyNode):
- raise ParseException('Invalid or clause.',
- self.getline(), left.lineno, left.colno)
- left = OrNode(left, self.e3())
- return left
- def e3(self):
- left = self.e4()
- while self.accept('and'):
- if isinstance(left, EmptyNode):
- raise ParseException('Invalid and clause.',
- self.getline(), left.lineno, left.colno)
- left = AndNode(left, self.e4())
- return left
- def e4(self):
- left = self.e5()
- for nodename, operator_type in comparison_map.items():
- if self.accept(nodename):
- return ComparisonNode(operator_type, left, self.e5())
- return left
- def e5(self):
- return self.e5add()
- def e5add(self):
- left = self.e5sub()
- if self.accept('plus'):
- return ArithmeticNode('add', left, self.e5add())
- return left
- def e5sub(self):
- left = self.e5mod()
- if self.accept('dash'):
- return ArithmeticNode('sub', left, self.e5sub())
- return left
- def e5mod(self):
- left = self.e5mul()
- if self.accept('percent'):
- return ArithmeticNode('mod', left, self.e5mod())
- return left
- def e5mul(self):
- left = self.e5div()
- if self.accept('star'):
- return ArithmeticNode('mul', left, self.e5mul())
- return left
- def e5div(self):
- left = self.e6()
- if self.accept('fslash'):
- return ArithmeticNode('div', left, self.e5div())
- return left
- def e6(self):
- if self.accept('not'):
- return NotNode(self.current, self.e7())
- if self.accept('dash'):
- return UMinusNode(self.current, self.e7())
- return self.e7()
- def e7(self):
- left = self.e8()
- block_start = self.current
- if self.accept('lparen'):
- args = self.args()
- self.block_expect('rparen', block_start)
- if not isinstance(left, IdNode):
- raise ParseException('Function call must be applied to plain id',
- self.getline(), left.lineno, left.colno)
- left = FunctionNode(left.subdir, left.lineno, left.colno, left.value, args)
- go_again = True
- while go_again:
- go_again = False
- if self.accept('dot'):
- go_again = True
- left = self.method_call(left)
- if self.accept('lbracket'):
- go_again = True
- left = self.index_call(left)
- return left
- def e8(self):
- block_start = self.current
- if self.accept('lparen'):
- e = self.statement()
- self.block_expect('rparen', block_start)
- return e
- elif self.accept('lbracket'):
- args = self.args()
- self.block_expect('rbracket', block_start)
- return ArrayNode(args)
- elif self.accept('lcurl'):
- key_values = self.key_values()
- self.block_expect('rcurl', block_start)
- return DictNode(key_values)
- else:
- return self.e9()
- def e9(self):
- t = self.current
- if self.accept('true'):
- return BooleanNode(t, True)
- if self.accept('false'):
- return BooleanNode(t, False)
- if self.accept('id'):
- return IdNode(t)
- if self.accept('number'):
- return NumberNode(t)
- if self.accept('string'):
- return StringNode(t)
- return EmptyNode(self.current.lineno, self.current.colno)
- def key_values(self):
- s = self.statement()
- a = ArgumentNode(s)
- while not isinstance(s, EmptyNode):
- potential = self.current
- if self.accept('colon'):
- if not isinstance(s, StringNode):
- raise ParseException('Key must be a string.',
- self.getline(), s.lineno, s.colno)
- if s.value in a.kwargs:
- # + 1 to colno to point to the actual string, not the opening quote
- raise ParseException('Duplicate dictionary key: {}'.format(s.value),
- self.getline(), s.lineno, s.colno + 1)
- a.set_kwarg(s.value, self.statement())
- potential = self.current
- if not self.accept('comma'):
- return a
- a.commas.append(potential)
- else:
- raise ParseException('Only key:value pairs are valid in dict construction.',
- self.getline(), s.lineno, s.colno)
- s = self.statement()
- return a
- def args(self):
- s = self.statement()
- a = ArgumentNode(s)
- while not isinstance(s, EmptyNode):
- potential = self.current
- if self.accept('comma'):
- a.commas.append(potential)
- a.append(s)
- elif self.accept('colon'):
- if not isinstance(s, IdNode):
- raise ParseException('Dictionary key must be a plain identifier.',
- self.getline(), s.lineno, s.colno)
- a.set_kwarg(s.value, self.statement())
- potential = self.current
- if not self.accept('comma'):
- return a
- a.commas.append(potential)
- else:
- a.append(s)
- return a
- if self.accept('star'):
- dict_expr = self.statement()
- a.kwargs_dict = dict_expr
- return a
- s = self.statement()
- return a
- def method_call(self, source_object):
- methodname = self.e9()
- if not(isinstance(methodname, IdNode)):
- raise ParseException('Method name must be plain id',
- self.getline(), self.current.lineno, self.current.colno)
- self.expect('lparen')
- args = self.args()
- self.expect('rparen')
- method = MethodNode(methodname.subdir, methodname.lineno, methodname.colno, source_object, methodname.value, args)
- if self.accept('dot'):
- return self.method_call(method)
- return method
- def index_call(self, source_object):
- index_statement = self.statement()
- self.expect('rbracket')
- return IndexNode(source_object, index_statement)
- def foreachblock(self):
- t = self.current
- self.expect('id')
- varname = t
- varnames = [t]
- if self.accept('comma'):
- t = self.current
- self.expect('id')
- varnames.append(t)
- self.expect('colon')
- items = self.statement()
- block = self.codeblock()
- return ForeachClauseNode(varname.lineno, varname.colno, varnames, items, block)
- def ifblock(self):
- condition = self.statement()
- clause = IfClauseNode(condition.lineno, condition.colno)
- self.expect('eol')
- block = self.codeblock()
- clause.ifs.append(IfNode(clause.lineno, clause.colno, condition, block))
- self.elseifblock(clause)
- clause.elseblock = self.elseblock()
- return clause
- def elseifblock(self, clause):
- while self.accept('elif'):
- s = self.statement()
- self.expect('eol')
- b = self.codeblock()
- clause.ifs.append(IfNode(s.lineno, s.colno, s, b))
- def elseblock(self):
- if self.accept('else'):
- self.expect('eol')
- return self.codeblock()
- def line(self):
- block_start = self.current
- if self.current == 'eol':
- return EmptyNode(self.current.lineno, self.current.colno)
- if self.accept('if'):
- block = self.ifblock()
- self.block_expect('endif', block_start)
- return block
- if self.accept('foreach'):
- block = self.foreachblock()
- self.block_expect('endforeach', block_start)
- return block
- return self.statement()
- def codeblock(self):
- block = CodeBlockNode(self.current)
- cond = True
- while cond:
- curline = self.line()
- if not isinstance(curline, EmptyNode):
- block.lines.append(curline)
- cond = self.accept('eol')
- return block
|