123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112 |
- import codecs
def _number_from_atom(atom, line, column):
    """Convert the text of a number atom to ``int`` (or ``float`` if it has a dot).

    ``line`` and ``column`` are only used to build the error message.
    Raises ``Exception`` when the text is not a valid number, e.g. ``"1.2.3"``.
    """
    try:
        return float(atom) if "." in atom else int(atom)
    except ValueError:
        raise Exception("%d:%d: error: invalid number: '%s'" % (line, column, atom))


def parse(text):
    """Parse S-expression-like ``text`` into nested Python lists.

    Grammar handled by the state machine below:

    * ``( ... )``  -> a nested ``list``
    * ``"..."``    -> a ``str``; a backslash takes the next character literally
    * digits, optionally with dots -> ``int`` (or ``float`` when a dot is present)
    * a letter followed by anything up to whitespace or a parenthesis -> a
      symbol, returned as ``str``
    * ``;`` starts a comment that runs to the end of the line

    Returns the list of top-level items (``[]`` for empty or blank input).

    Raises ``Exception`` with a ``line:column`` prefix for an unexpected
    character, an unmatched ``)``, an invalid number or an unterminated
    string, and ``Exception("error: list not closed")`` when a ``(`` is never
    closed.
    """
    stack = [[]]      # stack[-1] is the list currently being filled
    state = 'list'
    atom = ""
    line = 1
    column = 0

    for c in text:
        # Numbers and symbols have no closing delimiter: the first character
        # that cannot belong to them ends the atom. Flush the atom here and
        # fall through so the same character is then handled once in 'list'
        # state (this keeps line/column counting accurate).
        if state == 'number' and not (c.isdigit() or c == "."):
            stack[-1].append(_number_from_atom(atom, line, column))
            state = 'list'
        elif state == 'symbol' and (c.isspace() or c == '(' or c == ')'):
            stack[-1].append(atom)
            state = 'list'

        if c == '\n':
            line += 1
            column = 0
        else:
            column += 1

        if state == 'list':
            if c == '(':
                stack.append([])
            elif c == ')':
                if len(stack) < 2:
                    # A ')' without a matching '(' has no parent list to close into.
                    raise Exception("%d:%d: error: unexpected character: '%s'" % (line, column, c))
                closed = stack.pop()
                stack[-1].append(closed)
            elif c == "\"":
                state = 'string'
                atom = ""
            elif c == ";":
                state = 'comment'
            elif c.isalpha():
                state = 'symbol'
                atom = c
            elif c.isdigit():
                state = 'number'
                atom = c
            elif c.isspace():
                pass
            else:
                raise Exception("%d:%d: error: unexpected character: '%s'" % (line, column, c))
        elif state == 'comment':
            if c == '\n':
                state = 'list'
        elif state == 'string':
            if c == "\\":
                state = 'escape'
            elif c == "\"":
                stack[-1].append(atom)
                state = 'list'
            else:
                atom += c
        elif state == 'escape':
            # The character after a backslash is taken literally.
            atom += c
            state = 'string'
        else:
            # 'number' or 'symbol': the character extends the current atom.
            atom += c

    # End of input also terminates a pending atom; do not drop it silently.
    if state == 'number':
        stack[-1].append(_number_from_atom(atom, line, column))
    elif state == 'symbol':
        stack[-1].append(atom)
    elif state == 'string' or state == 'escape':
        raise Exception("%d:%d: error: string not closed" % (line, column))

    if len(stack) != 1:
        raise Exception("error: list not closed")
    return stack[0]
if __name__ == "__main__":
    # Ad-hoc smoke run: parse a handful of inline samples, then a file from
    # the current working directory, printing each result with its label.
    print("parsing...")
    samples = (
        ("1.", r'(() ("bar" foo) ()) () bar '),
        ("2.", ';;comment\n("Hello World" 5 1 123) ("Hello" 123 123 "foobar") ;; comment'),
        ("3.", r'(8(8)8)'),
        ("4.", r''),
        ("5.", r' '),
    )
    for label, sample in samples:
        print(label, parse(sample))

    # "white.stf" must exist next to where the script is run from.
    with codecs.open("white.stf", encoding='utf-8') as fin:
        contents = fin.read()
    print("6.", parse(contents))
|