123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- // Copyright 2017 The go-ethereum Authors
- // This file is part of the go-ethereum library.
- //
- // The go-ethereum library is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Lesser General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // The go-ethereum library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Lesser General Public License for more details.
- //
- // You should have received a copy of the GNU Lesser General Public License
- // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
- package asm
- import (
- "fmt"
- "os"
- "strings"
- "unicode"
- "unicode/utf8"
- )
// stateFn is used through the lifetime of the
// lexer to parse the different values at the
// current state. Each state function returns the next
// state function; returning nil terminates the lexer loop.
type stateFn func(*lexer) stateFn
// token is emitted when the lexer has discovered
// a new parsable token. These are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType // kind of token (see the tokenType constants)
	lineno int       // line the token starts on; starts at 0, incremented per '\n'
	text   string    // raw text of the token as it appears in the input
}
// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
)

// String implements fmt.Stringer, returning a human-readable name
// for the token type, or "invalid" for any out-of-range value.
func (it tokenType) String() string {
	// Guard both ends of the range. The previous check used '>',
	// so it == len(stringtokenTypes) (and any negative value)
	// indexed out of bounds and panicked.
	if it < 0 || int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

// stringtokenTypes maps each tokenType to its display name.
var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}
// lexer is the basic construct for parsing
// source code and turning it in to tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input             string     // input contains the source code of the program
	tokens            chan token // tokens is used to deliver tokens to the listener
	state             stateFn    // the current state function
	lineno            int        // current line number in the source file (0-based)
	start, pos, width int        // positions for lexing and returning value; width is the byte size of the last rune read
	debug             bool       // flag for triggering debug output (tokens echoed to stderr)
}
// Lex lexes the program by name with the given source. It returns a
// channel on which the tokens are delivered. The name argument is
// currently unused by the implementation. When debug is true every
// emitted token is also printed to stderr.
func Lex(name string, source []byte, debug bool) <-chan token {
	ch := make(chan token)
	l := &lexer{
		input:  string(source),
		tokens: ch,
		state:  lexLine,
		debug:  debug,
	}
	// Run the state machine in its own goroutine so callers can
	// consume tokens from the channel while lexing progresses.
	go func() {
		l.emit(lineStart)
		for l.state != nil {
			l.state = l.state(l)
		}
		l.emit(eof)
		close(l.tokens)
	}()
	return ch
}
- // next returns the next rune in the program's source.
- func (l *lexer) next() (rune rune) {
- if l.pos >= len(l.input) {
- l.width = 0
- return 0
- }
- rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
- l.pos += l.width
- return rune
- }
// backup backs up over the last rune read by next.
// Only a single step is possible: width tracks just one rune.
func (l *lexer) backup() {
	l.pos -= l.width
}
- // peek returns the next rune but does not advance the seeker
- func (l *lexer) peek() rune {
- r := l.next()
- l.backup()
- return r
- }
// ignore discards the input consumed so far by moving the
// token start marker up to the current read position.
func (l *lexer) ignore() {
	l.start = l.pos
}
- // Accepts checks whether the given input matches the next rune
- func (l *lexer) accept(valid string) bool {
- if strings.ContainsRune(valid, l.next()) {
- return true
- }
- l.backup()
- return false
- }
- // acceptRun will continue to advance the seeker until valid
- // can no longer be met.
- func (l *lexer) acceptRun(valid string) {
- for strings.ContainsRune(valid, l.next()) {
- }
- l.backup()
- }
- // acceptRunUntil is the inverse of acceptRun and will continue
- // to advance the seeker until the rune has been found.
- func (l *lexer) acceptRunUntil(until rune) bool {
- // Continues running until a rune is found
- for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
- if i == 0 {
- return false
- }
- }
- return true
- }
// blob returns the text of the token currently being scanned:
// everything between the start marker and the read position.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}
- // Emits a new token on to token channel for processing
- func (l *lexer) emit(t tokenType) {
- token := token{t, l.lineno, l.blob()}
- if l.debug {
- fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
- }
- l.tokens <- token
- l.start = l.pos
- }
// lexLine is the state function for lexing lines; it dispatches
// to the more specific state functions based on the next rune.
func lexLine(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == '\n':
			// Must precede the isSpace case: unicode.IsSpace
			// also matches '\n'.
			l.emit(lineEnd)
			l.ignore()
			l.lineno++
			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			// ";;" starts a comment running to end of line.
			return lexComment
		case isSpace(r):
			l.ignore()
		case isLetter(r) || r == '_':
			return lexElement
		case isNumber(r):
			// The digit r stays consumed; lexNumber continues from it.
			return lexNumber
		case r == '@':
			// Drop the '@' so the label token excludes it.
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			// Any unhandled rune — including the 0 that next
			// returns at end of input — stops the state machine.
			return nil
		}
	}
}
- // lexComment parses the current position until the end
- // of the line and discards the text.
- func lexComment(l *lexer) stateFn {
- l.acceptRunUntil('\n')
- l.ignore()
- return lexLine
- }
- // lexLabel parses the current label, emits and returns
- // the lex text state function to advance the parsing
- // process.
- func lexLabel(l *lexer) stateFn {
- l.acceptRun(Alpha + "_")
- l.emit(label)
- return lexLine
- }
// lexInsideString lexes the inside of a string until the state
// function finds the closing quote. It returns the lex line
// state function.
func lexInsideString(l *lexer) stateFn {
	// The emitted text includes both the opening and the closing
	// quote: the opening '"' was consumed but not ignored by
	// lexLine. An unterminated string at end of input emits
	// nothing and the text is silently dropped.
	if l.acceptRunUntil('"') {
		l.emit(stringValue)
	}
	return lexLine
}
- func lexNumber(l *lexer) stateFn {
- acceptance := Numbers
- if l.accept("0") || l.accept("xX") {
- acceptance = HexadecimalNumbers
- }
- l.acceptRun(acceptance)
- l.emit(number)
- return lexLine
- }
- func lexElement(l *lexer) stateFn {
- l.acceptRun(Alpha + "_" + Numbers)
- if l.peek() == ':' {
- l.emit(labelDef)
- l.accept(":")
- l.ignore()
- } else {
- l.emit(element)
- }
- return lexLine
- }
// isLetter reports whether t is a Unicode letter.
func isLetter(t rune) bool {
	return unicode.IsLetter(t)
}
// isSpace reports whether t is a Unicode space character
// (note: this includes '\n', so callers must test for the
// newline case first where the distinction matters).
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}
// isNumber reports whether t is a Unicode number character.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}
|