123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- package dirty
- import (
- "bufio"
- "fmt"
- "io"
- )
// skipped is a one-rune pushback slot shared by the lexer functions in this
// file. When a function reads one rune too many it stores that rune here, and
// the next read takes it from here before touching the reader. The value 0
// means the slot is empty.
//
// TODO: use reader.Peek() instead of this package-level variable.
var skipped rune
// tokenType identifies the lexical category of a token.
type tokenType int

// Token categories. The numeric values follow declaration order, starting at
// 0 for UNKNOWN.
const (
	// UNKNOWN is the zero value; it is what an empty token{} carries.
	UNKNOWN tokenType = iota
	// LBRACKET is an opening '('.
	LBRACKET
	// RBRACKET is a closing ')'.
	RBRACKET
	// STRING is a string opened by a single quote.
	STRING
	// STRING_RAW is a raw string opened by a backtick.
	STRING_RAW
	// NUMBER is a numeric literal.
	NUMBER
	// FLOAT is not produced by the lexer functions in this file.
	// NOTE(review): presumably assigned by the number parser; confirm.
	FLOAT
	// CONST is a word starting with 't', 'f' or 'n'.
	CONST
	// COMMENT starts at '#' and runs to the end of the line.
	COMMENT
	// EOF reports that the input is exhausted.
	EOF
)
- type token struct {
- ttype tokenType
- t string
- i *int64
- f *float64
- }
- func nextToken(reader *bufio.Reader) (token, error) {
- t, finished, e := nextToken_initial(reader)
- if finished || e != nil {
- return t, e
- }
- t, e = nextToken_rest(reader, t)
- return t, e
- }
- func nextToken_initial(reader *bufio.Reader) (token, bool, error) {
- var (
- r rune = 0
- err error = nil
- t token
- )
- initialTokenLoop:
- for {
- if skipped != 0 {
- r = skipped
- skipped = 0
- } else {
- r, _, err = reader.ReadRune()
- }
- //debugf("%c\n", r)
- if err != nil {
- if err == io.EOF {
- t := token{ttype: EOF}
- return t, true, nil
- }
- return token{}, true, fmt.Errorf("while reading: %w", err)
- }
- switch {
- case r == '(':
- return token{ttype: LBRACKET}, true, nil
- case r == ')':
- return token{ttype: RBRACKET}, true, nil
- case r == '#':
- t = token{ttype: COMMENT}
- break initialTokenLoop
- case r == '`':
- t = token{ttype: STRING_RAW}
- break initialTokenLoop
- case r == '\'':
- t = token{ttype: STRING}
- break initialTokenLoop
- case r == 't':
- t = token{ttype: CONST, t: "t"}
- break initialTokenLoop
- case r == 'f':
- t = token{ttype: CONST, t: "f"}
- break initialTokenLoop
- case r == 'n':
- t = token{ttype: CONST, t: "n"}
- break initialTokenLoop
- case in(r, []rune{'1', '2', '3', '4', '5', '6', '7', '8', '9', '↊', '↋', '-', '.', '·', ','}):
- t = token{ttype: NUMBER, t: string(r)}
- break initialTokenLoop
- case r == '0':
- r, _, err = reader.ReadRune()
- //debugf("%c\n", r)
- if err != nil {
- if err == io.EOF {
- t := token{ttype: EOF}
- return t, true, nil
- }
- return token{}, true, fmt.Errorf("while reading: %w", err)
- }
- switch r {
- case 'b':
- t = token{ttype: NUMBER, t: "0b"}
- break initialTokenLoop
- case 'o':
- t = token{ttype: NUMBER, t: "0o"}
- break initialTokenLoop
- case 'x':
- t = token{ttype: NUMBER, t: "0x"}
- break initialTokenLoop
- default:
- skipped = r
- var zero int64 = 0
- return token{ttype: NUMBER, i: &zero}, true, nil
- }
- case in(r, []rune{' ', '\t', '\n', '\r'}):
- continue
- default:
- return token{}, true, nil
- }
- }
- return t, false, err
- }
- func nextToken_rest(reader *bufio.Reader, t token) (token, error) {
- var (
- r rune
- err error = nil
- escaping bool = false
- stringRawIndent string = ""
- stringRawIndentSkip string = ""
- stringRawState int = 0 // todo enum
- )
- tokenLoop:
- for {
- if skipped != 0 {
- r = skipped
- skipped = 0
- } else {
- r, _, err = reader.ReadRune()
- }
- //debugf("%c\n", r)
- // todo line, column
- if err != nil {
- if err == io.EOF {
- if t.ttype == STRING || t.ttype == STRING_RAW {
- return token{}, NewUnterminatedError("string", t.t)
- } else {
- return token{ttype: EOF}, nil
- }
- }
- return token{}, fmt.Errorf("while reading: %w", err)
- }
- switch t.ttype {
- case COMMENT:
- if r != '\n' {
- t.t += string(r)
- } else {
- break tokenLoop
- }
- case STRING:
- if !escaping && r == '\'' {
- t, err = parseString(t)
- if err != nil {
- return token{}, err
- }
- break tokenLoop
- } else if r == '\n' {
- return token{}, NewUnterminatedError("string", t.t)
- } else {
- t.t += string(r)
- }
- if escaping {
- escaping = false
- } else if r == '\\' {
- escaping = true
- }
- case STRING_RAW:
- if stringRawState == 0 {
- if r != '\n' {
- return token{}, NewRawStringError("missing new line after opening `")
- } else {
- stringRawState = 1
- continue
- }
- }
- if stringRawState == 1 {
- if r == ' ' || r == '\t' {
- stringRawIndent += string(r)
- } else {
- stringRawState = 2
- t.t += string(r)
- }
- continue
- }
- if stringRawState == 2 {
- // fixme assumes lines ending with \n; get to end of line
- if r == '\n' {
- stringRawState = 3
- stringRawIndentSkip = ""
- }
- t.t += string(r)
- continue
- }
- if stringRawState == 3 {
- if len(stringRawIndentSkip) == 0 && r == '`' {
- break tokenLoop
- }
- if len(stringRawIndentSkip) < len(stringRawIndent) {
- stringRawIndentSkip += string(r)
- } else {
- if stringRawIndent != stringRawIndentSkip {
- // todo convert whitespace to escape codes
- return token{}, NewRawStringError("Indent ‘" + stringRawIndent + "’ does not begin with ‘" + stringRawIndentSkip + "’")
- }
- skipped = r
- stringRawState = 2
- }
- }
- case CONST:
- if t.t[0] == 't' && in(r, []rune{'r', 'u', 'e'}) && len(t.t) < 4 {
- t.t += string(r)
- continue
- }
- if t.t[0] == 'f' && in(r, []rune{'a', 'l', 's', 'e'}) && len(t.t) < 5 {
- t.t += string(r)
- continue
- }
- if in(r, []rune{'u', 'l'}) && len(t.t) < 4 {
- t.t += string(r)
- continue
- }
- skipped = r
- t, err = parseConst(t)
- break tokenLoop
- case NUMBER:
- if t.t[0] == '0' && t.t[1] == 'b' && in(r, []rune{'0', '1', ','}) {
- t.t += string(r)
- continue
- }
- if t.t[0] == '0' && t.t[1] == 'o' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7'}) {
- t.t += string(r)
- continue
- }
- if t.t[0] == '0' && t.t[1] == 'x' && in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F'}) {
- t.t += string(r)
- continue
- }
- if in(r, []rune{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '↊', '↋', ',', '.', '·', 'e', '-'}) {
- t.t += string(r)
- continue
- }
- skipped = r
- t, err = parseNumber(t) // todo errors that are not CommaError <- NumberError
- break tokenLoop
- }
- }
- return t, err
- }
// in reports whether c is one of the runes in expected. An empty or nil
// expected never matches.
func in(c rune, expected []rune) bool {
	for i := 0; i < len(expected); i++ {
		if expected[i] == c {
			return true
		}
	}
	return false
}
|