123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- package dirty
- import (
- "strconv"
- "unicode/utf8"
- )
- type String string
- func (String) isElement() {}
- func (String) getType() ElementType {
- return ElemString
- }
- func (s String) String() string {
- return "‘" + string(s) + "’"
- }
- func parseString(t token) (token, error) {
- result := ""
- ucode := ""
- mode := 0
- for _, r := range t.t {
- if mode == 0 {
- if r < 0x20 || r == 0x7f || (r >= 0x80 && r <= 0x9f) {
- return token{}, NewInvalidCharError(rune(r))
- }
- if r == '\\' {
- mode = '\\'
- continue
- }
- result += string(rune(r))
- } else if mode == '\\' {
- switch r {
- case 'n':
- result += "\n"
- mode = 0
- case '\'':
- result += "'"
- mode = 0
- case 'r':
- result += "\r"
- mode = 0
- case 't':
- result += "\t"
- mode = 0
- case '\\':
- result += "\\"
- mode = 0
- case 'u':
- mode = 'u'
- case 'U':
- mode = 'U'
- default:
- return token{}, NewEscapeError(r)
- }
- } else if mode == 'u' {
- ucode += string(rune(r))
- if len(ucode) == 4 {
- mode = 0
- char, err := parseUnicode(ucode)
- ucode = ""
- if err != nil {
- return token{}, err
- }
- result += char
- }
- } else if mode == 'U' {
- ucode += string(rune(r))
- if len(ucode) == 8 {
- mode = 0
- char, err := parseUnicode(ucode)
- ucode = ""
- if err != nil {
- return token{}, err
- }
- result += char
- }
- }
- }
- t.t = result
- return t, nil
- }
- func parseUnicode(ucode string) (string, error) {
- var (
- b []byte
- r rune
- )
- codepoint, err := strconv.ParseInt(ucode, 16, 64)
- if err != nil {
- return "", err
- }
- switch {
- case codepoint < 0x7f:
- b = []byte{byte(codepoint)}
- // todo check r, s for error
- r, _ = utf8.DecodeRune(b)
- case codepoint < 0x7ff:
- b = []byte{
- byte((codepoint>>6)&0b00011111 | 0b11000000),
- byte(codepoint&0b00111111 | 0b10000000),
- }
- r, _ = utf8.DecodeRune(b)
- case codepoint < 0xffff:
- b = []byte{
- byte((codepoint>>12)&0b00001111 | 0b11100000),
- byte((codepoint>>6)&0b00111111 | 0b10000000),
- byte(codepoint&0b00111111 | 0b10000000),
- }
- r, _ = utf8.DecodeRune(b)
- case codepoint < 0x1fffff:
- b = []byte{
- byte((codepoint>>18)&0b00000111 | 0b11110000),
- byte((codepoint>>12)&0b00111111 | 0b10000000),
- byte((codepoint>>6)&0b00111111 | 0b10000000),
- byte(codepoint&0b00111111 | 0b10000000),
- }
- r, _ = utf8.DecodeRune(b)
- default:
- return "", InvalidCodepointError{ucode}
- }
- return string(r), nil
- }
|