string.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. package dirty
  2. import (
  3. "strconv"
  4. "unicode/utf8"
  5. )
  6. type String string
  7. func (String) isElement() {}
  8. func (String) getType() ElementType {
  9. return ElemString
  10. }
  11. func (s String) String() string {
  12. return "‘" + string(s) + "’"
  13. }
  14. func parseString(t token) (token, error) {
  15. result := ""
  16. ucode := ""
  17. mode := 0
  18. for _, r := range t.t {
  19. if mode == 0 {
  20. if r < 0x20 || r == 0x7f || (r >= 0x80 && r <= 0x9f) {
  21. return token{}, NewInvalidCharError(rune(r))
  22. }
  23. if r == '\\' {
  24. mode = '\\'
  25. continue
  26. }
  27. result += string(rune(r))
  28. } else if mode == '\\' {
  29. switch r {
  30. case 'n':
  31. result += "\n"
  32. mode = 0
  33. case '\'':
  34. result += "'"
  35. mode = 0
  36. case 'r':
  37. result += "\r"
  38. mode = 0
  39. case 't':
  40. result += "\t"
  41. mode = 0
  42. case '\\':
  43. result += "\\"
  44. mode = 0
  45. case 'u':
  46. mode = 'u'
  47. case 'U':
  48. mode = 'U'
  49. default:
  50. return token{}, NewEscapeError(r)
  51. }
  52. } else if mode == 'u' {
  53. ucode += string(rune(r))
  54. if len(ucode) == 4 {
  55. mode = 0
  56. char, err := parseUnicode(ucode)
  57. ucode = ""
  58. if err != nil {
  59. return token{}, err
  60. }
  61. result += char
  62. }
  63. } else if mode == 'U' {
  64. ucode += string(rune(r))
  65. if len(ucode) == 8 {
  66. mode = 0
  67. char, err := parseUnicode(ucode)
  68. ucode = ""
  69. if err != nil {
  70. return token{}, err
  71. }
  72. result += char
  73. }
  74. }
  75. }
  76. t.t = result
  77. return t, nil
  78. }
  79. func parseUnicode(ucode string) (string, error) {
  80. var (
  81. b []byte
  82. r rune
  83. )
  84. codepoint, err := strconv.ParseInt(ucode, 16, 64)
  85. if err != nil {
  86. return "", err
  87. }
  88. switch {
  89. case codepoint < 0x7f:
  90. b = []byte{byte(codepoint)}
  91. // todo check r, s for error
  92. r, _ = utf8.DecodeRune(b)
  93. case codepoint < 0x7ff:
  94. b = []byte{
  95. byte((codepoint>>6)&0b00011111 | 0b11000000),
  96. byte(codepoint&0b00111111 | 0b10000000),
  97. }
  98. r, _ = utf8.DecodeRune(b)
  99. case codepoint < 0xffff:
  100. b = []byte{
  101. byte((codepoint>>12)&0b00001111 | 0b11100000),
  102. byte((codepoint>>6)&0b00111111 | 0b10000000),
  103. byte(codepoint&0b00111111 | 0b10000000),
  104. }
  105. r, _ = utf8.DecodeRune(b)
  106. case codepoint < 0x1fffff:
  107. b = []byte{
  108. byte((codepoint>>18)&0b00000111 | 0b11110000),
  109. byte((codepoint>>12)&0b00111111 | 0b10000000),
  110. byte((codepoint>>6)&0b00111111 | 0b10000000),
  111. byte(codepoint&0b00111111 | 0b10000000),
  112. }
  113. r, _ = utf8.DecodeRune(b)
  114. default:
  115. return "", InvalidCodepointError{ucode}
  116. }
  117. return string(r), nil
  118. }