shlex.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  /*
  Package shlex implements a simple lexer which splits input into tokens using
  shell-style rules for quoting.

  The basic use case uses the default ASCII lexer to split a string into sub-strings:

  	shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

  To process the words of a string one at a time:

  	l := NewLexer("one \"two three\" four")
  	for word := l.Next(); word.Err == nil && word.Value != ""; word = l.Next() {
  		// process word
  	}
  */
  12. package shlex
  13. import (
  14. "fmt"
  15. "strings"
  16. "unicode/utf8"
  17. )
  // Word is a single token produced by Lexer.Next.
  type Word struct {
  	// Value is the text of the word. It is empty if EOF is reached.
  	Value string
  	// Pos is the position in the input string of the word or the trailer.
  	Pos int
  	// Err indicates an error (unterminated string or trailing unescaped
  	// backslash).
  	Err error
  	// Trailer holds extra trailing data such as an unterminated string or an
  	// unescaped backslash. Present only if Err != nil.
  	Trailer string
  }
  // lexer_state identifies which state the Lexer state machine is in.
  type lexer_state int

  // Lexer state machine states
  const (
  	// lex_normal: between words; whitespace is being skipped.
  	lex_normal = lexer_state(iota)
  	// word: inside a word, outside of any quotes.
  	word
  	// string_without_escapes: inside a single-quoted section, where every
  	// character up to the closing quote is taken literally.
  	string_without_escapes
  	// string_with_escapes: inside a double-quoted section, where a backslash
  	// escapes the character that follows it.
  	string_with_escapes
  )
  32. // Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
  33. type Lexer struct {
  34. state lexer_state
  35. src string
  36. src_sz, src_pos, word_start int
  37. buf strings.Builder
  38. }
  39. // NewLexer creates a new lexer from an input string.
  40. func NewLexer(x string) *Lexer {
  41. return &Lexer{src: x, src_sz: len(x)}
  42. }
  43. func (self *Lexer) start_word() {
  44. self.buf.Reset()
  45. self.word_start = self.src_pos - 1
  46. }
  47. func (self *Lexer) get_word() Word {
  48. return Word{Pos: self.word_start, Value: self.buf.String()}
  49. }
  50. func (self *Lexer) write_ch(ch byte) {
  51. self.buf.WriteByte(ch)
  52. }
  53. func (self *Lexer) write_escaped_ch() bool {
  54. ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
  55. if count > 0 {
  56. self.src_pos += count
  57. if ch != utf8.RuneError {
  58. self.buf.WriteRune(ch)
  59. }
  60. return true
  61. }
  62. return false
  63. }
  64. // Next returns the next word. At EOF Word.Value will be ""
  65. func (self *Lexer) Next() (ans Word) {
  66. const string_with_escapes_delim = '"'
  67. const string_without_escapes_delim = '\''
  68. const escape_char = '\\'
  69. for self.src_pos < self.src_sz {
  70. ch := self.src[self.src_pos]
  71. self.src_pos++
  72. switch self.state {
  73. case lex_normal:
  74. switch ch {
  75. case ' ', '\n', '\r', '\t':
  76. case string_with_escapes_delim:
  77. self.state = string_with_escapes
  78. self.start_word()
  79. case string_without_escapes_delim:
  80. self.state = string_without_escapes
  81. self.start_word()
  82. case escape_char:
  83. self.start_word()
  84. if !self.write_escaped_ch() {
  85. ans.Trailer = "\\"
  86. ans.Err = fmt.Errorf("Extra backslash at end of input")
  87. ans.Pos = self.word_start
  88. return
  89. }
  90. self.state = word
  91. default:
  92. self.state = word
  93. self.start_word()
  94. self.write_ch(ch)
  95. }
  96. case word:
  97. switch ch {
  98. case ' ', '\n', '\r', '\t':
  99. self.state = lex_normal
  100. if self.buf.Len() > 0 {
  101. return self.get_word()
  102. }
  103. case string_with_escapes_delim:
  104. self.state = string_with_escapes
  105. case string_without_escapes_delim:
  106. self.state = string_without_escapes
  107. case escape_char:
  108. if !self.write_escaped_ch() {
  109. ans.Pos = self.word_start
  110. ans.Trailer = self.buf.String() + "\\"
  111. ans.Err = fmt.Errorf("Extra backslash at end of input")
  112. return
  113. }
  114. default:
  115. self.write_ch(ch)
  116. }
  117. case string_without_escapes:
  118. switch ch {
  119. case string_without_escapes_delim:
  120. self.state = word
  121. default:
  122. self.write_ch(ch)
  123. }
  124. case string_with_escapes:
  125. switch ch {
  126. case string_with_escapes_delim:
  127. self.state = word
  128. case escape_char:
  129. self.write_escaped_ch()
  130. default:
  131. self.write_ch(ch)
  132. }
  133. }
  134. }
  135. switch self.state {
  136. case word:
  137. self.state = lex_normal
  138. if self.buf.Len() > 0 {
  139. return self.get_word()
  140. }
  141. case string_with_escapes, string_without_escapes:
  142. self.state = lex_normal
  143. ans.Trailer = self.buf.String()
  144. ans.Pos = self.word_start
  145. ans.Err = fmt.Errorf("Unterminated string at end of input")
  146. return
  147. case lex_normal:
  148. }
  149. return
  150. }
  151. // Split partitions a string into a slice of strings.
  152. func Split(s string) (ans []string, err error) {
  153. l := NewLexer(s)
  154. var word Word
  155. for {
  156. word = l.Next()
  157. if word.Err != nil {
  158. return ans, word.Err
  159. }
  160. if word.Value == "" {
  161. break
  162. }
  163. ans = append(ans, word.Value)
  164. }
  165. return
  166. }
  167. // SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
  168. // more relaxed about errors and also adding an empty string at the end if s ends with a Space.
  169. func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
  170. t := NewLexer(s)
  171. argv = make([]string, 0, len(s)/4)
  172. for {
  173. word := t.Next()
  174. if word.Value == "" {
  175. if word.Trailer == "" {
  176. trimmed := strings.TrimRight(s, " ")
  177. if len(trimmed) < len(s) { // trailing spaces
  178. pos := position_of_last_arg
  179. if len(argv) > 0 {
  180. pos += len(argv[len(argv)-1])
  181. }
  182. if pos < len(s) { // trailing whitespace
  183. argv = append(argv, "")
  184. position_of_last_arg += len(s) - pos + 1
  185. }
  186. }
  187. } else {
  188. argv = append(argv, word.Trailer)
  189. position_of_last_arg = word.Pos
  190. }
  191. break
  192. }
  193. position_of_last_arg = word.Pos
  194. argv = append(argv, word.Value)
  195. }
  196. return
  197. }