parser.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. "golang.org/x/text/encoding/unicode"
  11. "golang.org/x/text/transform"
  12. )
  13. type File struct {
  14. Commands []Command
  15. }
  16. func (f File) String() string {
  17. var sb strings.Builder
  18. for _, cmd := range f.Commands {
  19. fmt.Fprintln(&sb, cmd.String())
  20. }
  21. return sb.String()
  22. }
  23. type Command struct {
  24. Name string
  25. Args string
  26. }
  27. func (c Command) String() string {
  28. var sb strings.Builder
  29. switch c.Name {
  30. case "model":
  31. fmt.Fprintf(&sb, "FROM %s", c.Args)
  32. case "license", "template", "system", "adapter":
  33. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  34. case "message":
  35. role, message, _ := strings.Cut(c.Args, ": ")
  36. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  37. default:
  38. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  39. }
  40. return sb.String()
  41. }
  42. type state int
  43. const (
  44. stateNil state = iota
  45. stateName
  46. stateValue
  47. stateParameter
  48. stateMessage
  49. stateComment
  50. )
  51. var (
  52. errMissingFrom = errors.New("no FROM line")
  53. errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
  54. errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
  55. )
// ParseFile reads a model file from r and returns the commands it contains,
// in the order they were parsed.
//
// The input is decoded as UTF-8 unless it starts with a byte order mark, in
// which case unicode.BOMOverride switches to the matching Unicode decoding.
// It is then consumed one rune at a time: parseRuneForState proposes the next
// state for every rune, and whenever the state changes ParseFile finishes the
// token collected in the buffer.
//
// Errors:
//   - a read error other than io.EOF is returned unchanged;
//   - io.ErrUnexpectedEOF reported by parseRuneForState is wrapped together
//     with the text buffered so far;
//   - errInvalidCommand when a command keyword is not recognised;
//   - errInvalidMessageRole when a MESSAGE role is not recognised;
//   - io.ErrUnexpectedEOF when the input ends while a command name, parameter
//     name or message role is being read, or inside an unterminated quoted
//     value;
//   - errMissingFrom when no FROM command was parsed.
func ParseFile(r io.Reader) (*File, error) {
	// cmd is reused for every command; Name and Args are overwritten before
	// each append.
	var cmd Command
	// curr starts at its zero value, stateNil.
	var curr state
	// b collects the text of the token currently being read.
	var b bytes.Buffer
	// role remembers the role of a MESSAGE until its value has been read.
	var role string

	var f File

	tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
	br := bufio.NewReader(transform.NewReader(r, tr))

	for {
		// Inside the loop r is the current rune and shadows the reader.
		r, _, err := br.ReadRune()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}

		// r is replaced by the rune to buffer; parseRuneForState returns a
		// zero rune when nothing should be buffered.
		next, r, err := parseRuneForState(r, curr)
		if errors.Is(err, io.ErrUnexpectedEOF) {
			return nil, fmt.Errorf("%w: %s", err, b.String())
		} else if err != nil {
			return nil, err
		}

		// process the state transition, some transitions need to be intercepted and redirected
		if next != curr {
			switch curr {
			case stateName:
				if !isValidCommand(b.String()) {
					return nil, errInvalidCommand
				}

				// next state sometimes depends on the current buffer value
				switch s := strings.ToLower(b.String()); s {
				case "from":
					cmd.Name = "model"
				case "parameter":
					// transition to stateParameter which sets command name
					next = stateParameter
				case "message":
					// transition to stateMessage which validates the message role
					next = stateMessage
					// fall through so cmd.Name is set to "message"
					fallthrough
				default:
					cmd.Name = s
				}
			case stateParameter:
				// the buffered parameter name becomes the command name
				cmd.Name = b.String()
			case stateMessage:
				if !isValidMessageRole(b.String()) {
					return nil, errInvalidMessageRole
				}

				role = b.String()
			case stateComment, stateNil:
				// pass
			case stateValue:
				// The value is not finished if its quotes are still open or if
				// the delimiter is a space or tab: keep the delimiter in the
				// buffer and stay in stateValue. The continue skips the buffer
				// reset and the state change below.
				s, ok := unquote(strings.TrimSpace(b.String()))
				if !ok || isSpace(r) {
					if _, err := b.WriteRune(r); err != nil {
						return nil, err
					}

					continue
				}

				// a pending role belongs to this value; clear it once used
				if role != "" {
					s = role + ": " + s
					role = ""
				}

				cmd.Args = s
				f.Commands = append(f.Commands, cmd)
			}

			b.Reset()
			curr = next
		}

		// Only printable runes are buffered here, which drops the zero rune
		// returned for delimiters.
		if strconv.IsPrint(r) {
			if _, err := b.WriteRune(r); err != nil {
				return nil, err
			}
		}
	}

	// flush the buffer
	switch curr {
	case stateComment, stateNil:
		// pass; nothing to flush
	case stateValue:
		// the input ended inside a value: accept it only if its quotes are balanced
		s, ok := unquote(strings.TrimSpace(b.String()))
		if !ok {
			return nil, io.ErrUnexpectedEOF
		}

		if role != "" {
			s = role + ": " + s
		}

		cmd.Args = s
		f.Commands = append(f.Commands, cmd)
	default:
		// the input ended in the middle of a command name, parameter name or role
		return nil, io.ErrUnexpectedEOF
	}

	// a file is only valid if it contains a FROM command
	for _, cmd := range f.Commands {
		if cmd.Name == "model" {
			return &f, nil
		}
	}

	return nil, errMissingFrom
}
  155. func parseRuneForState(r rune, cs state) (state, rune, error) {
  156. switch cs {
  157. case stateNil:
  158. switch {
  159. case r == '#':
  160. return stateComment, 0, nil
  161. case isSpace(r), isNewline(r):
  162. return stateNil, 0, nil
  163. default:
  164. return stateName, r, nil
  165. }
  166. case stateName:
  167. switch {
  168. case isAlpha(r):
  169. return stateName, r, nil
  170. case isSpace(r):
  171. return stateValue, 0, nil
  172. default:
  173. return stateNil, 0, errInvalidCommand
  174. }
  175. case stateValue:
  176. switch {
  177. case isNewline(r):
  178. return stateNil, r, nil
  179. case isSpace(r):
  180. return stateNil, r, nil
  181. default:
  182. return stateValue, r, nil
  183. }
  184. case stateParameter:
  185. switch {
  186. case isAlpha(r), isNumber(r), r == '_':
  187. return stateParameter, r, nil
  188. case isSpace(r):
  189. return stateValue, 0, nil
  190. default:
  191. return stateNil, 0, io.ErrUnexpectedEOF
  192. }
  193. case stateMessage:
  194. switch {
  195. case isAlpha(r):
  196. return stateMessage, r, nil
  197. case isSpace(r):
  198. return stateValue, 0, nil
  199. default:
  200. return stateNil, 0, io.ErrUnexpectedEOF
  201. }
  202. case stateComment:
  203. switch {
  204. case isNewline(r):
  205. return stateNil, 0, nil
  206. default:
  207. return stateComment, 0, nil
  208. }
  209. default:
  210. return stateNil, 0, errors.New("")
  211. }
  212. }
  213. func quote(s string) string {
  214. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  215. if strings.Contains(s, "\"") {
  216. return `"""` + s + `"""`
  217. }
  218. return `"` + s + `"`
  219. }
  220. return s
  221. }
  222. func unquote(s string) (string, bool) {
  223. // TODO: single quotes
  224. if len(s) >= 3 && s[:3] == `"""` {
  225. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  226. return s[3 : len(s)-3], true
  227. }
  228. return "", false
  229. }
  230. if len(s) >= 1 && s[0] == '"' {
  231. if len(s) >= 2 && s[len(s)-1] == '"' {
  232. return s[1 : len(s)-1], true
  233. }
  234. return "", false
  235. }
  236. return s, true
  237. }
  238. func isAlpha(r rune) bool {
  239. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  240. }
  241. func isNumber(r rune) bool {
  242. return r >= '0' && r <= '9'
  243. }
  244. func isSpace(r rune) bool {
  245. return r == ' ' || r == '\t'
  246. }
  247. func isNewline(r rune) bool {
  248. return r == '\r' || r == '\n'
  249. }
  250. func isValidMessageRole(role string) bool {
  251. return role == "system" || role == "user" || role == "assistant"
  252. }
  253. func isValidCommand(cmd string) bool {
  254. switch strings.ToLower(cmd) {
  255. case "from", "license", "template", "system", "adapter", "parameter", "message":
  256. return true
  257. default:
  258. return false
  259. }
  260. }