123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643 |
- package parser
- import (
- "bytes"
- "encoding/binary"
- "fmt"
- "io"
- "strings"
- "testing"
- "unicode/utf16"
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/require"
- "golang.org/x/text/encoding"
- "golang.org/x/text/encoding/unicode"
- )
- func TestParseFileFile(t *testing.T) {
- input := `
- FROM model1
- ADAPTER adapter1
- LICENSE MIT
- PARAMETER param1 value1
- PARAMETER param2 value2
- TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
- {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
- {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
- {{ .Response }}<|eot_id|>"""
- `
- reader := strings.NewReader(input)
- modelfile, err := ParseFile(reader)
- require.NoError(t, err)
- expectedCommands := []Command{
- {Name: "model", Args: "model1"},
- {Name: "adapter", Args: "adapter1"},
- {Name: "license", Args: "MIT"},
- {Name: "param1", Args: "value1"},
- {Name: "param2", Args: "value2"},
- {Name: "template", Args: "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>"},
- }
- assert.Equal(t, expectedCommands, modelfile.Commands)
- }
- func TestParseFileTrimSpace(t *testing.T) {
- input := `
- FROM " model 1"
- ADAPTER adapter3
- LICENSE "MIT "
- PARAMETER param1 value1
- PARAMETER param2 value2
- TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
- {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
- {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
- {{ .Response }}<|eot_id|> """
- `
- reader := strings.NewReader(input)
- modelfile, err := ParseFile(reader)
- require.NoError(t, err)
- expectedCommands := []Command{
- {Name: "model", Args: " model 1"},
- {Name: "adapter", Args: "adapter3"},
- {Name: "license", Args: "MIT "},
- {Name: "param1", Args: "value1"},
- {Name: "param2", Args: "value2"},
- {Name: "template", Args: " {{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|> "},
- }
- assert.Equal(t, expectedCommands, modelfile.Commands)
- }
- func TestParseFileFrom(t *testing.T) {
- var cases = []struct {
- input string
- expected []Command
- err error
- }{
- {
- "FROM \"FOO BAR \"",
- []Command{{Name: "model", Args: "FOO BAR "}},
- nil,
- },
- {
- "FROM \"FOO BAR\"\nPARAMETER param1 value1",
- []Command{{Name: "model", Args: "FOO BAR"}, {Name: "param1", Args: "value1"}},
- nil,
- },
- {
- "FROM FOOO BAR ",
- []Command{{Name: "model", Args: "FOOO BAR"}},
- nil,
- },
- {
- "FROM /what/is/the path ",
- []Command{{Name: "model", Args: "/what/is/the path"}},
- nil,
- },
- {
- "FROM foo",
- []Command{{Name: "model", Args: "foo"}},
- nil,
- },
- {
- "FROM /path/to/model",
- []Command{{Name: "model", Args: "/path/to/model"}},
- nil,
- },
- {
- "FROM /path/to/model/fp16.bin",
- []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
- nil,
- },
- {
- "FROM llama3:latest",
- []Command{{Name: "model", Args: "llama3:latest"}},
- nil,
- },
- {
- "FROM llama3:7b-instruct-q4_K_M",
- []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
- nil,
- },
- {
- "", nil, errMissingFrom,
- },
- {
- "PARAMETER param1 value1",
- nil,
- errMissingFrom,
- },
- {
- "PARAMETER param1 value1\nFROM foo",
- []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
- nil,
- },
- {
- "PARAMETER what the \nFROM lemons make lemonade ",
- []Command{{Name: "what", Args: "the"}, {Name: "model", Args: "lemons make lemonade"}},
- nil,
- },
- }
- for _, c := range cases {
- t.Run("", func(t *testing.T) {
- modelfile, err := ParseFile(strings.NewReader(c.input))
- require.ErrorIs(t, err, c.err)
- if modelfile != nil {
- assert.Equal(t, c.expected, modelfile.Commands)
- }
- })
- }
- }
- func TestParseFileParametersMissingValue(t *testing.T) {
- input := `
- FROM foo
- PARAMETER param1
- `
- reader := strings.NewReader(input)
- _, err := ParseFile(reader)
- require.ErrorIs(t, err, io.ErrUnexpectedEOF)
- }
- func TestParseFileBadCommand(t *testing.T) {
- input := `
- FROM foo
- BADCOMMAND param1 value1
- `
- _, err := ParseFile(strings.NewReader(input))
- require.ErrorIs(t, err, errInvalidCommand)
- }
- func TestParseFileMessages(t *testing.T) {
- var cases = []struct {
- input string
- expected []Command
- err error
- }{
- {
- `
- FROM foo
- MESSAGE system You are a file parser. Always parse things.
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a file parser. Always parse things."},
- },
- nil,
- },
- {
- `
- FROM foo
- MESSAGE system You are a file parser. Always parse things.`,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a file parser. Always parse things."},
- },
- nil,
- },
- {
- `
- FROM foo
- MESSAGE system You are a file parser. Always parse things.
- MESSAGE user Hey there!
- MESSAGE assistant Hello, I want to parse all the things!
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "message", Args: "system: You are a file parser. Always parse things."},
- {Name: "message", Args: "user: Hey there!"},
- {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
- },
- nil,
- },
- {
- `
- FROM foo
- MESSAGE system """
- You are a multiline file parser. Always parse things.
- """
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
- },
- nil,
- },
- {
- `
- FROM foo
- MESSAGE badguy I'm a bad guy!
- `,
- nil,
- errInvalidMessageRole,
- },
- {
- `
- FROM foo
- MESSAGE system
- `,
- nil,
- io.ErrUnexpectedEOF,
- },
- {
- `
- FROM foo
- MESSAGE system`,
- nil,
- io.ErrUnexpectedEOF,
- },
- }
- for _, c := range cases {
- t.Run("", func(t *testing.T) {
- modelfile, err := ParseFile(strings.NewReader(c.input))
- require.ErrorIs(t, err, c.err)
- if modelfile != nil {
- assert.Equal(t, c.expected, modelfile.Commands)
- }
- })
- }
- }
- func TestParseFileQuoted(t *testing.T) {
- var cases = []struct {
- multiline string
- expected []Command
- err error
- }{
- {
- `
- FROM foo
- SYSTEM """
- This is a
- multiline system.
- """
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """
- This is a
- multiline system."""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "\nThis is a\nmultiline system."},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """This is a
- multiline system."""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "This is a\nmultiline system."},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """This is a multiline system."""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "This is a multiline system."},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """This is a multiline system.""
- `,
- nil,
- io.ErrUnexpectedEOF,
- },
- {
- `
- FROM foo
- SYSTEM "
- `,
- nil,
- io.ErrUnexpectedEOF,
- },
- {
- `
- FROM foo
- SYSTEM """
- This is a multiline system with "quotes".
- """
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """"""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: ""},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM ""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: ""},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM "'"
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: "'"},
- },
- nil,
- },
- {
- `
- FROM foo
- SYSTEM """''"'""'""'"'''''""'""'"""
- `,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "system", Args: `''"'""'""'"'''''""'""'`},
- },
- nil,
- },
- {
- `
- FROM foo
- TEMPLATE """
- {{ .Prompt }}
- """`,
- []Command{
- {Name: "model", Args: "foo"},
- {Name: "template", Args: "\n{{ .Prompt }}\n"},
- },
- nil,
- },
- }
- for _, c := range cases {
- t.Run("", func(t *testing.T) {
- modelfile, err := ParseFile(strings.NewReader(c.multiline))
- require.ErrorIs(t, err, c.err)
- if modelfile != nil {
- assert.Equal(t, c.expected, modelfile.Commands)
- }
- })
- }
- }
- func TestParseFileParameters(t *testing.T) {
- var cases = map[string]struct {
- name, value string
- }{
- "numa true": {"numa", "true"},
- "num_ctx 1": {"num_ctx", "1"},
- "num_batch 1": {"num_batch", "1"},
- "num_gqa 1": {"num_gqa", "1"},
- "num_gpu 1": {"num_gpu", "1"},
- "main_gpu 1": {"main_gpu", "1"},
- "low_vram true": {"low_vram", "true"},
- "f16_kv true": {"f16_kv", "true"},
- "logits_all true": {"logits_all", "true"},
- "vocab_only true": {"vocab_only", "true"},
- "use_mmap true": {"use_mmap", "true"},
- "use_mlock true": {"use_mlock", "true"},
- "num_thread 1": {"num_thread", "1"},
- "num_keep 1": {"num_keep", "1"},
- "seed 1": {"seed", "1"},
- "num_predict 1": {"num_predict", "1"},
- "top_k 1": {"top_k", "1"},
- "top_p 1.0": {"top_p", "1.0"},
- "tfs_z 1.0": {"tfs_z", "1.0"},
- "typical_p 1.0": {"typical_p", "1.0"},
- "repeat_last_n 1": {"repeat_last_n", "1"},
- "temperature 1.0": {"temperature", "1.0"},
- "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
- "presence_penalty 1.0": {"presence_penalty", "1.0"},
- "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
- "mirostat 1": {"mirostat", "1"},
- "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
- "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
- "penalize_newline true": {"penalize_newline", "true"},
- "stop ### User:": {"stop", "### User:"},
- "stop ### User: ": {"stop", "### User:"},
- "stop \"### User:\"": {"stop", "### User:"},
- "stop \"### User: \"": {"stop", "### User: "},
- "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
- "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
- "stop <|endoftext|>": {"stop", "<|endoftext|>"},
- "stop <|eot_id|>": {"stop", "<|eot_id|>"},
- "stop </s>": {"stop", "</s>"},
- }
- for k, v := range cases {
- t.Run(k, func(t *testing.T) {
- var b bytes.Buffer
- fmt.Fprintln(&b, "FROM foo")
- fmt.Fprintln(&b, "PARAMETER", k)
- modelfile, err := ParseFile(&b)
- require.NoError(t, err)
- assert.Equal(t, []Command{
- {Name: "model", Args: "foo"},
- {Name: v.name, Args: v.value},
- }, modelfile.Commands)
- })
- }
- }
- func TestParseFileComments(t *testing.T) {
- var cases = []struct {
- input string
- expected []Command
- }{
- {
- `
- # comment
- FROM foo
- `,
- []Command{
- {Name: "model", Args: "foo"},
- },
- },
- }
- for _, c := range cases {
- t.Run("", func(t *testing.T) {
- modelfile, err := ParseFile(strings.NewReader(c.input))
- require.NoError(t, err)
- assert.Equal(t, c.expected, modelfile.Commands)
- })
- }
- }
- func TestParseFileFormatParseFile(t *testing.T) {
- var cases = []string{
- `
- FROM foo
- ADAPTER adapter1
- LICENSE MIT
- PARAMETER param1 value1
- PARAMETER param2 value2
- TEMPLATE template1
- MESSAGE system You are a file parser. Always parse things.
- MESSAGE user Hey there!
- MESSAGE assistant Hello, I want to parse all the things!
- `,
- `
- FROM foo
- ADAPTER adapter1
- LICENSE MIT
- PARAMETER param1 value1
- PARAMETER param2 value2
- TEMPLATE template1
- MESSAGE system """
- You are a store greeter. Always responsed with "Hello!".
- """
- MESSAGE user Hey there!
- MESSAGE assistant Hello, I want to parse all the things!
- `,
- `
- FROM foo
- ADAPTER adapter1
- LICENSE """
- Very long and boring legal text.
- Blah blah blah.
- "Oh look, a quote!"
- """
- PARAMETER param1 value1
- PARAMETER param2 value2
- TEMPLATE template1
- MESSAGE system """
- You are a store greeter. Always responsed with "Hello!".
- """
- MESSAGE user Hey there!
- MESSAGE assistant Hello, I want to parse all the things!
- `,
- `
- FROM foo
- SYSTEM ""
- `,
- }
- for _, c := range cases {
- t.Run("", func(t *testing.T) {
- modelfile, err := ParseFile(strings.NewReader(c))
- require.NoError(t, err)
- modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
- require.NoError(t, err)
- assert.Equal(t, modelfile, modelfile2)
- })
- }
- }
- func TestParseFileUTF16ParseFile(t *testing.T) {
- data := `FROM bob
- PARAMETER param1 1
- PARAMETER param2 4096
- SYSTEM You are a utf16 file.
- `
- expected := []Command{
- {Name: "model", Args: "bob"},
- {Name: "param1", Args: "1"},
- {Name: "param2", Args: "4096"},
- {Name: "system", Args: "You are a utf16 file."},
- }
- t.Run("le", func(t *testing.T) {
- var b bytes.Buffer
- require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
- require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
- actual, err := ParseFile(&b)
- require.NoError(t, err)
- assert.Equal(t, expected, actual.Commands)
- })
- t.Run("be", func(t *testing.T) {
- var b bytes.Buffer
- require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
- require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
- actual, err := ParseFile(&b)
- require.NoError(t, err)
- assert.Equal(t, expected, actual.Commands)
- })
- }
- func TestParseMultiByte(t *testing.T) {
- input := `FROM test
- SYSTEM 你好👋`
- expect := []Command{
- {Name: "model", Args: "test"},
- {Name: "system", Args: "你好👋"},
- }
- encodings := []encoding.Encoding{
- unicode.UTF8,
- unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
- unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
- }
- for _, encoding := range encodings {
- t.Run(fmt.Sprintf("%s", encoding), func(t *testing.T) {
- s, err := encoding.NewEncoder().String(input)
- require.NoError(t, err)
- actual, err := ParseFile(strings.NewReader(s))
- require.NoError(t, err)
- assert.Equal(t, expect, actual.Commands)
- })
- }
- }
|