123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
- package convert
- import (
- "bytes"
- "encoding/binary"
- "encoding/json"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "regexp"
- "slices"
- "strings"
- "github.com/d4l3k/go-bfloat16"
- "github.com/x448/float16"
- "github.com/ollama/ollama/llm"
- )
- type safetensorWriterTo struct {
- t *llm.Tensor
- params *Params
- bo ByteOrder
- filename string
- dtype string
- offset, size int64
- repacker func(string, []float32, []uint64) ([]float32, error)
- }
// safetensorMetadata is one entry of the JSON header of a safetensors file.
type safetensorMetadata struct {
	// Type is the entry's "dtype" string.
	Type string `json:"dtype"`
	// Shape is the entry's "shape" list.
	Shape []uint64 `json:"shape"`
	// Offsets is the entry's "data_offsets" list; the reader uses element 0
	// as the start and element 1 as the end of the tensor data.
	Offsets []int64 `json:"data_offsets"`
}
// SafetensorFormat reads models stored as safetensors files. It carries no
// state; all behaviour lives in its methods.
type SafetensorFormat struct{}
- func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
- var tensors []llm.Tensor
- matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
- if err != nil {
- return nil, err
- }
- var offset uint64
- for _, f := range matches {
- var t []llm.Tensor
- var err error
- t, offset, err = m.readTensors(f, offset, params)
- if err != nil {
- return nil, err
- }
- tensors = append(tensors, t...)
- }
- return tensors, nil
- }
- func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
- f, err := os.Open(fn)
- if err != nil {
- return nil, 0, err
- }
- defer f.Close()
- var n int64
- if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
- return nil, 0, err
- }
- b := bytes.NewBuffer(make([]byte, 0, n))
- if _, err = io.CopyN(b, f, n); err != nil {
- return nil, 0, err
- }
- var headers map[string]safetensorMetadata
- if err := json.NewDecoder(b).Decode(&headers); err != nil {
- return nil, 0, err
- }
- var keys []string
- for key := range headers {
- if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
- keys = append(keys, key)
- }
- }
- slices.Sort(keys)
- var tensors []llm.Tensor
- for _, key := range keys {
- value := headers[key]
- var kind uint32
- switch len(value.Shape) {
- case 0:
- // valuedata
- continue
- case 2:
- kind = 1
- }
- name, err := m.GetLayerName(key)
- if err != nil {
- return nil, 0, err
- }
- shape := make([]uint64, len(value.Shape))
- copy(shape, value.Shape)
- pad := func(s int64) int64 {
- return 8 + n + s
- }
- t := llm.Tensor{
- Name: name,
- Kind: kind,
- Offset: offset,
- Shape: shape,
- }
- t.WriterTo = safetensorWriterTo{
- t: &t,
- params: params,
- bo: params.ByteOrder,
- filename: fn,
- dtype: value.Type,
- offset: pad(value.Offsets[0]),
- size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
- }
- offset += t.Size()
- tensors = append(tensors, t)
- }
- return tensors, offset, nil
- }
- func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
- f, err := os.Open(filepath.Join(dirpath, "config.json"))
- if err != nil {
- return nil, err
- }
- defer f.Close()
- var params Params
- if err := json.NewDecoder(f).Decode(¶ms); err != nil {
- return nil, err
- }
- params.ByteOrder = binary.LittleEndian
- return ¶ms, nil
- }
- func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
- directMap := map[string]string{
- "model.embed_tokens.weight": "token_embd.weight",
- "lm_head.weight": "output.weight",
- "model.norm.weight": "output_norm.weight",
- }
- tMap := map[string]string{
- "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
- "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
- "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
- "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
- "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
- "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
- "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
- "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
- "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
- "model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
- "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
- "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
- "model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
- }
- v, ok := directMap[n]
- if ok {
- return v, nil
- }
- // quick hack to rename the layers to gguf format
- for k, v := range tMap {
- re := regexp.MustCompile(k)
- newName := re.ReplaceAllString(n, v)
- if newName != n {
- return newName, nil
- }
- }
- return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
- }
- func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
- f, err := os.Open(r.filename)
- if err != nil {
- return 0, err
- }
- defer f.Close()
- if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
- return 0, err
- }
- var f32s []float32
- switch r.dtype {
- case "F32":
- f32s = make([]float32, r.size/4)
- if err = binary.Read(f, r.bo, f32s); err != nil {
- return 0, err
- }
- case "F16":
- u16s := make([]uint16, r.size/2)
- if err = binary.Read(f, r.bo, u16s); err != nil {
- return 0, err
- }
- for _, b := range u16s {
- f32s = append(f32s, float16.Frombits(b).Float32())
- }
- case "BF16":
- u8s := make([]uint8, r.size)
- if err = binary.Read(f, r.bo, u8s); err != nil {
- return 0, err
- }
- f32s = bfloat16.DecodeFloat32(u8s)
- default:
- return 0, fmt.Errorf("unknown data type: %s", r.dtype)
- }
- if r.repacker != nil {
- f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
- if err != nil {
- return 0, err
- }
- }
- switch r.t.Kind {
- case 0:
- return 0, binary.Write(w, r.bo, f32s)
- case 1:
- f16s := make([]uint16, len(f32s))
- for i := range f32s {
- f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
- }
- return 0, binary.Write(w, r.bo, f16s)
- default:
- return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
- }
- }
- func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
- switch len(params.Architectures) {
- case 0:
- return nil, fmt.Errorf("No architecture specified to convert")
- case 1:
- switch params.Architectures[0] {
- case "LlamaForCausalLM":
- return &LlamaModel{
- ModelData{
- Name: name,
- Path: dirPath,
- Params: params,
- Format: m,
- },
- }, nil
- case "MistralForCausalLM":
- return &MistralModel{
- ModelData{
- Name: name,
- Path: dirPath,
- Params: params,
- Format: m,
- },
- }, nil
- case "MixtralForCausalLM":
- return &MixtralModel{
- ModelData{
- Name: name,
- Path: dirPath,
- Params: params,
- Format: m,
- },
- }, nil
- case "GemmaForCausalLM":
- return &GemmaModel{
- ModelData{
- Name: name,
- Path: dirPath,
- Params: params,
- Format: m,
- },
- }, nil
- default:
- return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
- }
- }
- return nil, fmt.Errorf("Unknown error")
- }
|