- package convert
-
- import (
- 	"encoding/binary"
- 	"encoding/json"
- 	"fmt"
- 	"io"
- 	"log/slog"
- 	"os"
- 	"path/filepath"
- 	"regexp"
- 	"strings"
-
- 	"github.com/nlpodyssey/gopickle/pytorch"
- 	"github.com/nlpodyssey/gopickle/types"
- 	"github.com/x448/float16"
-
- 	"github.com/ollama/ollama/llm"
- )
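-
- // torchWriterTo serializes a single tensor's data when the GGUF file is
- // written. It holds the source PyTorch storage plus an optional repacker
- // that can rearrange the raw values before conversion.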
- type torchWriterTo struct {
- 	t        *llm.Tensor
- 	params   *Params
- 	bo       ByteOrder
- 	storage  pytorch.StorageInterface
- 	repacker func(string, []float32, []uint64) ([]float32, error)
- }
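-
- // TorchFormat converts PyTorch checkpoint (.pth) models.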
- type TorchFormat struct{}
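-
- // GetTensors unpickles every checkpoint file found in dirpath and returns
- // the tensors renamed to their GGUF equivalents, with a running data offset.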
- func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
- 	slog.Debug("getting torch tensors")
-
- 	// prefer consolidated checkpoints; otherwise fall back to sharded
- 	// pytorch_model*.pth files
- 	var files []string
- 	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
- 		files = append(files, pt...)
- 	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
- 		files = append(files, pt...)
- 	}
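-
- 	// note: if neither glob matches, files stays empty and an empty
- 	// tensor list is returned without error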
- 	var offset uint64
- 	var tensors []llm.Tensor
- 	for _, fn := range files {
- 		m, err := pytorch.Load(fn)
- 		if err != nil {
- 			slog.Error(fmt.Sprintf("error unpickling: %q", err))
- 			return []llm.Tensor{}, err
- 		}
-
- 		for _, k := range m.(*types.Dict).Keys() {
- 			// inv_freq is derived from the rope parameters at load time,
- 			// so it is skipped rather than converted
- 			if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
- 				continue
- 			}
-
- 			t, _ := m.(*types.Dict).Get(k)
- 			tshape := t.(*pytorch.Tensor).Size
-
- 			var size uint64
- 			var kind uint32
- 			switch len(tshape) {
- 			case 0:
- 				continue
- 			case 1:
- 				// 1D tensors are written as float32 (GGUF kind 0)
- 				kind = 0
- 				size = uint64(tshape[0] * 4)
- 			case 2:
- 				// 2D tensors are written as float16 (GGUF kind 1)
- 				kind = 1
- 				size = uint64(tshape[0] * tshape[1] * 2)
- 			}
-
- 			ggufName, err := tf.GetLayerName(k.(string))
- 			if err != nil {
- 				slog.Error(err.Error())
- 				return nil, err
- 			}
- 			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
-
- 			// pad the shape to four entries; unused dimensions stay zero
- 			shape := []uint64{0, 0, 0, 0}
- 			for i := range tshape {
- 				shape[i] = uint64(tshape[i])
- 			}
-
- 			tensor := llm.Tensor{
- 				Name:   ggufName,
- 				Kind:   kind,
- 				Offset: offset, // running byte offset within the tensor data
- 				Shape:  shape,
- 			}
- 			tensor.WriterTo = torchWriterTo{
- 				t:       &tensor,
- 				params:  params,
- 				bo:      params.ByteOrder,
- 				storage: t.(*pytorch.Tensor).Source,
- 			}
-
- 			tensors = append(tensors, tensor)
- 			offset += size
- 		}
- 	}
-
- 	return tensors, nil
- }
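-
- // getAltParams reads hyperparameters from params.json, the layout used by
- // Meta's original llama checkpoint distributions, and maps them onto Params.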
- func getAltParams(dirpath string) (*Params, error) {
- 	f, err := os.Open(filepath.Join(dirpath, "params.json"))
- 	if err != nil {
- 		slog.Error("no params.json")
- 		return nil, err
- 	}
- 	defer f.Close()
-
- 	type TorchParams struct {
- 		HiddenSize     int     `json:"dim"`
- 		AttentionHeads int     `json:"n_heads"`
- 		KeyValHeads    int     `json:"n_kv_heads"`
- 		HiddenLayers   int     `json:"n_layers"`
- 		RopeTheta      float64 `json:"rope_theta"`
- 		NormEPS        float64 `json:"norm_eps"`
- 	}
-
- 	var tparams TorchParams
- 	d := json.NewDecoder(f)
- 	err = d.Decode(&tparams)
- 	if err != nil {
- 		return nil, err
- 	}
-
- 	params := &Params{
- 		Architectures:  []string{"LlamaForCausalLM"},
- 		HiddenSize:     tparams.HiddenSize,
- 		AttentionHeads: tparams.AttentionHeads,
- 		KeyValHeads:    tparams.KeyValHeads,
- 		HiddenLayers:   tparams.HiddenLayers,
- 		NormEPS:        tparams.NormEPS,
- 	}
-
- 	// params.json does not record the context length, so infer it from
- 	// values that distinguish known model families
- 	switch {
- 	case tparams.RopeTheta == 1000000:
- 		// Codellama
- 		params.ContextSize = 16384
- 	case tparams.NormEPS == 1e-06:
- 		// llama2
- 		slog.Debug("Found llama2 - setting context size to 4096")
- 		params.ContextSize = 4096
- 	default:
- 		params.ContextSize = 2048
- 	}
-
- 	params.ByteOrder = binary.LittleEndian
- 	return params, nil
- }
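-
- // GetParams reads hyperparameters from a Hugging Face style config.json,
- // falling back to params.json when config.json is absent.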
- func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
- 	f, err := os.Open(filepath.Join(dirpath, "config.json"))
- 	if err != nil {
- 		if os.IsNotExist(err) {
- 			// try params.json instead
- 			return getAltParams(dirpath)
- 		} else {
- 			return nil, err
- 		}
- 	}
- 	defer f.Close()
-
- 	var params Params
- 	d := json.NewDecoder(f)
- 	err = d.Decode(&params)
- 	if err != nil {
- 		return nil, err
- 	}
-
- 	params.ByteOrder = binary.LittleEndian
- 	return &params, nil
- }
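-
- // GetLayerName maps a PyTorch tensor name to its GGUF equivalent, first via
- // an exact-match table and then via per-layer regular expressions, e.g.
- //
- //	"model.layers.0.self_attn.q_proj.weight" -> "blk.0.attn_q.weight"
- //	"layers.3.feed_forward.w2.weight"        -> "blk.3.ffn_down.weight"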
- func (m *TorchFormat) GetLayerName(n string) (string, error) {
- 	directMap := map[string]string{
- 		"tok_embeddings.weight":     "token_embd.weight",
- 		"output.weight":             "output.weight",
- 		"norm.weight":               "output_norm.weight",
- 		"rope.freqs":                "rope_freqs.weight",
- 		"model.embed_tokens.weight": "token_embd.weight",
- 		"lm_head.weight":            "output.weight",
- 		"model.norm.weight":         "output_norm.weight",
- 	}
-
- 	// note: the dots in these patterns are unescaped and so match any
- 	// character; checkpoint tensor names are regular enough that this
- 	// has not caused collisions in practice
- 	lMap := map[string]string{
- 		"layers.(\\d+).attention_norm.weight":                  "blk.$1.attn_norm.weight",
- 		"layers.(\\d+).attention_output_norm.weight":           "blk.$1.attn_norm.weight",
- 		"layers.(\\d+).feed_forward.w2.weight":                 "blk.$1.ffn_down.weight",
- 		"layers.(\\d+).feed_forward.w1.weight":                 "blk.$1.ffn_gate.weight",
- 		"layers.(\\d+).feed_forward.w3.weight":                 "blk.$1.ffn_up.weight",
- 		"layers.(\\d+).ffn_norm.weight":                        "blk.$1.ffn_norm.weight",
- 		"layers.(\\d+).attention.wk.weight":                    "blk.$1.attn_k.weight",
- 		"layers.(\\d+).attention.wo.weight":                    "blk.$1.attn_output.weight",
- 		"layers.(\\d+).attention.wq.weight":                    "blk.$1.attn_q.weight",
- 		"layers.(\\d+).attention.wv.weight":                    "blk.$1.attn_v.weight",
- 		"model.layers.(\\d+).input_layernorm.weight":           "blk.$1.attn_norm.weight",
- 		"model.layers.(\\d+).mlp.down_proj.weight":             "blk.$1.ffn_down.weight",
- 		"model.layers.(\\d+).mlp.gate_proj.weight":             "blk.$1.ffn_gate.weight",
- 		"model.layers.(\\d+).mlp.up_proj.weight":               "blk.$1.ffn_up.weight",
- 		"model.layers.(\\d+).post_attention_layernorm.weight":  "blk.$1.ffn_norm.weight",
- 		"model.layers.(\\d+).self_attn.k_proj.weight":          "blk.$1.attn_k.weight",
- 		"model.layers.(\\d+).self_attn.o_proj.weight":          "blk.$1.attn_output.weight",
- 		"model.layers.(\\d+).self_attn.q_proj.weight":          "blk.$1.attn_q.weight",
- 		"model.layers.(\\d+).self_attn.v_proj.weight":          "blk.$1.attn_v.weight",
- 	}
-
- 	v, ok := directMap[n]
- 	if ok {
- 		return v, nil
- 	}
-
- 	// fall back to regex substitution for the per-layer tensor names
- 	for k, v := range lMap {
- 		re := regexp.MustCompile(k)
- 		newName := re.ReplaceAllString(n, v)
- 		if newName != n {
- 			return newName, nil
- 		}
- 	}
-
- 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
- }
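-
- // WriteTo converts the tensor's source storage to the target GGUF type
- // (float32 for kind 0, float16 for kind 1) and writes it to w. The returned
- // byte count is always zero because binary.Write does not report one.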
- func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
- 	// gopickle decodes float, half, and bfloat16 storage into []float32
- 	var f32s []float32
- 	switch s := r.storage.(type) {
- 	case *pytorch.FloatStorage:
- 		f32s = s.Data
- 	case *pytorch.HalfStorage:
- 		f32s = s.Data
- 	case *pytorch.BFloat16Storage:
- 		f32s = s.Data
- 	default:
- 		return 0, fmt.Errorf("unknown data type: %T", s)
- 	}
-
- 	if r.repacker != nil {
- 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
- 		if err != nil {
- 			return 0, err
- 		}
- 	}
-
- 	switch r.t.Kind {
- 	case 0:
- 		return 0, binary.Write(w, r.bo, f32s)
- 	case 1:
- 		// narrow each float32 to IEEE 754 half precision
- 		f16s := make([]uint16, len(f32s))
- 		for i := range f32s {
- 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
- 		}
- 		return 0, binary.Write(w, r.bo, f16s)
- 	default:
- 		return 0, fmt.Errorf("unknown tensor kind: %d", r.t.Kind)
- 	}
- }
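-
- // GetModelArch returns the model implementation matching the checkpoint's
- // declared architecture; only LlamaForCausalLM is currently supported.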
- func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
- 	switch len(params.Architectures) {
- 	case 0:
- 		return nil, fmt.Errorf("no architecture specified to convert")
- 	case 1:
- 		switch params.Architectures[0] {
- 		case "LlamaForCausalLM":
- 			return &LlamaModel{
- 				ModelData{
- 					Name:   name,
- 					Path:   dirPath,
- 					Params: params,
- 					Format: m,
- 				},
- 			}, nil
- 		default:
- 			return nil, fmt.Errorf("models based on '%s' are not yet supported", params.Architectures[0])
- 		}
- 	}
-
- 	return nil, fmt.Errorf("expected one architecture, found %d", len(params.Architectures))
- }