123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- package llm
- import (
- "compress/gzip"
- "errors"
- "fmt"
- "io"
- "io/fs"
- "log/slog"
- "os"
- "path/filepath"
- "runtime"
- "slices"
- "strings"
- "golang.org/x/sync/errgroup"
- "github.com/ollama/ollama/gpu"
- )
// errPayloadMissing is returned by extractFiles when the embedded payload glob
// fails or matches no files.
var errPayloadMissing = errors.New(
	"expected payloads not included in this build of ollama",
)
- func Init() error {
- payloadsDir, err := gpu.PayloadsDir()
- if err != nil {
- return err
- }
- if runtime.GOOS != "windows" {
- slog.Info("extracting embedded files", "dir", payloadsDir)
- binGlob := "build/*/*/*/bin/*"
- // extract server libraries
- err = extractFiles(payloadsDir, binGlob)
- if err != nil {
- return fmt.Errorf("extract binaries: %v", err)
- }
- }
- var variants []string
- for v := range getAvailableServers() {
- variants = append(variants, v)
- }
- slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
- slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
- return nil
- }
- // binary names may contain an optional variant separated by '_'
- // For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2"
- // Any library without a variant is the lowest common denominator
- func getAvailableServers() map[string]string {
- payloadsDir, err := gpu.PayloadsDir()
- if err != nil {
- slog.Error("payload lookup error", "error", err)
- return nil
- }
- // glob payloadsDir for files that start with ollama_
- pattern := filepath.Join(payloadsDir, "*", "ollama_*")
- files, err := filepath.Glob(pattern)
- if err != nil {
- slog.Debug("could not glob", "pattern", pattern, "error", err)
- return nil
- }
- servers := make(map[string]string)
- for _, file := range files {
- slog.Debug("availableServers : found", "file", file)
- servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
- }
- return servers
- }
- // serversForGpu returns a list of compatible servers give the provided GPU
- // info, ordered by performance. assumes Init() has been called
- // TODO - switch to metadata based mapping
- func serversForGpu(info gpu.GpuInfo) []string {
- // glob workDir for files that start with ollama_
- availableServers := getAvailableServers()
- requested := info.Library
- if info.Variant != gpu.CPUCapabilityNone {
- requested += "_" + info.Variant.String()
- }
- servers := []string{}
- // exact match first
- for a := range availableServers {
- if a == requested {
- servers = []string{a}
- if a == "metal" {
- return servers
- }
- break
- }
- }
- alt := []string{}
- // Then for GPUs load alternates and sort the list for consistent load ordering
- if info.Library != "cpu" {
- for a := range availableServers {
- if info.Library == strings.Split(a, "_")[0] && a != requested {
- alt = append(alt, a)
- }
- }
- slices.Sort(alt)
- servers = append(servers, alt...)
- }
- if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
- // Load up the best CPU variant if not primary requested
- if info.Library != "cpu" {
- variant := gpu.GetCPUCapability()
- // If no variant, then we fall back to default
- // If we have a variant, try that if we find an exact match
- // Attempting to run the wrong CPU instructions will panic the
- // process
- if variant != gpu.CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+variant.String() {
- servers = append(servers, cmp)
- break
- }
- }
- } else {
- servers = append(servers, "cpu")
- }
- }
- if len(servers) == 0 {
- servers = []string{"cpu"}
- }
- }
- return servers
- }
- // Return the optimal server for this CPU architecture
- func serverForCpu() string {
- if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
- return "metal"
- }
- variant := gpu.GetCPUCapability()
- availableServers := getAvailableServers()
- if variant != gpu.CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+variant.String() {
- return cmp
- }
- }
- }
- return "cpu"
- }
- // extract extracts the embedded files to the target directory
- func extractFiles(targetDir string, glob string) error {
- files, err := fs.Glob(libEmbed, glob)
- if err != nil || len(files) == 0 {
- return errPayloadMissing
- }
- if err := os.MkdirAll(targetDir, 0o755); err != nil {
- return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
- }
- g := new(errgroup.Group)
- // build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE
- for _, file := range files {
- filename := file
- variant := filepath.Base(filepath.Dir(filepath.Dir(filename)))
- slog.Debug("extracting", "variant", variant, "file", filename)
- g.Go(func() error {
- srcf, err := libEmbed.Open(filename)
- if err != nil {
- return err
- }
- defer srcf.Close()
- src := io.Reader(srcf)
- if strings.HasSuffix(filename, ".gz") {
- src, err = gzip.NewReader(src)
- if err != nil {
- return fmt.Errorf("decompress payload %s: %v", filename, err)
- }
- filename = strings.TrimSuffix(filename, ".gz")
- }
- variantDir := filepath.Join(targetDir, variant)
- if err := os.MkdirAll(variantDir, 0o755); err != nil {
- return fmt.Errorf("extractFiles could not mkdir %s: %v", variantDir, err)
- }
- base := filepath.Base(filename)
- destFilename := filepath.Join(variantDir, base)
- _, err = os.Stat(destFilename)
- switch {
- case errors.Is(err, os.ErrNotExist):
- destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
- if err != nil {
- return fmt.Errorf("write payload %s: %v", filename, err)
- }
- defer destFile.Close()
- if _, err := io.Copy(destFile, src); err != nil {
- return fmt.Errorf("copy payload %s: %v", filename, err)
- }
- case err != nil:
- return fmt.Errorf("stat payload %s: %v", filename, err)
- }
- return nil
- })
- }
- err = g.Wait()
- if err != nil {
- // If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
- gpu.Cleanup()
- return err
- }
- return nil
- }
|