123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- package envconfig
- import (
- "errors"
- "fmt"
- "log/slog"
- "math"
- "net"
- "os"
- "path/filepath"
- "runtime"
- "strconv"
- "strings"
- "time"
- )
// OllamaHost describes the endpoint the server listens on (or a client
// connects to), as parsed from the OLLAMA_HOST environment variable.
type OllamaHost struct {
	Scheme string // "http" or "https"
	Host   string // hostname or IP address
	Port   string // decimal port number
}

// String renders the endpoint as "scheme://host:port".
func (o OllamaHost) String() string {
	hostport := o.Host + ":" + o.Port
	return fmt.Sprintf("%s://%s", o.Scheme, hostport)
}
// ErrInvalidHostPort is returned by getOllamaHost when the port component of
// OLLAMA_HOST is not a number in the range 0-65535.
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
// Package-level configuration state. Defaults are established in init and
// then overridden from the environment by LoadConfig.
var (
	// Set via OLLAMA_ORIGINS in the environment
	AllowOrigins []string
	// Set via OLLAMA_DEBUG in the environment
	Debug bool
	// Experimental flash attention; set via OLLAMA_FLASH_ATTENTION in the environment
	FlashAttention bool
	// Set via OLLAMA_HOST in the environment
	Host *OllamaHost
	// Set via OLLAMA_KEEP_ALIVE in the environment
	KeepAlive time.Duration
	// Set via OLLAMA_LLM_LIBRARY in the environment
	LLMLibrary string
	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
	MaxRunners int
	// Set via OLLAMA_MAX_QUEUE in the environment
	MaxQueuedRequests int
	// Set via OLLAMA_MAX_VRAM in the environment
	MaxVRAM uint64
	// Set via OLLAMA_MODELS in the environment
	ModelsDir string
	// Set via OLLAMA_NOHISTORY in the environment
	NoHistory bool
	// Set via OLLAMA_NOPRUNE in the environment
	NoPrune bool
	// Set via OLLAMA_NUM_PARALLEL in the environment
	NumParallel int
	// Set via OLLAMA_RUNNERS_DIR in the environment
	RunnersDir string
	// Set via OLLAMA_SCHED_SPREAD in the environment
	SchedSpread bool
	// Set via OLLAMA_TMPDIR in the environment
	TmpDir string
	// Set via OLLAMA_INTEL_GPU in the environment
	IntelGpu bool
	// Set via CUDA_VISIBLE_DEVICES in the environment
	CudaVisibleDevices string
	// Set via HIP_VISIBLE_DEVICES in the environment
	HipVisibleDevices string
	// Set via ROCR_VISIBLE_DEVICES in the environment
	RocrVisibleDevices string
	// Set via GPU_DEVICE_ORDINAL in the environment
	GpuDeviceOrdinal string
	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
	HsaOverrideGfxVersion string
)
// EnvVar describes one supported configuration environment variable: its
// name, its current (typed) value, and a short human-readable description.
type EnvVar struct {
	Name        string
	Value       any
	Description string
}
- func AsMap() map[string]EnvVar {
- ret := map[string]EnvVar{
- "OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
- "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
- "OLLAMA_HOST": {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
- "OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
- "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
- "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
- "OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
- "OLLAMA_MAX_VRAM": {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
- "OLLAMA_MODELS": {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
- "OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
- "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
- "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
- "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
- "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
- "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
- "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
- }
- if runtime.GOOS != "darwin" {
- ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
- ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
- ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
- ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
- ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
- ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
- }
- return ret
- }
- func Values() map[string]string {
- vals := make(map[string]string)
- for k, v := range AsMap() {
- vals[k] = fmt.Sprintf("%v", v.Value)
- }
- return vals
- }
// defaultAllowOrigins are hosts that are always accepted as request origins,
// regardless of OLLAMA_ORIGINS; LoadConfig expands each over http/https and
// wildcard ports.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}
// clean reads the environment variable key and strips any surrounding
// double quotes, single quotes, and spaces from its value.
func clean(key string) string {
	raw := os.Getenv(key)
	return strings.Trim(raw, `"' `)
}
// init seeds the package defaults, then immediately overrides them from the
// environment via LoadConfig.
func init() {
	// default values
	NumParallel = 0 // Autoselect
	MaxRunners = 0  // Autoselect
	MaxQueuedRequests = 512
	KeepAlive = 5 * time.Minute
	LoadConfig()
}
- func LoadConfig() {
- if debug := clean("OLLAMA_DEBUG"); debug != "" {
- d, err := strconv.ParseBool(debug)
- if err == nil {
- Debug = d
- } else {
- Debug = true
- }
- }
- if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
- d, err := strconv.ParseBool(fa)
- if err == nil {
- FlashAttention = d
- }
- }
- RunnersDir = clean("OLLAMA_RUNNERS_DIR")
- if runtime.GOOS == "windows" && RunnersDir == "" {
- // On Windows we do not carry the payloads inside the main executable
- appExe, err := os.Executable()
- if err != nil {
- slog.Error("failed to lookup executable path", "error", err)
- }
- cwd, err := os.Getwd()
- if err != nil {
- slog.Error("failed to lookup working directory", "error", err)
- }
- var paths []string
- for _, root := range []string{filepath.Dir(appExe), cwd} {
- paths = append(paths,
- root,
- filepath.Join(root, "windows-"+runtime.GOARCH),
- filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
- )
- }
- // Try a few variations to improve developer experience when building from source in the local tree
- for _, p := range paths {
- candidate := filepath.Join(p, "ollama_runners")
- _, err := os.Stat(candidate)
- if err == nil {
- RunnersDir = candidate
- break
- }
- }
- if RunnersDir == "" {
- slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
- }
- }
- TmpDir = clean("OLLAMA_TMPDIR")
- userLimit := clean("OLLAMA_MAX_VRAM")
- if userLimit != "" {
- avail, err := strconv.ParseUint(userLimit, 10, 64)
- if err != nil {
- slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
- } else {
- MaxVRAM = avail
- }
- }
- LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
- if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
- val, err := strconv.Atoi(onp)
- if err != nil {
- slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
- } else {
- NumParallel = val
- }
- }
- if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
- NoHistory = true
- }
- if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
- s, err := strconv.ParseBool(spread)
- if err == nil {
- SchedSpread = s
- } else {
- SchedSpread = true
- }
- }
- if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
- NoPrune = true
- }
- if origins := clean("OLLAMA_ORIGINS"); origins != "" {
- AllowOrigins = strings.Split(origins, ",")
- }
- for _, allowOrigin := range defaultAllowOrigins {
- AllowOrigins = append(AllowOrigins,
- fmt.Sprintf("http://%s", allowOrigin),
- fmt.Sprintf("https://%s", allowOrigin),
- fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
- fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
- )
- }
- AllowOrigins = append(AllowOrigins,
- "app://*",
- "file://*",
- "tauri://*",
- )
- maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
- if maxRunners != "" {
- m, err := strconv.Atoi(maxRunners)
- if err != nil {
- slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
- } else {
- MaxRunners = m
- }
- }
- if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
- p, err := strconv.Atoi(onp)
- if err != nil || p <= 0 {
- slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
- } else {
- MaxQueuedRequests = p
- }
- }
- ka := clean("OLLAMA_KEEP_ALIVE")
- if ka != "" {
- loadKeepAlive(ka)
- }
- var err error
- ModelsDir, err = getModelsDir()
- if err != nil {
- slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
- }
- Host, err = getOllamaHost()
- if err != nil {
- slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
- }
- if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
- IntelGpu = set
- }
- CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
- HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
- RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
- GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
- HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
- }
// getModelsDir resolves the model storage directory: OLLAMA_MODELS when it
// is present in the environment (even if empty), otherwise ~/.ollama/models.
// It returns an error only when the home directory cannot be determined.
func getModelsDir() (string, error) {
	dir, ok := os.LookupEnv("OLLAMA_MODELS")
	if ok {
		return dir, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".ollama", "models"), nil
}
- func getOllamaHost() (*OllamaHost, error) {
- defaultPort := "11434"
- hostVar := os.Getenv("OLLAMA_HOST")
- hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
- scheme, hostport, ok := strings.Cut(hostVar, "://")
- switch {
- case !ok:
- scheme, hostport = "http", hostVar
- case scheme == "http":
- defaultPort = "80"
- case scheme == "https":
- defaultPort = "443"
- }
- // trim trailing slashes
- hostport = strings.TrimRight(hostport, "/")
- host, port, err := net.SplitHostPort(hostport)
- if err != nil {
- host, port = "127.0.0.1", defaultPort
- if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
- host = ip.String()
- } else if hostport != "" {
- host = hostport
- }
- }
- if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
- return &OllamaHost{
- Scheme: scheme,
- Host: host,
- Port: defaultPort,
- }, ErrInvalidHostPort
- }
- return &OllamaHost{
- Scheme: scheme,
- Host: host,
- Port: port,
- }, nil
- }
- func loadKeepAlive(ka string) {
- v, err := strconv.Atoi(ka)
- if err != nil {
- d, err := time.ParseDuration(ka)
- if err == nil {
- if d < 0 {
- KeepAlive = time.Duration(math.MaxInt64)
- } else {
- KeepAlive = d
- }
- }
- } else {
- d := time.Duration(v) * time.Second
- if d < 0 {
- KeepAlive = time.Duration(math.MaxInt64)
- } else {
- KeepAlive = d
- }
- }
- }
|