config.go

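// Package envconfig loads Ollama server settings from environment variables.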
package envconfig

import (
	"errors"
	"fmt"
	"log/slog"
	"math"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"
)

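// OllamaHost holds the scheme, host, and port parsed from OLLAMA_HOST.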
type OllamaHost struct {
	Scheme string
	Host   string
	Port   string
}

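// String formats the host as scheme://host:port.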
func (o OllamaHost) String() string {
	return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
}

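// ErrInvalidHostPort is returned when OLLAMA_HOST specifies a port that is not
// a number in the range 0-65535.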
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")

var (
	// Set via OLLAMA_ORIGINS in the environment
	AllowOrigins []string
	// Set via OLLAMA_DEBUG in the environment
	Debug bool
	// Experimental flash attention
	FlashAttention bool
	// Set via OLLAMA_HOST in the environment
	Host *OllamaHost
	// Set via OLLAMA_KEEP_ALIVE in the environment
	KeepAlive time.Duration
	// Set via OLLAMA_LLM_LIBRARY in the environment
	LLMLibrary string
	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
	MaxRunners int
	// Set via OLLAMA_MAX_QUEUE in the environment
	MaxQueuedRequests int
	// Set via OLLAMA_MAX_VRAM in the environment
	MaxVRAM uint64
	// Set via OLLAMA_MODELS in the environment
	ModelsDir string
	// Set via OLLAMA_NOHISTORY in the environment
	NoHistory bool
	// Set via OLLAMA_NOPRUNE in the environment
	NoPrune bool
	// Set via OLLAMA_NUM_PARALLEL in the environment
	NumParallel int
	// Set via OLLAMA_RUNNERS_DIR in the environment
	RunnersDir string
	// Set via OLLAMA_SCHED_SPREAD in the environment
	SchedSpread bool
	// Set via OLLAMA_TMPDIR in the environment
	TmpDir string
	// Set via OLLAMA_INTEL_GPU in the environment
	IntelGpu bool
	// Set via CUDA_VISIBLE_DEVICES in the environment
	CudaVisibleDevices string
	// Set via HIP_VISIBLE_DEVICES in the environment
	HipVisibleDevices string
	// Set via ROCR_VISIBLE_DEVICES in the environment
	RocrVisibleDevices string
	// Set via GPU_DEVICE_ORDINAL in the environment
	GpuDeviceOrdinal string
	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
	HsaOverrideGfxVersion string
)

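// EnvVar pairs an environment variable name with its current value and a
// human-readable description.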
type EnvVar struct {
	Name        string
	Value       any
	Description string
}

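// AsMap returns every recognized configuration variable keyed by name. GPU
// selection variables are only reported on platforms other than macOS.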
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enable flash attention"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
		"OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
	}
	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
	}
	return ret
}

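// Values returns the current settings rendered as strings, keyed by variable name.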
func Values() map[string]string {
	vals := make(map[string]string)
	for k, v := range AsMap() {
		vals[k] = fmt.Sprintf("%v", v.Value)
	}
	return vals
}

var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}

// Clean quotes and spaces from the value
func clean(key string) string {
	return strings.Trim(os.Getenv(key), "\"' ")
}

func init() {
	// default values
	NumParallel = 0 // Autoselect
	MaxRunners = 0  // Autoselect
	MaxQueuedRequests = 512
	KeepAlive = 5 * time.Minute
	LoadConfig()
}

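// LoadConfig reads the OLLAMA_* and GPU selection environment variables and
// populates the package-level settings declared above.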
func LoadConfig() {
	if debug := clean("OLLAMA_DEBUG"); debug != "" {
		d, err := strconv.ParseBool(debug)
		if err == nil {
			Debug = d
		} else {
			Debug = true
		}
	}
	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
		d, err := strconv.ParseBool(fa)
		if err == nil {
			FlashAttention = d
		}
	}
	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
	if runtime.GOOS == "windows" && RunnersDir == "" {
		// On Windows we do not carry the payloads inside the main executable
		appExe, err := os.Executable()
		if err != nil {
			slog.Error("failed to lookup executable path", "error", err)
		}
		cwd, err := os.Getwd()
		if err != nil {
			slog.Error("failed to lookup working directory", "error", err)
		}
		var paths []string
		for _, root := range []string{filepath.Dir(appExe), cwd} {
			paths = append(paths,
				root,
				filepath.Join(root, "windows-"+runtime.GOARCH),
				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
			)
		}
		// Try a few variations to improve developer experience when building from source in the local tree
		for _, p := range paths {
			candidate := filepath.Join(p, "ollama_runners")
			_, err := os.Stat(candidate)
			if err == nil {
				RunnersDir = candidate
				break
			}
		}
		if RunnersDir == "" {
			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
		}
	}
	TmpDir = clean("OLLAMA_TMPDIR")
	userLimit := clean("OLLAMA_MAX_VRAM")
	if userLimit != "" {
		avail, err := strconv.ParseUint(userLimit, 10, 64)
		if err != nil {
			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
		} else {
			MaxVRAM = avail
		}
	}
	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
		val, err := strconv.Atoi(onp)
		if err != nil {
			slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
		} else {
			NumParallel = val
		}
	}
	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
		NoHistory = true
	}
	if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
		s, err := strconv.ParseBool(spread)
		if err == nil {
			SchedSpread = s
		} else {
			SchedSpread = true
		}
	}
	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
		NoPrune = true
	}
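	// Assemble the allowed origins: any user-specified origins plus the
	// localhost defaults and desktop app schemes.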
	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
		AllowOrigins = strings.Split(origins, ",")
	}
	for _, allowOrigin := range defaultAllowOrigins {
		AllowOrigins = append(AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
		)
	}
	AllowOrigins = append(AllowOrigins,
		"app://*",
		"file://*",
		"tauri://*",
	)
	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
	if maxRunners != "" {
		m, err := strconv.Atoi(maxRunners)
		if err != nil {
			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
		} else {
			MaxRunners = m
		}
	}
	if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
		p, err := strconv.Atoi(onp)
		if err != nil || p <= 0 {
			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
		} else {
			MaxQueuedRequests = p
		}
	}
	ka := clean("OLLAMA_KEEP_ALIVE")
	if ka != "" {
		loadKeepAlive(ka)
	}
	var err error
	ModelsDir, err = getModelsDir()
	if err != nil {
		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
	}
	Host, err = getOllamaHost()
	if err != nil {
		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
	}
	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
		IntelGpu = set
	}
	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
}

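// getModelsDir returns OLLAMA_MODELS when it is set, otherwise the default
// location under the user's home directory ($HOME/.ollama/models).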
func getModelsDir() (string, error) {
	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
		return models, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".ollama", "models"), nil
}

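// getOllamaHost parses OLLAMA_HOST into scheme, host, and port. Missing pieces
// fall back to http, 127.0.0.1, and 11434 (or 80/443 when an explicit
// http/https scheme is given); an invalid port returns ErrInvalidHostPort.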
func getOllamaHost() (*OllamaHost, error) {
	defaultPort := "11434"
	hostVar := os.Getenv("OLLAMA_HOST")
	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
	scheme, hostport, ok := strings.Cut(hostVar, "://")
	switch {
	case !ok:
		scheme, hostport = "http", hostVar
	case scheme == "http":
		defaultPort = "80"
	case scheme == "https":
		defaultPort = "443"
	}
	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")
	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}
	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
		return &OllamaHost{
			Scheme: scheme,
			Host:   host,
			Port:   defaultPort,
		}, ErrInvalidHostPort
	}
	return &OllamaHost{
		Scheme: scheme,
		Host:   host,
		Port:   port,
	}, nil
}

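// loadKeepAlive interprets OLLAMA_KEEP_ALIVE either as a plain number of
// seconds or as a Go duration string (e.g. "5m"); a negative value keeps
// models loaded indefinitely.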
func loadKeepAlive(ka string) {
	v, err := strconv.Atoi(ka)
	if err != nil {
		d, err := time.ParseDuration(ka)
		if err == nil {
			if d < 0 {
				KeepAlive = time.Duration(math.MaxInt64)
			} else {
				KeepAlive = d
			}
		}
	} else {
		d := time.Duration(v) * time.Second
		if d < 0 {
			KeepAlive = time.Duration(math.MaxInt64)
		} else {
			KeepAlive = d
		}
	}
}