types.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package gpu
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "github.com/ollama/ollama/format"
  6. )
  // memInfo carries the total and free memory figures that the device
  // descriptions in this file embed. Each field is left out of the JSON
  // encoding when it is zero.
  type memInfo struct {
  	TotalMemory uint64 `json:"total_memory,omitempty"`
  	FreeMemory  uint64 `json:"free_memory,omitempty"`
  }
  11. // Beginning of an `ollama info` command
  12. type GpuInfo struct {
  13. memInfo
  14. Library string `json:"library,omitempty"`
  15. // Optional variant to select (e.g. versions, cpu feature flags)
  16. Variant CPUCapability `json:"variant"`
  17. // MinimumMemory represents the minimum memory required to use the GPU
  18. MinimumMemory uint64 `json:"-"`
  19. // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
  20. DependencyPath string `json:"lib_path,omitempty"`
  21. // Extra environment variables specific to the GPU as list of [key,value]
  22. EnvWorkarounds [][2]string `json:"envs,omitempty"`
  23. // Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
  24. // the FreeMemory is best effort, and may over or under report actual memory usage
  25. // False indicates FreeMemory can generally be trusted on this GPU
  26. UnreliableFreeMemory bool
  27. // GPU information
  28. ID string `json:"gpu_id"` // string to use for selection of this specific GPU
  29. Name string `json:"name"` // user friendly name if available
  30. Compute string `json:"compute"` // Compute Capability or gfx
  31. // Driver Information - TODO no need to put this on each GPU
  32. DriverMajor int `json:"driver_major,omitempty"`
  33. DriverMinor int `json:"driver_minor,omitempty"`
  34. // TODO other performance capability info to help in scheduling decisions
  35. }
  36. type CPUInfo struct {
  37. GpuInfo
  38. }
  39. type CudaGPUInfo struct {
  40. GpuInfo
  41. OSOverhead uint64 // Memory overhead between the driver library and management library
  42. index int //nolint:unused,nolintlint
  43. }
  44. type CudaGPUInfoList []CudaGPUInfo
  45. type RocmGPUInfo struct {
  46. GpuInfo
  47. usedFilepath string //nolint:unused,nolintlint
  48. index int //nolint:unused,nolintlint
  49. }
  50. type RocmGPUInfoList []RocmGPUInfo
  51. type OneapiGPUInfo struct {
  52. GpuInfo
  53. driverIndex int //nolint:unused,nolintlint
  54. gpuIndex int //nolint:unused,nolintlint
  55. }
  56. type OneapiGPUInfoList []OneapiGPUInfo
  57. type GpuInfoList []GpuInfo
  58. // Split up the set of gpu info's by Library and variant
  59. func (l GpuInfoList) ByLibrary() []GpuInfoList {
  60. resp := []GpuInfoList{}
  61. libs := []string{}
  62. for _, info := range l {
  63. found := false
  64. requested := info.Library
  65. if info.Variant != CPUCapabilityNone {
  66. requested += "_" + info.Variant.String()
  67. }
  68. for i, lib := range libs {
  69. if lib == requested {
  70. resp[i] = append(resp[i], info)
  71. found = true
  72. break
  73. }
  74. }
  75. if !found {
  76. libs = append(libs, info.Library)
  77. resp = append(resp, []GpuInfo{info})
  78. }
  79. }
  80. return resp
  81. }
  82. // Report the GPU information into the log an Info level
  83. func (l GpuInfoList) LogDetails() {
  84. for _, g := range l {
  85. slog.Info("inference compute",
  86. "id", g.ID,
  87. "library", g.Library,
  88. "compute", g.Compute,
  89. "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
  90. "name", g.Name,
  91. "total", format.HumanBytes2(g.TotalMemory),
  92. "available", format.HumanBytes2(g.FreeMemory),
  93. )
  94. }
  95. }
  96. // Sort by Free Space
  97. type ByFreeMemory []GpuInfo
  98. func (a ByFreeMemory) Len() int { return len(a) }
  99. func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  100. func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
  // CPUCapability enumerates the CPU vector-extension levels distinguished by
  // this package.
  type CPUCapability uint32

  const (
  	CPUCapabilityNone CPUCapability = iota
  	CPUCapabilityAVX
  	CPUCapabilityAVX2
  	// TODO AVX512
  )

  // GPURunnerCPUCapability defaults to CPUCapabilityAVX.
  // Override at build time when building base GPU runners.
  var GPURunnerCPUCapability = CPUCapabilityAVX

  // String returns "avx" for CPUCapabilityAVX, "avx2" for CPUCapabilityAVX2,
  // and "no vector extensions" for every other value.
  func (c CPUCapability) String() string {
  	if c == CPUCapabilityAVX {
  		return "avx"
  	}
  	if c == CPUCapabilityAVX2 {
  		return "avx2"
  	}
  	return "no vector extensions"
  }
  119. }