mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-14 17:57:06 +00:00
Discover CPU details for default thread selection (#6264)
On Windows, detect large multi-socket systems and reduce the default thread count to the number of cores in one socket for best performance.
This commit is contained in:
24
gpu/types.go
24
gpu/types.go
@@ -10,11 +10,11 @@ import (
|
||||
// memInfo holds memory and swap counters that are embedded into the
// device-description types in this file. Every field is dropped from the
// JSON encoding when it is zero (omitempty).
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}
|
||||
|
||||
// Beginning of an `ollama info` command
|
||||
type GpuInfo struct {
|
||||
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
||||
memInfo
|
||||
Library string `json:"library,omitempty"`
|
||||
|
||||
@@ -49,6 +49,17 @@ type GpuInfo struct {
|
||||
|
||||
type CPUInfo struct {
|
||||
GpuInfo
|
||||
CPUs []CPU
|
||||
}
|
||||
|
||||
// CPU represents a CPU package occupying a socket.
//
// The cpuinfo struct tags name the key each field is associated with; the
// code that consumes these tags is not part of this file section.
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int    // Total cores in this package
	EfficiencyCoreCount int    // Performance = CoreCount - Efficiency
	ThreadCount         int    // Threads in this package
}
|
||||
|
||||
type CudaGPUInfo struct {
|
||||
@@ -158,3 +169,12 @@ type SystemInfo struct {
|
||||
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
|
||||
DiscoveryErrors []string `json:"discovery_errors"`
|
||||
}
|
||||
|
||||
// Return the optimal number of threads to use for inference
|
||||
func (si SystemInfo) GetOptimalThreadCount() int {
|
||||
if len(si.System.CPUs) == 0 {
|
||||
return 0
|
||||
}
|
||||
// Allocate thread count matching the performance cores on a single socket
|
||||
return si.System.CPUs[0].CoreCount - si.System.CPUs[0].EfficiencyCoreCount
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user