Discovery CPU details for default thread selection (#6264)

On windows, detect large multi-socket systems and reduce to the number of cores
in one socket for best performance
This commit is contained in:
Daniel Hiltgen
2024-10-15 11:36:08 -07:00
committed by GitHub
parent 1d7fa3ad2d
commit 24636dfa87
7 changed files with 408 additions and 24 deletions

View File

@@ -10,11 +10,11 @@ import (
type memInfo struct {
TotalMemory uint64 `json:"total_memory,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
FreeSwap uint64 `json:"free_swap,omitempty"`
FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}
// Beginning of an `ollama info` command
type GpuInfo struct {
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
memInfo
Library string `json:"library,omitempty"`
@@ -49,6 +49,17 @@ type GpuInfo struct {
type CPUInfo struct {
GpuInfo
CPUs []CPU
}
// CPU type represents a CPU Package occupying a socket
type CPU struct {
ID string `cpuinfo:"processor"`
VendorID string `cpuinfo:"vendor_id"`
ModelName string `cpuinfo:"model name"`
CoreCount int
EfficiencyCoreCount int // Performance = CoreCount - Efficiency
ThreadCount int
}
type CudaGPUInfo struct {
@@ -158,3 +169,12 @@ type SystemInfo struct {
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
DiscoveryErrors []string `json:"discovery_errors"`
}
// Return the optimal number of threads to use for inference
func (si SystemInfo) GetOptimalThreadCount() int {
if len(si.System.CPUs) == 0 {
return 0
}
// Allocate thread count matching the performance cores on a single socket
return si.System.CPUs[0].CoreCount - si.System.CPUs[0].EfficiencyCoreCount
}