mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
Refine CPU load behavior with system memory visibility
This commit is contained in:
96
gpu/gpu.go
96
gpu/gpu.go
@@ -11,6 +11,8 @@ package gpu
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -246,6 +248,17 @@ func initOneAPIHandles() *oneapiHandles {
|
||||
return oHandles
|
||||
}
|
||||
|
||||
func GetCPUInfo() GpuInfoList {
|
||||
gpuMutex.Lock()
|
||||
if !bootstrapped {
|
||||
gpuMutex.Unlock()
|
||||
GetGPUInfo()
|
||||
} else {
|
||||
gpuMutex.Unlock()
|
||||
}
|
||||
return GpuInfoList{cpus[0].GpuInfo}
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfoList {
|
||||
// TODO - consider exploring lspci (and equivalent on windows) to check for
|
||||
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
||||
@@ -279,22 +292,19 @@ func GetGPUInfo() GpuInfoList {
|
||||
needRefresh = false
|
||||
cpuCapability = getCPUCapability()
|
||||
var memInfo C.mem_info_t
|
||||
C.cpu_check_ram(&memInfo)
|
||||
if memInfo.err != nil {
|
||||
slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
|
||||
C.free(unsafe.Pointer(memInfo.err))
|
||||
return []GpuInfo{}
|
||||
|
||||
mem, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
cpuInfo := CPUInfo{
|
||||
cpus = []CPUInfo{CPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability.ToVariant(),
|
||||
ID: "0",
|
||||
},
|
||||
}
|
||||
cpuInfo.TotalMemory = uint64(memInfo.total)
|
||||
cpuInfo.FreeMemory = uint64(memInfo.free)
|
||||
cpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||
cpus = []CPUInfo{cpuInfo}
|
||||
}}
|
||||
|
||||
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
||||
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
||||
@@ -394,7 +404,25 @@ func GetGPUInfo() GpuInfoList {
|
||||
|
||||
// Refresh free memory usage
|
||||
if needRefresh {
|
||||
// TODO - CPU system memory tracking/refresh
|
||||
mem, err := GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
} else {
|
||||
slog.Debug("updating system memory data",
|
||||
slog.Group(
|
||||
"before",
|
||||
"total", format.HumanBytes2(cpus[0].TotalMemory),
|
||||
"free", format.HumanBytes2(cpus[0].FreeMemory),
|
||||
),
|
||||
slog.Group(
|
||||
"now",
|
||||
"total", format.HumanBytes2(mem.TotalMemory),
|
||||
"free", format.HumanBytes2(mem.FreeMemory),
|
||||
),
|
||||
)
|
||||
cpus[0].FreeMemory = mem.FreeMemory
|
||||
}
|
||||
|
||||
var memInfo C.mem_info_t
|
||||
if cHandles == nil && len(cudaGPUs) > 0 {
|
||||
cHandles = initCudaHandles()
|
||||
@@ -455,7 +483,7 @@ func GetGPUInfo() GpuInfoList {
|
||||
oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
|
||||
}
|
||||
|
||||
err := RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
|
||||
err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
|
||||
if err != nil {
|
||||
slog.Debug("problem refreshing ROCm free memory", "error", err)
|
||||
}
|
||||
@@ -478,6 +506,9 @@ func GetGPUInfo() GpuInfoList {
|
||||
}
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
if runtime.GOOS == "linux" {
|
||||
return GetLinuxMemInfo()
|
||||
}
|
||||
var ret memInfo
|
||||
var info C.mem_info_t
|
||||
C.cpu_check_ram(&info)
|
||||
@@ -651,3 +682,42 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
func GetLinuxMemInfo() (memInfo, error) {
|
||||
var mem memInfo
|
||||
var total, available, free, buffers, cached uint64
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return mem, err
|
||||
}
|
||||
defer f.Close()
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
switch {
|
||||
case bytes.HasPrefix(s.Bytes(), []byte(`MemTotal:`)):
|
||||
_, err = fmt.Sscanf(s.Text(), "MemTotal:%d", &total)
|
||||
case bytes.HasPrefix(s.Bytes(), []byte(`MemAvailable:`)):
|
||||
_, err = fmt.Sscanf(s.Text(), "MemAvailable:%d", &available)
|
||||
case bytes.HasPrefix(s.Bytes(), []byte(`MemFree:`)):
|
||||
_, err = fmt.Sscanf(s.Text(), "MemFree:%d", &free)
|
||||
case bytes.HasPrefix(s.Bytes(), []byte(`Buffers:`)):
|
||||
_, err = fmt.Sscanf(s.Text(), "Buffers:%d", &buffers)
|
||||
case bytes.HasPrefix(s.Bytes(), []byte(`Cached:`)):
|
||||
_, err = fmt.Sscanf(s.Text(), "Cached:%d", &cached)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
return mem, err
|
||||
}
|
||||
|
||||
if total > 0 && available > 0 {
|
||||
mem.TotalMemory = total * 1024
|
||||
mem.FreeMemory = available * 1024
|
||||
return mem, nil
|
||||
}
|
||||
}
|
||||
mem.TotalMemory = total * 1024
|
||||
mem.FreeMemory = (free + buffers + cached) * 1024
|
||||
return mem, nil
|
||||
}
|
||||
|
||||
@@ -42,6 +42,17 @@ func GetGPUInfo() GpuInfoList {
|
||||
return []GpuInfo{info}
|
||||
}
|
||||
|
||||
func GetCPUInfo() GpuInfoList {
|
||||
mem, _ := GetCPUMem()
|
||||
return []GpuInfo{
|
||||
{
|
||||
Library: "cpu",
|
||||
Variant: GetCPUVariant(),
|
||||
memInfo: mem,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
|
||||
@@ -35,11 +35,7 @@ void cpu_check_ram(mem_info_t *resp) {
|
||||
}
|
||||
|
||||
#elif __APPLE__
|
||||
// TODO consider an Apple implementation that does something useful
|
||||
// mem_info_t cpu_check_ram() {
|
||||
// mem_info_t resp = {0, 0, NULL};
|
||||
// return resp;
|
||||
// }
|
||||
// Unused - see gpu_darwin.go
|
||||
#else
|
||||
#error "Unsupported platform"
|
||||
#endif
|
||||
|
||||
@@ -11,8 +11,6 @@ void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
|
||||
LOG(1, "XXX starting nvml_init %s\n", nvml_lib_path);
|
||||
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
@@ -37,13 +35,11 @@ void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
|
||||
}
|
||||
|
||||
// TODO once we've squashed the remaining corner cases remove this log
|
||||
// LOG(resp->ch.verbose, "wiring nvidia management library functions in %s\n", nvml_lib_path);
|
||||
// LOG(resp->ch.verbose, "wiring nvidia management library functions in %s\n", nvml_lib_path);
|
||||
|
||||
LOG(1, "XXX wiring functions nvml_init\n");
|
||||
|
||||
for (i = 0; l[i].s != NULL; i++) {
|
||||
// TODO once we've squashed the remaining corner cases remove this log
|
||||
LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
|
||||
// LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
|
||||
|
||||
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
@@ -58,7 +54,6 @@ void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
LOG(1, "XXX calling init_v2\n");
|
||||
|
||||
ret = (*resp->ch.nvmlInit_v2)();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
@@ -69,8 +64,6 @@ void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
LOG(1, "XXX nvml_init done\n");
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +71,6 @@ void nvml_get_free(nvml_handle_t h, int device_id, uint64_t *free, uint64_t *tot
|
||||
nvmlDevice_t device;
|
||||
nvmlMemory_t memInfo = {0};
|
||||
nvmlReturn_t ret;
|
||||
LOG(1, "XXX in nvml_get_free\n");
|
||||
ret = (*h.nvmlDeviceGetHandleByIndex)(device_id, &device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(1, "unable to get device handle %d: %d", device_id, ret);
|
||||
|
||||
Reference in New Issue
Block a user