update memory calculations

count each layer independently when deciding gpu offloading
This commit is contained in:
Michael Yang
2024-03-18 10:45:22 +01:00
parent d338d70492
commit 91b3e4d282
7 changed files with 125 additions and 89 deletions

View File

@@ -39,7 +39,7 @@ import (
type dynExtServer struct {
s C.struct_dynamic_llama_server
options api.Options
options *api.Options
}
// Note: current implementation does not support concurrent instantiations
@@ -64,7 +64,7 @@ func extServerResponseToErr(resp C.ext_server_resp_t) error {
return fmt.Errorf(C.GoString(resp.msg))
}
func newDynExtServer(library, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
func newDynExtServer(library, model string, adapters, projectors []string, opts *api.Options) (LLM, error) {
if !mutex.TryLock() {
slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
mutex.Lock()