fix: relay request opts to loaded llm prediction (#1761)

Author: Bruce MacDonald
Date: 2024-01-03 12:01:42 -05:00
Committed by: GitHub
Parent: 05face44ef
Commit: 0b3118e0af
5 changed files with 106 additions and 71 deletions


@@ -60,7 +60,7 @@ func newDefaultExtServer(model string, adapters, projectors []string, numLayers
 }
 func (llm *llamaExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
-	return predict(llm, llm.Options, ctx, pred, fn)
+	return predict(ctx, llm, pred, fn)
 }
 func (llm *llamaExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
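The interesting part of this hunk is the predict call: the context moves to the first position (idiomatic Go) and the explicit llm.Options argument disappears, which suggests the per-request options now travel inside PredictOpts itself, matching the commit title "relay request opts to loaded llm prediction". Below is a minimal sketch of what the updated helper might look like; the stub types and the Options field on PredictOpts are assumptions for illustration, not taken from the visible diff.

package llm

import "context"

// Hypothetical stubs for illustration; the real types live elsewhere in the package.
type Options struct{}
type PredictResult struct{}
type extServer interface{}

// PredictOpts carries the per-request prediction options. Relaying these
// (rather than a fixed server-wide llm.Options) appears to be the point of
// this fix; the exact fields here are assumptions.
type PredictOpts struct {
	Prompt  string
	Options Options // request-scoped options, assumed
}

// Assumed updated signature: ctx comes first, and the separate Options
// argument is gone because pred already carries the request's options.
func predict(ctx context.Context, llm extServer, pred PredictOpts, fn func(PredictResult)) error {
	// ...build the prediction request from pred.Options instead of a
	// server-level Options value, streaming results back through fn...
	return nil
}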