Bump llama.cpp to b1662 and set n_parallel=1

This commit is contained in:
Daniel Hiltgen
2023-12-14 10:25:12 -08:00
parent 89bbaafa64
commit 9adca7f711
3 changed files with 9 additions and 9 deletions

View File

@@ -160,7 +160,7 @@ func newExtServer(server extServer, model string, adapters, projectors []string,
sparams.n_batch = C.uint(opts.NumBatch)
sparams.n_gpu_layers = C.int(numGPU)
sparams.main_gpu = C.int(opts.MainGPU)
-	sparams.n_parallel = 2 // TODO - wire up concurrency
+	sparams.n_parallel = 1 // TODO - wire up concurrency
// Always use the value encoded in the model
sparams.rope_freq_base = 0.0