fix: relay request opts to loaded llm prediction (#1761)

Author: Bruce MacDonald
Date: 2024-01-03 12:01:42 -05:00
Committed by: GitHub
Parent: 05face44ef
Commit: 0b3118e0af
5 changed files with 106 additions and 71 deletions


@@ -60,7 +60,7 @@ func newDefaultExtServer(model string, adapters, projectors []string, numLayers
 }
 func (llm *llamaExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
-	return predict(llm, llm.Options, ctx, pred, fn)
+	return predict(ctx, llm, pred, fn)
 }
 func (llm *llamaExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
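The interesting part of this hunk is the predict call: the context moves to the first position (idiomatic Go) and the explicit llm.Options argument disappears, which suggests the per-request options now travel inside PredictOpts itself, matching the commit title "relay request opts to loaded llm prediction". Below is a minimal sketch of what the updated helper might look like; the stub types and the Options field on PredictOpts are assumptions for illustration, not taken from the visible diff.

package llm

import "context"

// Hypothetical stubs for illustration; the real types live elsewhere in the package.
type Options struct{}
type PredictResult struct{}
type extServer interface{}

// PredictOpts carries the per-request prediction options. Relaying these
// (rather than a fixed server-wide llm.Options) appears to be the point of
// this fix; the exact fields here are assumptions.
type PredictOpts struct {
	Prompt  string
	Options Options // request-scoped options, assumed
}

// Assumed updated signature: ctx comes first, and the separate Options
// argument is gone because pred already carries the request's options.
func predict(ctx context.Context, llm extServer, pred PredictOpts, fn func(PredictResult)) error {
	// ...build the prediction request from pred.Options instead of a
	// server-level Options value, streaming results back through fn...
	return nil
}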