llm: dont cap context window limit to training context window (#3988)

Jeffrey Morgan
2024-04-29 10:07:30 -04:00
committed by GitHub
parent 7e432cdfac
commit 7aa08a77ca


@@ -73,8 +73,7 @@ func LoadModel(model string) (*GGML, error) {
 func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
 	var err error
 	if opts.NumCtx > int(ggml.KV().ContextLength()) {
-		slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
-		opts.NumCtx = int(ggml.KV().ContextLength())
+		slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
 	}
 	if opts.NumCtx < 4 {
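
For context, a minimal standalone sketch of the behavior change, factored out of the server setup for illustration. The function and variable names (applyContextLimit, trainingCtx) are hypothetical, not from the ollama source, and the minimum-context branch is assumed to floor the value at 4 since its body is cut off in the hunk above.

package main

import "log/slog"

// applyContextLimit sketches the new behavior: a requested context length
// larger than the model's training context window now only logs a warning
// instead of being capped, while the lower bound of 4 is still enforced.
func applyContextLimit(numCtx int, trainingCtx uint64) int {
	if numCtx > int(trainingCtx) {
		// Before this commit, the request was clamped here:
		//   numCtx = int(trainingCtx)
		// After it, the user's requested size is honored.
		slog.Warn("requested context length is greater than the model's training context window size",
			"requested", numCtx, "training size", trainingCtx)
	}
	if numCtx < 4 {
		// Assumed floor, matching the truncated branch in the diff.
		numCtx = 4
	}
	return numCtx
}

func main() {
	// E.g. requesting 8192 tokens on a model trained with a 4096-token
	// window now yields 8192 (plus a warning) instead of 4096.
	slog.Info("result", "numCtx", applyContextLimit(8192, 4096))
}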