Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 15:57:04 +00:00.
llm: normalise kvct parameter handling (#7926)
This commit is contained in:
@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	var kvct string
 	if fa {
-		requested := envconfig.KvCacheType()
+		requested := strings.ToLower(envconfig.KvCacheType())
 		if requested != "" && ggml.SupportsKVCacheType(requested) {
 			kvct = requested
 		}
@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapters
 		fa = false
 	}

-	kvct := envconfig.KvCacheType()
+	kvct := strings.ToLower(envconfig.KvCacheType())

 	if fa {
 		slog.Info("enabling flash attention")
Reference in New Issue
Block a user