Mirror of https://github.com/dogkeeper886/ollama37.git
bool
@@ -221,7 +221,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--memory-f32")
 	}
 
-	flashAttnEnabled := envconfig.FlashAttention
+	flashAttnEnabled := envconfig.FlashAttention()
 
 	for _, g := range gpus {
 		// only cuda (compute capability 7+) and metal support flash attention
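The parentheses are the whole change: envconfig.FlashAttention used to be read as a package-level value, and this commit turns it into an accessor function evaluated at call time. A minimal sketch of that pattern, assuming the setting comes from an OLLAMA_FLASH_ATTENTION environment variable (the Bool helper name and the parsing rules here are illustrative, not taken from this diff):

	package envconfig

	import (
		"os"
		"strconv"
	)

	// Bool returns an accessor that reports whether the named environment
	// variable is set to a truthy value ("1", "t", "true", ...). Reading
	// through a function means each call sees the current environment
	// instead of a value cached at process start.
	func Bool(key string) func() bool {
		return func() bool {
			v, ok := os.LookupEnv(key)
			if !ok {
				return false
			}
			b, err := strconv.ParseBool(v)
			return err == nil && b
		}
	}

	// FlashAttention reports whether flash attention is requested.
	// Hypothetical wiring; the real envconfig may differ in name and defaults.
	var FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")

With that shape, FlashAttention is itself a func() bool, so call sites such as NewLlamaServer must invoke it, which is exactly what the + line above does.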