mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-16 02:37:06 +00:00
Use flash attention flag for now (#4580)
* put flash attention behind flag for now * add test * remove print * up timeout for scheduler tests
This commit is contained in:
@@ -31,6 +31,8 @@ var (
|
||||
RunnersDir string
|
||||
// Set via OLLAMA_TMPDIR in the environment
|
||||
TmpDir string
|
||||
// Experimental flash attention
|
||||
FlashAttention bool
|
||||
)
|
||||
|
||||
func AsMap() map[string]string {
|
||||
@@ -45,6 +47,7 @@ func AsMap() map[string]string {
|
||||
"OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel),
|
||||
"OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir),
|
||||
"OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir),
|
||||
"OLLAMA_FLASH_ATTENTION": fmt.Sprintf("%v", FlashAttention),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,6 +81,13 @@ func LoadConfig() {
|
||||
}
|
||||
}
|
||||
|
||||
if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
|
||||
d, err := strconv.ParseBool(fa)
|
||||
if err == nil {
|
||||
FlashAttention = d
|
||||
}
|
||||
}
|
||||
|
||||
RunnersDir = clean("OLLAMA_RUNNERS_DIR")
|
||||
if runtime.GOOS == "windows" && RunnersDir == "" {
|
||||
// On Windows we do not carry the payloads inside the main executable
|
||||
|
||||
Reference in New Issue
Block a user