mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
The failure pattern 'CPU backend' was incorrectly flagging the normal log message 'load_backend: loaded CPU backend from...' as an error. This is expected behavior - both CUDA and CPU backends are loaded, but GPU is actually used for computation (as shown by 'offloaded 35/35 layers to GPU'). Changed failure patterns to detect actual GPU failures: - Removed: 'CPU backend' (too broad, catches normal backend loading) - Added: 'failed to load.*CUDA' (actual load failures) - Added: 'no GPU detected' (GPU not available) Root cause: monitor.go processes failure patterns first (highest priority), so the 'CPU backend' pattern was creating EventError events before success patterns could be checked, causing tests to fail despite GPU working.
40 lines
773 B
YAML
40 lines
773 B
YAML
# Quick test profile - fast smoke test with small model
# Run time: ~1-2 minutes

profiles:
  quick:
    # Overall wall-clock budget for the whole profile run.
    timeout: 5m
    models:
      # Quoted because the value contains a colon; quoting keeps it an
      # unambiguous plain string across YAML parsers.
      - name: "gemma3:4b"
        prompts:
          - "Hello, respond with a brief greeting."
        # Sanity bounds on the model's reply length.
        min_response_tokens: 5
        max_response_tokens: 100
        # Per-request timeout, distinct from the profile-level timeout above.
        timeout: 60s

# NOTE(review): reconstructed as a top-level section (sibling of `profiles`
# and `server`) — confirm against the consuming tool's schema.
validation:
  gpu_required: true
  single_gpu_preferred: true
  # Regex patterns matched against server logs by the monitor. Failure
  # patterns are checked first (highest priority), so they must only match
  # real GPU failures — not the routine "load_backend: loaded CPU backend"
  # message that is emitted even when the GPU is actually used.
  check_patterns:
    success:
      - "loaded model"
      - "offload.*GPU"
    failure:
      - "CUDA.*error"
      - "out of memory"
      - "failed to load.*CUDA"
      - "no GPU detected"
# Connection and readiness settings for the server under test.
server:
  host: "localhost"
  port: 11434
  # How long to wait for the server process to come up before failing.
  startup_timeout: 30s
  # Poll this endpoint at this interval until the server responds.
  health_check_interval: 1s
  health_check_endpoint: "/api/tags"
# Output options for the test report.
reporting:
  formats:
    - json
  # Attach server log excerpts to the report, capped at this many lines.
  include_logs: true
  log_excerpt_lines: 50