Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 07:46:59 +00:00
The test configuration was treating 'CPU backend' as a failure pattern, but this is incorrect. Loading the CPU backend library is normal - ollama loads both CUDA and CPU backends for fallback operations. The log line 'load_backend: loaded CPU backend from libggml-cpu-*.so' is a success message, not an error.

Changed failure patterns from:
- 'CPU backend' (too broad, matches normal loading)
- 'failed to load.*CUDA' (too specific)

To more accurate patterns:
- 'failed to load.*backend' (matches actual load failures)
- 'backend.*failed' (matches failure messages)

This prevents false positives while still catching real backend failures.
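For illustration, here is a minimal standalone Go sketch (not code from this repository; the failure log line is invented for comparison) that runs the old and new patterns against the startup message quoted above:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Normal startup line cited in the commit message, plus a hypothetical real failure.
	normal := "load_backend: loaded CPU backend from libggml-cpu-*.so"
	failed := "load_backend: failed to load CUDA backend"

	oldPattern := regexp.MustCompile(`CPU backend`)
	newPatterns := []*regexp.Regexp{
		regexp.MustCompile(`failed to load.*backend`),
		regexp.MustCompile(`backend.*failed`),
	}

	// Old behaviour: the normal startup line is flagged as a failure.
	fmt.Println("old pattern on normal line:", oldPattern.MatchString(normal)) // true -> false positive

	// New behaviour: only the genuine failure line matches.
	for _, p := range newPatterns {
		fmt.Printf("%-30s normal=%v failed=%v\n", p.String(), p.MatchString(normal), p.MatchString(failed))
	}
}

The old 'CPU backend' pattern matches the normal startup line, while the replacement patterns match only the failure line.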
41 lines
802 B
YAML
# Quick test profile - fast smoke test with small model
# Run time: ~1-2 minutes

profiles:
  quick:
    timeout: 5m
    models:
      - name: gemma3:4b
        prompts:
          - "Hello, respond with a brief greeting."
        min_response_tokens: 5
        max_response_tokens: 100
        timeout: 60s

    validation:
      gpu_required: true
      single_gpu_preferred: true
      check_patterns:
        success:
          - "loaded model"
          - "offload.*GPU"
        failure:
          - "CUDA.*error"
          - "out of memory"
          - "failed to load.*backend"
          - "backend.*failed"
          - "no GPU detected"

server:
  host: "localhost"
  port: 11434
  startup_timeout: 30s
  health_check_interval: 1s
  health_check_endpoint: "/api/tags"

reporting:
  formats:
    - json
  include_logs: true
  log_excerpt_lines: 50
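As a reference for how the server block above might be consumed, the following hypothetical Go sketch (not part of this repository) polls the configured health check endpoint at health_check_interval until it responds or startup_timeout elapses:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// waitForServer polls the health check endpoint until it returns HTTP 200
// or the startup timeout elapses. Values mirror the server section above.
func waitForServer(host string, port int, endpoint string, interval, timeout time.Duration) error {
	url := fmt.Sprintf("http://%s:%d%s", host, port, endpoint)
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		resp, err := http.Get(url)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		time.Sleep(interval)
	}
	return fmt.Errorf("server did not become healthy within %s", timeout)
}

func main() {
	// Settings from the server section of the quick profile.
	err := waitForServer("localhost", 11434, "/api/tags", time.Second, 30*time.Second)
	fmt.Println("ready:", err == nil)
}

/api/tags is ollama's model-listing endpoint, so a 200 response indicates the API server is up and serving requests.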