ollama37/test/config/quick.yaml
Shang Chieh Tseng d59284d30a Implement Go-based test runner framework for Tesla K80 testing
Add comprehensive test orchestration framework:

Test Runner (cmd/test-runner/):
- config.go: YAML configuration loading and validation
- server.go: Ollama server lifecycle management (start/stop/health checks)
- monitor.go: Real-time log monitoring with pattern matching
- test.go: Model testing via Ollama API (pull, chat, validation)
- validate.go: Test result validation (GPU usage, response quality, log analysis)
- report.go: Structured reporting (JSON and Markdown formats)
- main.go: CLI interface with run/validate/list commands
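The actual code of config.go is not shown here, but based on the shape of quick.yaml below, a minimal sketch of the struct types and a validation pass might look like this (all type, field, and function names are assumptions, not the real implementation):

```go
package main

import (
	"fmt"
	"time"
)

// Hypothetical struct types mirroring quick.yaml; the actual
// definitions in config.go may differ.
type ModelTest struct {
	Name              string   `yaml:"name"`
	Prompts           []string `yaml:"prompts"`
	MinResponseTokens int      `yaml:"min_response_tokens"`
	MaxResponseTokens int      `yaml:"max_response_tokens"`
	Timeout           string   `yaml:"timeout"`
}

type Profile struct {
	Timeout string      `yaml:"timeout"`
	Models  []ModelTest `yaml:"models"`
}

// validateProfile covers the kind of checks config.go is described
// as performing: parseable durations and a sane token range.
func validateProfile(p Profile) error {
	if _, err := time.ParseDuration(p.Timeout); err != nil {
		return fmt.Errorf("profile timeout: %w", err)
	}
	for _, m := range p.Models {
		if m.Name == "" {
			return fmt.Errorf("model entry is missing a name")
		}
		if m.MinResponseTokens > m.MaxResponseTokens {
			return fmt.Errorf("%s: min_response_tokens exceeds max_response_tokens", m.Name)
		}
		if _, err := time.ParseDuration(m.Timeout); err != nil {
			return fmt.Errorf("%s: timeout: %w", m.Name, err)
		}
	}
	return nil
}
```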

Test Configurations (test/config/):
- models.yaml: Full test suite with quick/full/stress profiles
- quick.yaml: Fast smoke test with gemma2:2b

Updated Workflow:
- tesla-k80-tests.yml: Use test-runner instead of shell scripts
- Run quick tests first, then full tests if passing
- Generate structured JSON reports for pass/fail checking
- Upload test results as artifacts

Features:
- Multi-model testing with configurable profiles
- API-based testing (not CLI commands)
- Real-time log monitoring for GPU events and errors
- Automatic validation of GPU loading and response quality
- Structured JSON and Markdown reports
- Graceful server lifecycle management
- Interrupt handling (Ctrl+C cleanup)

Addresses limitations of shell-based testing by providing:
- Better error handling and reporting
- Programmatic test orchestration
- Reusable test framework
- Clear pass/fail criteria
- Detailed test metrics and timing
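A structured JSON report with clear pass/fail criteria can be as simple as marshal-friendly structs plus an aggregate pass flag. A hedged sketch (report.go's actual schema is not shown and may well differ):

```go
package main

import "encoding/json"

// Hypothetical report shape; the real fields in report.go may differ.
type TestResult struct {
	Model           string  `json:"model"`
	Passed          bool    `json:"passed"`
	DurationSeconds float64 `json:"duration_seconds"`
	Error           string  `json:"error,omitempty"`
}

type Report struct {
	Profile string       `json:"profile"`
	Results []TestResult `json:"results"`
	Passed  bool         `json:"passed"`
}

// buildReport aggregates per-model results: the run passes only
// if every individual test passed.
func buildReport(profile string, results []TestResult) Report {
	r := Report{Profile: profile, Results: results, Passed: true}
	for _, t := range results {
		if !t.Passed {
			r.Passed = false
		}
	}
	return r
}

// JSON renders the report for pass/fail checking in CI.
func (r Report) JSON() ([]byte, error) {
	return json.MarshalIndent(r, "", "  ")
}
```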
2025-10-30 11:04:48 +08:00


# Quick test profile - fast smoke test with small model
# Run time: ~1-2 minutes
profiles:
  quick:
    timeout: 5m
    models:
      - name: gemma2:2b
        prompts:
          - "Hello, respond with a brief greeting."
        min_response_tokens: 5
        max_response_tokens: 100
        timeout: 30s

validation:
  gpu_required: true
  single_gpu_preferred: true
  check_patterns:
    success:
      - "loaded model"
      - "offload.*GPU"
    failure:
      - "CUDA.*error"
      - "out of memory"
      - "CPU backend"

server:
  host: "localhost"
  port: 11434
  startup_timeout: 30s
  health_check_interval: 1s
  health_check_endpoint: "/api/tags"

reporting:
  formats:
    - json
  include_logs: true
  log_excerpt_lines: 50
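The check_patterns section pairs naturally with a small regex classifier in the log monitor. A sketch of how monitor.go might match a log line against these patterns (function and variable names are illustrative; failure patterns are checked first so an error line is never masked by a success match):

```go
package main

import "regexp"

// Success and failure patterns taken verbatim from quick.yaml.
var (
	successRe = regexp.MustCompile(`loaded model|offload.*GPU`)
	failureRe = regexp.MustCompile(`CUDA.*error|out of memory|CPU backend`)
)

// classifyLine labels a single server log line as "failure",
// "success", or "neutral".
func classifyLine(line string) string {
	switch {
	case failureRe.MatchString(line):
		return "failure"
	case successRe.MatchString(line):
		return "success"
	default:
		return "neutral"
	}
}
```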