# Quick test profile - fast smoke test with small model
# Run time: ~1-2 minutes
#
# NOTE(review): the line structure of this file was reconstructed from a
# collapsed single-line form. The placement of `validation`, `server` and
# `reporting` at the top level (rather than under `profiles.quick`) is an
# assumption - confirm against the consumer's schema.
---
profiles:
  quick:
    # Timeout for the profile as a whole; the per-model timeout is set below.
    timeout: 5m
    models:
      - name: gemma2:2b
        prompts:
          - "Hello, respond with a brief greeting."
        min_response_tokens: 5
        max_response_tokens: 100
        # Distinct from the profile-level timeout above.
        timeout: 30s

validation:
  gpu_required: true
  single_gpu_preferred: true
  # Presumably regular expressions matched against log output - TODO confirm.
  check_patterns:
    success:
      - "loaded model"
      - "offload.*GPU"
    failure:
      - "CUDA.*error"
      - "out of memory"
      - "CPU backend"

server:
  host: "localhost"
  port: 11434
  startup_timeout: 30s
  health_check_interval: 1s
  health_check_endpoint: "/api/tags"

reporting:
  formats:
    - json
  include_logs: true
  log_excerpt_lines: 50