# Test configuration for Tesla K80 model testing
# This file defines test profiles with different model sizes and test scenarios

profiles:
  # Quick test profile - small models only, fast execution
  quick:
    timeout: 5m
    models:
      - name: gemma2:2b
        prompts:
          - "Hello, respond with a brief greeting."
        min_response_tokens: 5
        max_response_tokens: 100
        timeout: 30s

  # Full test profile - comprehensive testing across model sizes
  full:
    timeout: 30m
    models:
      - name: gemma2:2b
        prompts:
          - "Hello, respond with a brief greeting."
          - "What is 2+2? Answer briefly."
        min_response_tokens: 5
        max_response_tokens: 100
        timeout: 30s
      - name: gemma3:4b
        prompts:
          - "Explain photosynthesis in one sentence."
        min_response_tokens: 10
        max_response_tokens: 200
        timeout: 60s
      - name: gemma3:12b
        prompts:
          - "Write a short haiku about GPUs."
        min_response_tokens: 15
        max_response_tokens: 150
        timeout: 120s

  # Stress test profile - larger models and longer prompts
  stress:
    timeout: 60m
    models:
      - name: gemma3:12b
        prompts:
          - "Write a detailed explanation of how neural networks work, focusing on backpropagation."
          - "Describe the architecture of a transformer model in detail."
        min_response_tokens: 50
        max_response_tokens: 1000
        timeout: 300s

# Validation rules applied to all tests
validation:
  # Require GPU acceleration (fail if CPU fallback detected)
  gpu_required: true
  # Prefer single-GPU usage on the Tesla K80 (flag unnecessary multi-GPU splits)
  single_gpu_preferred: true
  # Log patterns to check for success/failure
  check_patterns:
    success:
      - "loaded model"
      - "offload.*GPU"
      - "CUDA backend"
    failure:
      - "CUDA.*error"
      - "out of memory"
      - "OOM"
      - "CPU backend"
      - "failed to load"
    warning:
      - "fallback"
      - "using CPU"

# Server configuration
server:
  host: "localhost"
  port: 11434
  startup_timeout: 30s
  health_check_interval: 1s
  health_check_endpoint: "/api/tags"

# Reporting configuration
reporting:
  formats:
    - json
    - markdown
  include_logs: true
  log_excerpt_lines: 50  # Lines of log to include per failure
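
# Usage sketch (hypothetical): the filename, profile choice, and classify()
# helper below are illustrative placeholders, not defined by this config.
# A runner might load a profile and classify server log lines against
# check_patterns roughly like this (Python, assuming PyYAML is installed):
#
#   import re
#   import yaml
#
#   with open("test_config.yaml") as f:   # this file; name is a placeholder
#       cfg = yaml.safe_load(f)
#
#   profile = cfg["profiles"]["quick"]    # or "full" / "stress"
#   patterns = cfg["validation"]["check_patterns"]
#
#   def classify(log_line):
#       # Return the first matching level; failure takes precedence over
#       # warning, which takes precedence over success.
#       for level in ("failure", "warning", "success"):
#           if any(re.search(p, log_line) for p in patterns[level]):
#               return level
#       return None
#
# Note that entries such as "offload.*GPU" are regular expressions, so the
# runner should match them with a regex engine rather than plain substring
# comparison.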