Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-10 07:46:59 +00:00)
Implement Go-based test runner framework for Tesla K80 testing
Add a comprehensive test orchestration framework.

Test Runner (cmd/test-runner/):
- config.go: YAML configuration loading and validation
- server.go: Ollama server lifecycle management (start/stop/health checks)
- monitor.go: Real-time log monitoring with pattern matching
- test.go: Model testing via the Ollama API (pull, chat, validation)
- validate.go: Test result validation (GPU usage, response quality, log analysis)
- report.go: Structured reporting (JSON and Markdown formats)
- main.go: CLI interface with run/validate/list commands

Test Configurations (test/config/):
- models.yaml: Full test suite with quick/full/stress profiles
- quick.yaml: Fast smoke test with gemma2:2b

Updated Workflow:
- tesla-k80-tests.yml: Use test-runner instead of shell scripts
- Run quick tests first, then full tests if passing
- Generate structured JSON reports for pass/fail checking
- Upload test results as artifacts

Features:
- Multi-model testing with configurable profiles
- API-based testing (not CLI commands)
- Real-time log monitoring for GPU events and errors
- Automatic validation of GPU loading and response quality
- Structured JSON and Markdown reports
- Graceful server lifecycle management
- Interrupt handling (Ctrl+C cleanup)

Addresses limitations of shell-based testing by providing:
- Better error handling and reporting
- Programmatic test orchestration
- A reusable test framework
- Clear pass/fail criteria
- Detailed test metrics and timing
This commit is contained in:
test/config/models.yaml — new file, 92 lines (@@ -0,0 +1,92 @@)
|
||||
# Test configuration for Tesla K80 model testing
|
||||
# This file defines test profiles with different model sizes and test scenarios
|
||||
|
||||
profiles:
|
||||
# Quick test profile - small models only, fast execution
|
||||
quick:
|
||||
timeout: 5m
|
||||
models:
|
||||
- name: gemma2:2b
|
||||
prompts:
|
||||
- "Hello, respond with a brief greeting."
|
||||
min_response_tokens: 5
|
||||
max_response_tokens: 100
|
||||
timeout: 30s
|
||||
|
||||
# Full test profile - comprehensive testing across model sizes
|
||||
full:
|
||||
timeout: 30m
|
||||
models:
|
||||
- name: gemma2:2b
|
||||
prompts:
|
||||
- "Hello, respond with a brief greeting."
|
||||
- "What is 2+2? Answer briefly."
|
||||
min_response_tokens: 5
|
||||
max_response_tokens: 100
|
||||
timeout: 30s
|
||||
|
||||
- name: gemma3:4b
|
||||
prompts:
|
||||
- "Explain photosynthesis in one sentence."
|
||||
min_response_tokens: 10
|
||||
max_response_tokens: 200
|
||||
timeout: 60s
|
||||
|
||||
- name: gemma3:12b
|
||||
prompts:
|
||||
- "Write a short haiku about GPUs."
|
||||
min_response_tokens: 15
|
||||
max_response_tokens: 150
|
||||
timeout: 120s
|
||||
|
||||
# Stress test profile - larger models and longer prompts
|
||||
stress:
|
||||
timeout: 60m
|
||||
models:
|
||||
- name: gemma3:12b
|
||||
prompts:
|
||||
- "Write a detailed explanation of how neural networks work, focusing on backpropagation."
|
||||
- "Describe the architecture of a transformer model in detail."
|
||||
min_response_tokens: 50
|
||||
max_response_tokens: 1000
|
||||
timeout: 300s
|
||||
|
||||
# Validation rules applied to all tests
|
||||
validation:
|
||||
# Require GPU acceleration (fail if CPU fallback detected)
|
||||
gpu_required: true
|
||||
|
||||
# Require single GPU usage for Tesla K80 (detect unnecessary multi-GPU splits)
|
||||
single_gpu_preferred: true
|
||||
|
||||
# Log patterns to check for success/failure
|
||||
check_patterns:
|
||||
success:
|
||||
- "loaded model"
|
||||
- "offload.*GPU"
|
||||
- "CUDA backend"
|
||||
failure:
|
||||
- "CUDA.*error"
|
||||
- "out of memory"
|
||||
- "OOM"
|
||||
- "CPU backend"
|
||||
- "failed to load"
|
||||
warning:
|
||||
- "fallback"
|
||||
- "using CPU"
|
||||
|
||||
# Server configuration
|
||||
server:
|
||||
host: "localhost"
|
||||
port: 11434
|
||||
startup_timeout: 30s
|
||||
health_check_interval: 1s
|
||||
health_check_endpoint: "/api/tags"
|
||||
|
||||
# Reporting configuration
|
||||
reporting:
|
||||
formats:
|
||||
- json
|
||||
- markdown
|
||||
include_logs: true
|
||||
log_excerpt_lines: 50 # Lines of log to include per failure
|
||||
test/config/quick.yaml — new file, 38 lines (@@ -0,0 +1,38 @@)
|
||||
# Quick test profile - fast smoke test with small model
|
||||
# Run time: ~1-2 minutes
|
||||
|
||||
profiles:
|
||||
quick:
|
||||
timeout: 5m
|
||||
models:
|
||||
- name: gemma2:2b
|
||||
prompts:
|
||||
- "Hello, respond with a brief greeting."
|
||||
min_response_tokens: 5
|
||||
max_response_tokens: 100
|
||||
timeout: 30s
|
||||
|
||||
validation:
|
||||
gpu_required: true
|
||||
single_gpu_preferred: true
|
||||
check_patterns:
|
||||
success:
|
||||
- "loaded model"
|
||||
- "offload.*GPU"
|
||||
failure:
|
||||
- "CUDA.*error"
|
||||
- "out of memory"
|
||||
- "CPU backend"
|
||||
|
||||
server:
|
||||
host: "localhost"
|
||||
port: 11434
|
||||
startup_timeout: 30s
|
||||
health_check_interval: 1s
|
||||
health_check_endpoint: "/api/tags"
|
||||
|
||||
reporting:
|
||||
formats:
|
||||
- json
|
||||
include_logs: true
|
||||
log_excerpt_lines: 50
|
||||
Reference in New Issue
Block a user