Add multi-GPU test workflow and rename single-GPU workflow

- Rename tesla-k80-tests.yml to tesla-k80-single-gpu-tests.yml for clarity
- Add new tesla-k80-multi-gpu-tests.yml workflow for large models
- Add multi-gpu profile to test/config/models.yaml with gemma3:27b and gpt-oss:20b
- Multi-GPU workflow includes GPU count verification and weekly schedule
- Profile-specific validation allows multi-GPU splits for large models
- Separate workflows improve CI efficiency by keeping quick single-GPU tests apart from the slower, more thorough multi-GPU tests
This commit is contained in:
Shang Chieh Tseng
2025-10-30 12:04:50 +08:00
parent 1aa80e9411
commit 6c3876a30d
3 changed files with 110 additions and 10 deletions

View File

@@ -24,19 +24,40 @@ profiles:
max_response_tokens: 100
timeout: 120s
# Stress test profile - larger models and longer prompts
stress:
timeout: 60m
# Multi-GPU test profile - test models requiring 2x Tesla K80s
multi-gpu:
timeout: 45m
models:
- name: gemma3:12b
- name: gemma3:27b
prompts:
- "Write a detailed explanation of how neural networks work, focusing on backpropagation."
- "Describe the architecture of a transformer model in detail."
min_response_tokens: 50
max_response_tokens: 1000
- "Hello, respond with a brief greeting."
min_response_tokens: 5
max_response_tokens: 100
timeout: 300s
- name: gpt-oss:20b
prompts:
- "Hello, respond with a brief greeting."
min_response_tokens: 5
max_response_tokens: 100
timeout: 240s
validation:
# Override single_gpu_preferred for multi-GPU tests
gpu_required: true
single_gpu_preferred: false
check_patterns:
success:
- "loaded model"
- "offload.*GPU"
- "CUDA backend"
- "split.*layer.*GPU" # Expect multi-GPU split
failure:
- "CUDA.*error"
- "out of memory"
- "OOM"
- "CPU backend"
- "failed to load"
# Validation rules applied to all tests
# Validation rules applied to all tests (unless overridden in profile)
validation:
# Require GPU acceleration (fail if CPU fallback detected)
gpu_required: true