Add multi-GPU test workflow and rename single-GPU workflow
- Rename tesla-k80-tests.yml to tesla-k80-single-gpu-tests.yml for clarity
- Add new tesla-k80-multi-gpu-tests.yml workflow for large models
- Add multi-gpu profile to test/config/models.yaml with gemma3:27b and gpt-oss:20b
- Multi-GPU workflow includes GPU count verification and a weekly schedule
- Profile-specific validation allows multi-GPU splits for large models
- Separate workflows keep CI efficient: quick single-GPU tests vs. thorough multi-GPU tests
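The new workflow is manual-dispatch only, plus a weekly cron (Sundays 02:00 UTC). As a usage sketch, a run could be kicked off from a checkout with the standard GitHub CLI (assuming gh is authenticated against this fork; none of these commands are defined by the commit itself):

  gh workflow run tesla-k80-multi-gpu-tests.yml --ref main
  gh run watch   # pick the new run and follow it on the self-hosted K80 runner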
.github/workflows/tesla-k80-multi-gpu-tests.yml (new file, 79 lines, vendored)
@@ -0,0 +1,79 @@
name: Tesla K80 Multi-GPU Tests

on:
  workflow_dispatch: # Manual trigger only
  schedule:
    # Run weekly on Sundays at 2 AM UTC (less frequent than single-GPU tests)
    - cron: '0 2 * * 0'

jobs:
  multi-gpu-test:
    runs-on: self-hosted

    timeout-minutes: 90 # Longer timeout for large models

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Verify ollama binary exists
        run: |
          if [ ! -f ./ollama ]; then
            echo "Error: ollama binary not found. Please run the build workflow first."
            exit 1
          fi
          ls -lh ollama

      - name: Verify multi-GPU setup
        run: |
          nvidia-smi --list-gpus
          GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
          if [ "$GPU_COUNT" -lt 2 ]; then
            echo "Error: Multi-GPU tests require at least 2 GPUs. Found: $GPU_COUNT"
            exit 1
          fi
          echo "Found $GPU_COUNT GPUs - proceeding with multi-GPU tests"

      - name: Build test-runner
        run: |
          cd cmd/test-runner
          go mod init github.com/ollama/ollama/cmd/test-runner || true
          go mod tidy
          go build -o ../../test-runner .
          cd ../..
          ls -lh test-runner

      - name: Validate multi-GPU test configuration
        run: |
          ./test-runner validate --config test/config/models.yaml --profile multi-gpu

      - name: Run multi-GPU tests
        run: |
          ./test-runner run --profile multi-gpu --config test/config/models.yaml --output test-report-multi-gpu --verbose
        timeout-minutes: 60

      - name: Check multi-GPU test results
        run: |
          if ! jq -e '.summary.failed == 0' test-report-multi-gpu.json; then
            echo "Multi-GPU tests failed!"
            jq '.results[] | select(.status == "FAILED")' test-report-multi-gpu.json
            exit 1
          fi
          echo "All multi-GPU tests passed!"

      - name: Display GPU memory usage
        if: always()
        run: |
          echo "=== Final GPU Memory State ==="
          nvidia-smi

      - name: Upload multi-GPU test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: multi-gpu-test-results
          path: |
            test-report-multi-gpu.json
            test-report-multi-gpu.md
            ollama.log
          retention-days: 30 # Keep longer for analysis
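The result-check step implies a report schema from test-runner: a summary.failed counter plus a results array with per-test status fields. A minimal sketch of a report that would satisfy the jq filters (field names beyond summary.failed and results[].status are illustrative guesses, not taken from this commit):

  cat > /tmp/sample-report.json <<'EOF'
  {
    "summary": { "total": 2, "passed": 2, "failed": 0 },
    "results": [
      { "model": "gemma3:27b", "status": "PASSED" },
      { "model": "gpt-oss:20b", "status": "PASSED" }
    ]
  }
  EOF
  # Same check the workflow runs; exit code 0 means the step passes.
  jq -e '.summary.failed == 0' /tmp/sample-report.json && echo "step would pass"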
.github/workflows/tesla-k80-tests.yml → .github/workflows/tesla-k80-single-gpu-tests.yml (renamed)
@@ -1,4 +1,4 @@
-name: Tesla K80 Tests
+name: Tesla K80 Single-GPU Tests
 
 on:
   workflow_dispatch: # Manual trigger only
test/config/models.yaml
@@ -24,19 +24,40 @@ profiles:
       max_response_tokens: 100
     timeout: 120s
 
   # Stress test profile - larger models and longer prompts
   stress:
     timeout: 60m
+  # Multi-GPU test profile - test models requiring 2x Tesla K80s
+  multi-gpu:
+    timeout: 45m
     models:
-      - name: gemma3:12b
+      - name: gemma3:27b
         prompts:
-          - "Write a detailed explanation of how neural networks work, focusing on backpropagation."
-          - "Describe the architecture of a transformer model in detail."
-        min_response_tokens: 50
-        max_response_tokens: 1000
+          - "Hello, respond with a brief greeting."
+        min_response_tokens: 5
+        max_response_tokens: 100
         timeout: 300s
+      - name: gpt-oss:20b
+        prompts:
+          - "Hello, respond with a brief greeting."
+        min_response_tokens: 5
+        max_response_tokens: 100
+        timeout: 240s
+    validation:
+      # Override single_gpu_preferred for multi-GPU tests
+      gpu_required: true
+      single_gpu_preferred: false
+      check_patterns:
+        success:
+          - "loaded model"
+          - "offload.*GPU"
+          - "CUDA backend"
+          - "split.*layer.*GPU" # Expect multi-GPU split
+        failure:
+          - "CUDA.*error"
+          - "out of memory"
+          - "OOM"
+          - "CPU backend"
+          - "failed to load"
 
-# Validation rules applied to all tests
+# Validation rules applied to all tests (unless overridden in profile)
 validation:
   # Require GPU acceleration (fail if CPU fallback detected)
   gpu_required: true
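Because the workflow uploads ollama.log as an artifact, the profile's check_patterns can be replayed by hand. A rough shell equivalent, assuming test-runner applies each pattern as an extended regex over the server log (the real matching logic lives in test-runner and may differ):

  # A healthy multi-GPU run must show a cross-GPU layer split and no failure pattern.
  if grep -qE 'split.*layer.*GPU' ollama.log && \
     ! grep -qE 'CUDA.*error|out of memory|OOM|CPU backend|failed to load' ollama.log; then
    echo "log consistent with a multi-GPU split"
  else
    echo "log shows CPU fallback, OOM, or CUDA failure"
  fi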