mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
- Replace actions/download-artifact@v4 with dawidd6/action-download-artifact@v6
- The default download-artifact action only works within the same workflow run
- The third-party action enables downloading artifacts from a different workflow
- Both test workflows now download from the latest successful tesla-k80-ci.yml run
87 lines
2.6 KiB
YAML
87 lines
2.6 KiB
YAML
name: Tesla K80 Multi-GPU Tests

on:
  # Allows the workflow to be started by hand.
  workflow_dispatch:
  # Weekly run on Sundays at 2 AM UTC (less frequent than single-GPU tests).
  schedule:
    - cron: "0 2 * * 0"
jobs:
  # Runs the "multi-gpu" profile of the repository's test-runner against an
  # ollama binary produced by a separate build workflow.
  multi-gpu-test:
    runs-on: self-hosted

    # Least-privilege GITHUB_TOKEN scopes for this job:
    #   contents: read - needed by actions/checkout
    #   actions: read  - needed to look up runs/artifacts of tesla-k80-ci.yml
    # Declaring them explicitly keeps the artifact download working when the
    # repository default token permissions are restricted.
    permissions:
      contents: read
      actions: read

    timeout-minutes: 90  # Longer timeout for large models

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The binary is built by a different workflow (tesla-k80-ci.yml), so a
      # cross-workflow artifact download action is used here instead of
      # actions/download-artifact.
      - name: Download ollama binary from latest build
        uses: dawidd6/action-download-artifact@v6
        with:
          workflow: tesla-k80-ci.yml
          name: ollama-binary
          github_token: ${{ secrets.GITHUB_TOKEN }}
          # Look for a run that actually holds the named artifact rather than
          # assuming the most recent run has it.
          check_artifacts: true
          search_artifacts: true

      # Mark the downloaded file executable, then print its size and version.
      - name: Make ollama binary executable
        run: |
          chmod +x ollama
          ls -lh ollama
          ./ollama --version

      # Abort early when fewer than two GPUs are reported by nvidia-smi.
      - name: Verify multi-GPU setup
        run: |
          nvidia-smi --list-gpus
          GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
          if [ "$GPU_COUNT" -lt 2 ]; then
            echo "Error: Multi-GPU tests require at least 2 GPUs. Found: $GPU_COUNT"
            exit 1
          fi
          echo "Found $GPU_COUNT GPUs - proceeding with multi-GPU tests"

      # Builds cmd/test-runner into ./test-runner at the workspace root.
      # `|| true` ignores a failing `go mod init` (e.g. go.mod already exists).
      - name: Build test-runner
        run: |
          cd cmd/test-runner
          go mod init github.com/ollama/ollama/cmd/test-runner || true
          go mod tidy
          go build -o ../../test-runner .
          cd ../..
          ls -lh test-runner

      - name: Validate multi-GPU test configuration
        run: |
          ./test-runner validate --config test/config/models.yaml --profile multi-gpu

      # Step-level timeout (60 min) is tighter than the job-level one (90 min).
      - name: Run multi-GPU tests
        run: |
          ./test-runner run --profile multi-gpu --config test/config/models.yaml --output test-report-multi-gpu --verbose
        timeout-minutes: 60

      # Fails the job unless the JSON report shows zero failed tests; on
      # failure the FAILED result entries are printed first.
      - name: Check multi-GPU test results
        run: |
          if ! jq -e '.summary.failed == 0' test-report-multi-gpu.json; then
            echo "Multi-GPU tests failed!"
            jq '.results[] | select(.status == "FAILED")' test-report-multi-gpu.json
            exit 1
          fi
          echo "All multi-GPU tests passed!"

      # Runs even when earlier steps failed, to capture the GPU state.
      - name: Display GPU memory usage
        if: always()
        run: |
          echo "=== Final GPU Memory State ==="
          nvidia-smi

      # Runs even when earlier steps failed, so reports and logs are kept.
      - name: Upload multi-GPU test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: multi-gpu-test-results
          path: |
            test-report-multi-gpu.json
            test-report-multi-gpu.md
            ollama.log
          retention-days: 30  # Keep longer for analysis