mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-20 12:47:00 +00:00
Add multi-model inference tests for gemma3 12b and 27b
- TC-INFERENCE-004: gemma3:12b single-GPU test
- TC-INFERENCE-005: gemma3:27b dual-GPU test (K80 layer split)
- Each test unloads the previous model before loading the next
- Workflows unload all 3 model sizes after the inference suite
- The 27b test verifies that both GPUs have memory allocated
This commit is contained in:
8
.github/workflows/full-pipeline.yml
vendored
8
.github/workflows/full-pipeline.yml
vendored
@@ -165,12 +165,14 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Unload test model from VRAM
|
||||
- name: Unload test models from VRAM
|
||||
if: always()
|
||||
run: |
|
||||
echo "Unloading gemma3:4b from VRAM..."
|
||||
echo "Unloading all test models from VRAM..."
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
|
||||
echo "Model unloaded"
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:12b","keep_alive":0}' || true
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:27b","keep_alive":0}' || true
|
||||
echo "All models unloaded"
|
||||
|
||||
- name: Upload inference results
|
||||
uses: actions/upload-artifact@v4
|
||||
|
||||
8
.github/workflows/inference.yml
vendored
8
.github/workflows/inference.yml
vendored
@@ -111,12 +111,14 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Unload test model from VRAM
|
||||
- name: Unload test models from VRAM
|
||||
if: always()
|
||||
run: |
|
||||
echo "Unloading gemma3:4b from VRAM..."
|
||||
echo "Unloading all test models from VRAM..."
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
|
||||
echo "Model unloaded"
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:12b","keep_alive":0}' || true
|
||||
curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:27b","keep_alive":0}' || true
|
||||
echo "All models unloaded"
|
||||
|
||||
- name: Upload inference results
|
||||
uses: actions/upload-artifact@v4
|
||||
|
||||
Reference in New Issue
Block a user