# Mirror of https://github.com/dogkeeper886/ollama37.git
# Synced 2025-12-20 20:57:01 +00:00
#
#   - Add unloadModel() method to LLMJudge class
#   - CLI calls unloadModel() after judging completes
#   - Workflows unload gemma3:4b after inference tests
#   - Uses Ollama API with keep_alive:0 to trigger unload
#
# 134 lines, 3.9 KiB, YAML
name: Inference Tests

# Two entry points expose the same three inputs. The manual trigger offers
# string choices, while the reusable-workflow trigger takes a real boolean for
# use_existing_container and plain strings for the judge settings.
on:
  # Manual trigger
  workflow_dispatch:
    inputs:
      use_existing_container:
        description: 'Use existing running container'
        type: choice
        options:
          - 'true'
          - 'false'
        default: 'false'
        required: false
      judge_mode:
        description: 'Test judge mode'
        type: choice
        options:
          - 'simple'
          - 'llm'
          - 'dual'
        default: 'dual'
        required: false
      judge_model:
        description: 'LLM model for judging (if llm/dual mode)'
        type: string
        default: 'gemma3:12b'
        required: false

  # Called by other workflows
  workflow_call:
    inputs:
      use_existing_container:
        description: 'Container is already running'
        type: boolean
        default: false
        required: false
      judge_mode:
        description: 'Test judge mode (simple, llm, dual)'
        type: string
        default: 'dual'
        required: false
      judge_model:
        description: 'LLM model for judging'
        type: string
        default: 'gemma3:12b'
        required: false
    outputs:
      # Forwarded from the `inference` job's own `result` output.
      result:
        description: 'Inference test result'
        value: ${{ jobs.inference.outputs.result }}

# Workflow-level environment: exported to the shell of every step below.
env:
  OLLAMA_HOST: 'http://localhost:11434'

jobs:
  # Runs the `inference` test suite on a self-hosted runner, optionally
  # starting and stopping the docker compose stack around it.
  inference:
    name: Inference Tests
    runs-on: self-hosted
    outputs:
      # Report the outcome of the verdict step. The runner step masks the test
      # runner's exit code with `|| true`, so its own outcome would read
      # "success" even when tests failed.
      result: ${{ steps.check-results.outcome }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      - name: Install test runner dependencies
        run: cd tests && npm ci

      - name: Start container (if needed)
        # The input is the string 'true' when dispatched manually and the
        # boolean true when called from another workflow; check both forms.
        if: ${{ inputs.use_existing_container != 'true' && inputs.use_existing_container != true }}
        run: |
          cd docker
          docker compose down 2>/dev/null || true
          docker compose up -d
          sleep 10

      - name: Run inference tests
        id: inference-tests
        # Inputs are handed to the script through the environment instead of
        # being expanded into the script text, so a crafted judge_model value
        # cannot inject shell commands.
        env:
          JUDGE_MODE: ${{ inputs.judge_mode || 'dual' }}
          JUDGE_MODEL: ${{ inputs.judge_model || 'gemma3:12b' }}
        run: |
          cd tests

          # Build judge flags based on input. The flags are kept in the
          # positional parameters so the model name stays a single argument.
          if [ "$JUDGE_MODE" = "simple" ]; then
            set -- --no-llm
          elif [ "$JUDGE_MODE" = "llm" ]; then
            set -- --judge-model "$JUDGE_MODEL"
          else
            # dual mode (default)
            set -- --dual-judge --judge-model "$JUDGE_MODEL"
          fi

          echo "Judge mode: $JUDGE_MODE"
          echo "Judge flags: $*"

          # Progress goes to stderr (visible), JSON results go to file.
          # The exit code is deliberately ignored here; the next step decides
          # pass/fail from the JSON summary.
          npm run --silent dev -- run --suite inference "$@" --output json > /tmp/inference-results.json || true

          echo "--- JSON Results ---"
          cat /tmp/inference-results.json

      - name: Check test results
        id: check-results
        run: |
          # jq -e exits non-zero when the file is missing, is not valid JSON,
          # produces no output, or when .summary.failed is null. Without this
          # a crashed test runner would leave FAILED empty and pass the check.
          if ! FAILED=$(jq -er '.summary.failed' /tmp/inference-results.json); then
            echo "::error::Could not read .summary.failed from /tmp/inference-results.json"
            exit 1
          fi

          # Reject anything that is not a plain non-negative integer.
          case "$FAILED" in
            '' | *[!0-9]*)
              echo "::error::Unexpected failed-test count: $FAILED"
              exit 1
              ;;
          esac

          echo "Failed tests: $FAILED"
          if [ "$FAILED" -gt 0 ]; then
            echo "::error::$FAILED inference test(s) failed"
            exit 1
          fi

      - name: Unload test model from VRAM
        if: always()
        run: |
          echo "Unloading gemma3:4b from VRAM..."
          # keep_alive:0 asks the server to unload the model. This is
          # best-effort: a failure is reported as a warning and never fails
          # the job.
          if curl -sf "${OLLAMA_HOST:-http://localhost:11434}/api/generate" -d '{"model":"gemma3:4b","keep_alive":0}'; then
            echo "Model unloaded"
          else
            echo "::warning::Could not unload gemma3:4b (is the Ollama server still reachable?)"
          fi

      - name: Upload inference results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: inference-test-results
          path: /tmp/inference-results.json

      - name: Stop container (if we started it)
        # Same dual string/boolean check as the start step; always() makes
        # the cleanup run even after a failed step.
        if: ${{ always() && inputs.use_existing_container != 'true' && inputs.use_existing_container != true }}
        run: |
          cd docker
          docker compose down || true
          echo "Container stopped"