ollama37/.github/workflows/full-pipeline.yml
Shang Chieh Tseng (commit 1a185f7926): Add comprehensive Ollama log checking and configurable LLM judge mode
Test case enhancements:
- TC-RUNTIME-001: Add startup log error checking (CUDA, CUBLAS, CPU fallback)
- TC-RUNTIME-002: Add GPU detection verification, CUDA init checks, error detection
- TC-RUNTIME-003: Add server listening verification, runtime error checks
- TC-INFERENCE-001: Add model loading logs, layer offload verification
- TC-INFERENCE-002: Add inference error checking (CUBLAS/CUDA errors)
- TC-INFERENCE-003: Add API request log verification, response time display

Workflow enhancements:
- Add judge_mode input (simple/llm/dual) to all workflows
- Add judge_model input to specify LLM model for judging
- Configurable via the GitHub Actions UI without code changes (see the CLI example below)
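
For example (a sketch assuming the GitHub CLI, gh, is installed and authenticated
for this repository), the same inputs can be supplied from the command line:

    gh workflow run full-pipeline.yml -f judge_mode=dual -f judge_model=gemma3:4b
    gh workflow run full-pipeline.yml -f judge_mode=llm -f skip_llm_judge_stage=false

Inputs left out fall back to the defaults declared under workflow_dispatch
(simple judge, gemma3:4b, LLM judge stage skipped).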

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 23:27:57 +08:00

253 lines · 7.3 KiB · YAML

name: Full Pipeline

on:
  workflow_dispatch: # Manual trigger
    inputs:
      judge_mode:
        description: 'Test judge mode'
        required: false
        default: 'simple'
        type: choice
        options:
          - 'simple'
          - 'llm'
          - 'dual'
      judge_model:
        description: 'LLM model for judging (if llm/dual mode)'
        required: false
        default: 'gemma3:4b'
        type: string
      skip_llm_judge_stage:
        description: 'Skip separate LLM judge evaluation stage'
        required: false
        default: 'true'
        type: choice
        options:
          - 'true'
          - 'false'
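
# judge_mode selects how test output is graded; it maps to test-runner flags in
# the run steps below: 'simple' -> --no-llm, 'dual' -> --dual-judge with
# --judge-model, and 'llm' -> --judge-model only. judge_model is ignored in
# 'simple' mode.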
env:
  OLLAMA_HOST: http://localhost:11434
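
# OLLAMA_HOST points the test runner at the local Ollama API; the compose setup
# under ./docker is assumed to publish this port on the runner host.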
jobs:
  build:
    name: Build Verification
    uses: ./.github/workflows/build.yml
    with:
      judge_mode: ${{ inputs.judge_mode || 'simple' }}
      judge_model: ${{ inputs.judge_model || 'gemma3:4b' }}
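
  # build.yml is a reusable workflow; it is expected to declare matching
  # judge_mode and judge_model workflow_call inputs for the values passed above.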
  start-container:
    name: Start Container
    runs-on: self-hosted
    needs: build
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Start container
        run: |
          cd docker
          docker compose down 2>/dev/null || true
          docker compose up -d
          echo "Waiting for container to be ready..."
          sleep 10

      - name: Verify container health
        run: |
          docker ps
          curl -s http://localhost:11434/api/tags || echo "Ollama not ready yet, continuing..."
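
  # Note: the health check above is best-effort. The curl fallback keeps this job
  # green even if Ollama is not reachable after the fixed 10-second wait, so the
  # test jobs below are the ones that actually fail when the server never comes up.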
  runtime:
    name: Runtime Tests
    runs-on: self-hosted
    needs: start-container
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install test runner dependencies
        run: cd tests && npm ci

      - name: Run runtime tests
        id: runtime-tests
        run: |
          cd tests
          # Build judge flags based on input
          JUDGE_FLAGS=""
          if [ "${{ inputs.judge_mode }}" = "simple" ] || [ -z "${{ inputs.judge_mode }}" ]; then
            JUDGE_FLAGS="--no-llm"
          elif [ "${{ inputs.judge_mode }}" = "dual" ]; then
            JUDGE_FLAGS="--dual-judge --judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
          else
            # llm mode
            JUDGE_FLAGS="--judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
          fi
          echo "Judge mode: ${{ inputs.judge_mode || 'simple' }}"
          echo "Judge flags: $JUDGE_FLAGS"
          npm run --silent dev -- run --suite runtime $JUDGE_FLAGS --output json > /tmp/runtime-results.json || true
          echo "--- JSON Results ---"
          cat /tmp/runtime-results.json

      - name: Check test results
        run: |
          FAILED=$(jq '.summary.failed' /tmp/runtime-results.json)
          echo "Failed tests: $FAILED"
          if [ "$FAILED" -gt 0 ]; then
            echo "::error::$FAILED runtime test(s) failed"
            exit 1
          fi

      - name: Upload runtime results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: runtime-test-results
          path: /tmp/runtime-results.json
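
  # The inference job repeats the same JUDGE_FLAGS construction as the runtime
  # job; only the suite name and the results file differ.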
  inference:
    name: Inference Tests
    runs-on: self-hosted
    needs: runtime
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install test runner dependencies
        run: cd tests && npm ci

      - name: Run inference tests
        id: inference-tests
        run: |
          cd tests
          # Build judge flags based on input
          JUDGE_FLAGS=""
          if [ "${{ inputs.judge_mode }}" = "simple" ] || [ -z "${{ inputs.judge_mode }}" ]; then
            JUDGE_FLAGS="--no-llm"
          elif [ "${{ inputs.judge_mode }}" = "dual" ]; then
            JUDGE_FLAGS="--dual-judge --judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
          else
            # llm mode
            JUDGE_FLAGS="--judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
          fi
          echo "Judge mode: ${{ inputs.judge_mode || 'simple' }}"
          echo "Judge flags: $JUDGE_FLAGS"
          npm run --silent dev -- run --suite inference $JUDGE_FLAGS --output json > /tmp/inference-results.json || true
          echo "--- JSON Results ---"
          cat /tmp/inference-results.json

      - name: Check test results
        run: |
          FAILED=$(jq '.summary.failed' /tmp/inference-results.json)
          echo "Failed tests: $FAILED"
          if [ "$FAILED" -gt 0 ]; then
            echo "::error::$FAILED inference test(s) failed"
            exit 1
          fi

      - name: Upload inference results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: inference-test-results
          path: /tmp/inference-results.json
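
  # Optional second evaluation pass, skipped by default (skip_llm_judge_stage
  # defaults to 'true'). When enabled it re-runs the test runner with the LLM
  # judge, which assumes the chosen judge_model is already available to the
  # Ollama instance on this self-hosted runner.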
  llm-judge:
    name: LLM Judge Evaluation
    runs-on: self-hosted
    needs: [build, runtime, inference]
    if: ${{ inputs.skip_llm_judge_stage != 'true' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install test runner dependencies
        run: cd tests && npm ci

      - name: Download all test results
        uses: actions/download-artifact@v4
        with:
          path: /tmp/results

      - name: Run LLM judge on all results
        run: |
          cd tests
          echo "Running LLM judge evaluation..."
          echo "Using model: ${{ inputs.judge_model || 'gemma3:4b' }}"
          npm run --silent dev -- run --judge-model ${{ inputs.judge_model || 'gemma3:4b' }} --output json > /tmp/llm-judged-results.json || true
          echo "--- JSON Results ---"
          cat /tmp/llm-judged-results.json

      - name: Check test results
        run: |
          FAILED=$(jq '.summary.failed' /tmp/llm-judged-results.json)
          echo "Failed tests: $FAILED"
          if [ "$FAILED" -gt 0 ]; then
            echo "::error::$FAILED test(s) failed LLM evaluation"
            exit 1
          fi

      - name: Upload final results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: llm-judged-results
          path: /tmp/llm-judged-results.json
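
  # cleanup runs unconditionally (if: always()), so the container is stopped and
  # the summary table is written even when an earlier stage fails or is skipped.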
  cleanup:
    name: Cleanup & Summary
    runs-on: self-hosted
    needs: [build, runtime, inference, llm-judge]
    if: always()
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Stop Container
        run: |
          cd docker
          docker compose down || true
          echo "Container stopped"

      - name: Summary
        run: |
          echo "## Full Pipeline Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Stage | Status |" >> $GITHUB_STEP_SUMMARY
          echo "|-------|--------|" >> $GITHUB_STEP_SUMMARY
          echo "| Build Verification | ${{ needs.build.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Runtime Tests | ${{ needs.runtime.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Inference Tests | ${{ needs.inference.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| LLM Judge | ${{ needs.llm-judge.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Commit: ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY