Files
ollama37/.github/workflows/full-pipeline.yml
Shang Chieh Tseng 0e66cc6f93 Fix workflows to fail on test failures
The '|| true' was swallowing test runner exit codes, causing workflows
to pass even when tests failed. Added separate 'Check test results'
step that reads JSON summary and fails workflow if any tests failed.

Affected workflows:
- build.yml
- runtime.yml
- inference.yml
- full-pipeline.yml

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 21:48:40 +08:00

204 lines
5.5 KiB
YAML

# Full Pipeline: manually dispatched workflow that chains build verification,
# container start-up, runtime tests, inference tests, an optional LLM judge
# pass, and a final cleanup/summary job.
name: Full Pipeline

on:
  # Manual trigger only.
  workflow_dispatch:
    inputs:
      # String-valued choice; the llm-judge job compares it against 'true'.
      skip_llm_judge:
        description: 'Skip LLM judge evaluation'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'

# Workflow-level environment shared by every job and step.
env:
  OLLAMA_HOST: http://localhost:11434

jobs:
# Build stage: delegates to the build workflow file stored in this repository.
build:
  name: Build Verification
  uses: ./.github/workflows/build.yml
# Brings up the docker compose stack that the test jobs talk to.
start-container:
  name: Start Container
  runs-on: self-hosted
  needs: build
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Start container
      run: |
        cd docker
        # Tear down any stack left over from a previous run; ignore the error
        # raised when nothing is running.
        docker compose down 2>/dev/null || true
        docker compose up -d

    - name: Verify container health
      run: |
        docker ps
        echo "Waiting for container to be ready..."
        # Poll the API (up to ~60s) instead of sleeping for a fixed time, so
        # the job moves on as soon as the server answers and gives slow starts
        # more room. OLLAMA_HOST comes from the workflow-level env block.
        for attempt in $(seq 1 30); do
          if curl -sf "${OLLAMA_HOST}/api/tags"; then
            echo ""
            echo "Ollama ready after ${attempt} attempt(s)"
            exit 0
          fi
          sleep 2
        done
        # Deliberately non-fatal: the step only warns and the pipeline goes on.
        echo "::warning::Ollama not ready yet, continuing..."
# Runs the "runtime" test suite against the started container and fails the
# job when the JSON report shows failures or cannot be read.
runtime:
  name: Runtime Tests
  runs-on: self-hosted
  needs: start-container
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'

    - name: Install test runner dependencies
      run: cd tests && npm ci

    # The runner's exit code is intentionally ignored (`|| true`) so the JSON
    # report is always printed; pass/fail is decided by the next step.
    - name: Run runtime tests
      id: runtime-tests
      run: |
        cd tests
        npm run --silent dev -- run --suite runtime --no-llm --output json > /tmp/runtime-results.json || true
        echo "--- JSON Results ---"
        cat /tmp/runtime-results.json

    - name: Check test results
      run: |
        RESULTS=/tmp/runtime-results.json
        # `jq -e` with the `numbers` filter exits non-zero when the report is
        # empty, is not valid JSON, or has no numeric .summary.failed. Without
        # this an empty report yields FAILED="" and the comparison below
        # silently evaluates to false, letting a crashed runner pass.
        if ! FAILED=$(jq -er '.summary.failed | numbers' "$RESULTS"); then
          echo "::error::No numeric .summary.failed found in $RESULTS (test runner produced no usable report)"
          exit 1
        fi
        echo "Failed tests: $FAILED"
        if [ "$FAILED" -gt 0 ]; then
          echo "::error::$FAILED runtime test(s) failed"
          exit 1
        fi

    # always() keeps the report available even when the check above fails.
    - name: Upload runtime results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: runtime-test-results
        path: /tmp/runtime-results.json
# Runs the "inference" test suite after the runtime job and fails the job when
# the JSON report shows failures or cannot be read.
inference:
  name: Inference Tests
  runs-on: self-hosted
  needs: runtime
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'

    - name: Install test runner dependencies
      run: cd tests && npm ci

    # The runner's exit code is intentionally ignored (`|| true`) so the JSON
    # report is always printed; pass/fail is decided by the next step.
    - name: Run inference tests
      id: inference-tests
      run: |
        cd tests
        npm run --silent dev -- run --suite inference --no-llm --output json > /tmp/inference-results.json || true
        echo "--- JSON Results ---"
        cat /tmp/inference-results.json

    - name: Check test results
      run: |
        RESULTS=/tmp/inference-results.json
        # `jq -e` with the `numbers` filter exits non-zero when the report is
        # empty, is not valid JSON, or has no numeric .summary.failed. Without
        # this an empty report yields FAILED="" and the comparison below
        # silently evaluates to false, letting a crashed runner pass.
        if ! FAILED=$(jq -er '.summary.failed | numbers' "$RESULTS"); then
          echo "::error::No numeric .summary.failed found in $RESULTS (test runner produced no usable report)"
          exit 1
        fi
        echo "Failed tests: $FAILED"
        if [ "$FAILED" -gt 0 ]; then
          echo "::error::$FAILED inference test(s) failed"
          exit 1
        fi

    # always() keeps the report available even when the check above fails.
    - name: Upload inference results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: inference-test-results
        path: /tmp/inference-results.json
# Optional LLM-judged evaluation. Skipped when the skip_llm_judge input is the
# string 'true'; otherwise runs after build, runtime and inference.
llm-judge:
  name: LLM Judge Evaluation
  runs-on: self-hosted
  needs: [build, runtime, inference]
  if: ${{ inputs.skip_llm_judge != 'true' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'

    - name: Install test runner dependencies
      run: cd tests && npm ci

    # No artifact name is given, so every artifact of this run is fetched.
    # NOTE(review): the run command below does not reference /tmp/results;
    # confirm the test runner actually consumes these files.
    - name: Download all test results
      uses: actions/download-artifact@v4
      with:
        path: /tmp/results

    # The runner's exit code is intentionally ignored (`|| true`) so the JSON
    # report is always printed; pass/fail is decided by the next step.
    - name: Run LLM judge on all results
      run: |
        cd tests
        echo "Running LLM judge evaluation..."
        npm run --silent dev -- run --output json > /tmp/llm-judged-results.json || true
        echo "--- JSON Results ---"
        cat /tmp/llm-judged-results.json

    - name: Check test results
      run: |
        RESULTS=/tmp/llm-judged-results.json
        # `jq -e` with the `numbers` filter exits non-zero when the report is
        # empty, is not valid JSON, or has no numeric .summary.failed. Without
        # this an empty report yields FAILED="" and the comparison below
        # silently evaluates to false, letting a crashed runner pass.
        if ! FAILED=$(jq -er '.summary.failed | numbers' "$RESULTS"); then
          echo "::error::No numeric .summary.failed found in $RESULTS (test runner produced no usable report)"
          exit 1
        fi
        echo "Failed tests: $FAILED"
        if [ "$FAILED" -gt 0 ]; then
          echo "::error::$FAILED test(s) failed LLM evaluation"
          exit 1
        fi

    # always() keeps the report available even when the check above fails.
    - name: Upload final results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: llm-judged-results
        path: /tmp/llm-judged-results.json
# Final job: stops the compose stack and writes a per-stage result table to the
# job summary. always() makes it run even when upstream jobs failed or were
# skipped, so the container is not left running.
cleanup:
  name: Cleanup & Summary
  runs-on: self-hosted
  # start-container is listed so its result can be reported; otherwise a failed
  # container start only shows up as "skipped" test stages.
  needs: [build, start-container, runtime, inference, llm-judge]
  if: always()
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Stop Container
      run: |
        cd docker
        # Best effort: a stack that never started must not fail the cleanup.
        docker compose down || true
        echo "Container stopped"

    - name: Summary
      run: |
        # One grouped, quoted redirect: quoting keeps the write working when
        # the runner's summary path contains spaces.
        {
          echo "## Full Pipeline Summary"
          echo ""
          echo "| Stage | Status |"
          echo "|-------|--------|"
          echo "| Build Verification | ${{ needs.build.result }} |"
          echo "| Start Container | ${{ needs.start-container.result }} |"
          echo "| Runtime Tests | ${{ needs.runtime.result }} |"
          echo "| Inference Tests | ${{ needs.inference.result }} |"
          echo "| LLM Judge | ${{ needs.llm-judge.result }} |"
          echo ""
          echo "Commit: ${{ github.sha }}"
        } >> "$GITHUB_STEP_SUMMARY"