# Runs the inference test suite from tests/ against the Docker Compose stack
# defined in docker/. Can be started manually (workflow_dispatch) or reused
# from another workflow (workflow_call), which also receives a `result` output.
name: Inference Tests

on:
  workflow_dispatch: # Manual trigger
    inputs:
      use_existing_container:
        description: 'Use existing running container'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'
      judge_mode:
        description: 'Test judge mode'
        required: false
        default: 'simple'
        type: choice
        options:
          - 'simple'
          - 'llm'
          - 'dual'
      judge_model:
        description: 'LLM model for judging (if llm/dual mode)'
        required: false
        default: 'gemma3:4b'
        type: string
  workflow_call: # Called by other workflows
    inputs:
      use_existing_container:
        description: 'Container is already running'
        required: false
        default: false
        type: boolean
      judge_mode:
        description: 'Test judge mode (simple, llm, dual)'
        required: false
        default: 'simple'
        type: string
      judge_model:
        description: 'LLM model for judging'
        required: false
        default: 'gemma3:4b'
        type: string
    outputs:
      result:
        description: "Inference test result"
        value: ${{ jobs.inference.outputs.result }}

env:
  OLLAMA_HOST: http://localhost:11434

jobs:
  inference:
    name: Inference Tests
    runs-on: self-hosted
    outputs:
      # Taken from the check step, not the run step: the run step tolerates a
      # non-zero runner exit, so its outcome does not reflect test failures.
      result: ${{ steps.check-results.outcome }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install test runner dependencies
        run: cd tests && npm ci

      # use_existing_container is the string 'true' when dispatched manually
      # (choice input) and the boolean true when called from another workflow,
      # so both forms are checked.
      - name: Start container (if needed)
        if: ${{ inputs.use_existing_container != 'true' && inputs.use_existing_container != true }}
        run: |
          cd docker
          docker compose down 2>/dev/null || true
          docker compose up -d
          sleep 10

      - name: Run inference tests
        id: inference-tests
        env:
          # Inputs reach the script as environment variables rather than
          # being expanded into the script text, so a crafted value cannot
          # inject shell commands.
          JUDGE_MODE: ${{ inputs.judge_mode }}
          JUDGE_MODEL: ${{ inputs.judge_model || 'gemma3:4b' }}
        run: |
          cd tests

          # Build judge flags based on input. They are stored as positional
          # parameters so each value is passed as exactly one argument.
          case "$JUDGE_MODE" in
            simple | '')
              set -- --no-llm
              ;;
            dual)
              set -- --dual-judge --judge-model "$JUDGE_MODEL"
              ;;
            *) # llm mode
              set -- --judge-model "$JUDGE_MODEL"
              ;;
          esac

          echo "Judge mode: ${JUDGE_MODE:-simple}"
          echo "Judge flags: $*"

          # Progress goes to stderr (visible), JSON results go to file.
          # A non-zero runner exit is tolerated here so the results are still
          # printed; the "Check test results" step decides pass/fail.
          runner_status=0
          npm run --silent dev -- run --suite inference "$@" --output json \
            > /tmp/inference-results.json || runner_status=$?
          echo "Test runner exit status: $runner_status"

          echo "--- JSON Results ---"
          cat /tmp/inference-results.json

      - name: Check test results
        id: check-results
        run: |
          # `jq -e` combined with `numbers` exits non-zero unless
          # .summary.failed exists and is a number, so an empty, invalid or
          # incomplete results file fails the step instead of passing it.
          if ! FAILED=$(jq -e '.summary.failed | numbers' /tmp/inference-results.json); then
            echo "::error::No numeric .summary.failed found in /tmp/inference-results.json"
            exit 1
          fi

          echo "Failed tests: $FAILED"
          if [ "$FAILED" -gt 0 ]; then
            echo "::error::$FAILED inference test(s) failed"
            exit 1
          fi

      # Runs even when an earlier step failed so the raw results stay available.
      - name: Upload inference results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: inference-test-results
          path: /tmp/inference-results.json

      # Mirrors the condition of the start step; always() keeps the teardown
      # running after a failure.
      - name: Stop container (if we started it)
        if: ${{ always() && inputs.use_existing_container != 'true' && inputs.use_existing_container != true }}
        run: |
          cd docker
          docker compose down || true
          echo "Container stopped"