Replace the grep-based pass/fail check in build.yml with a jq read of
.summary.failed, and add the same check after the runtime, inference and
llm-judged results are printed. Each check fails the step when the count is
greater than zero, and also when no integer count can be read from the file.

NOTE(review): the index hashes below are carried over from the original patch
and do not reflect the hardened script; regenerate before using --3way.

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 73adf952..d6935e58 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -37,9 +37,21 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/build-results.json
 
-          # Check if any tests failed
-          if grep -q '"pass": false' /tmp/build-results.json; then
-            echo "Some build tests failed"
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/build-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/build-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED build test(s) failed"
             exit 1
           fi
 
diff --git a/.github/workflows/full-pipeline.yml b/.github/workflows/full-pipeline.yml
index 81c92d3d..a29dcf13 100644
--- a/.github/workflows/full-pipeline.yml
+++ b/.github/workflows/full-pipeline.yml
@@ -67,6 +67,24 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/runtime-results.json
 
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/runtime-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/runtime-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED runtime test(s) failed"
+            exit 1
+          fi
+
       - name: Upload runtime results
         uses: actions/upload-artifact@v4
         if: always()
@@ -100,6 +118,24 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/inference-results.json
 
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/inference-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/inference-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED inference test(s) failed"
+            exit 1
+          fi
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
@@ -139,6 +175,24 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/llm-judged-results.json
 
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/llm-judged-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/llm-judged-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED test(s) failed LLM evaluation"
+            exit 1
+          fi
+
       - name: Upload final results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/.github/workflows/inference.yml b/.github/workflows/inference.yml
index 173bad8d..f1b5de43 100644
--- a/.github/workflows/inference.yml
+++ b/.github/workflows/inference.yml
@@ -63,6 +63,24 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/inference-results.json
 
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/inference-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/inference-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED inference test(s) failed"
+            exit 1
+          fi
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/.github/workflows/runtime.yml b/.github/workflows/runtime.yml
index fde739c3..70b863f0 100644
--- a/.github/workflows/runtime.yml
+++ b/.github/workflows/runtime.yml
@@ -59,6 +59,24 @@ jobs:
           echo "--- JSON Results ---"
           cat /tmp/runtime-results.json
 
+      # Gate the job on the failure count recorded in the results file.
+      - name: Check test results
+        run: |
+          FAILED=$(jq -r '.summary.failed' /tmp/runtime-results.json)
+          echo "Failed tests: $FAILED"
+          # A missing or non-numeric count makes the -gt test below error out,
+          # which the if treats as false, so the step would pass. Reject it here.
+          case "$FAILED" in
+            '' | *[!0-9]*)
+              echo "::error::Could not read a failed-test count from /tmp/runtime-results.json"
+              exit 1
+              ;;
+          esac
+          if [ "$FAILED" -gt 0 ]; then
+            echo "::error::$FAILED runtime test(s) failed"
+            exit 1
+          fi
+
       - name: Upload runtime results
         uses: actions/upload-artifact@v4
         if: always()