mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-21 21:26:59 +00:00
Add comprehensive Ollama log checking and configurable LLM judge mode
Test case enhancements:
- TC-RUNTIME-001: Add startup log error checking (CUDA, CUBLAS, CPU fallback)
- TC-RUNTIME-002: Add GPU detection verification, CUDA init checks, error detection
- TC-RUNTIME-003: Add server listening verification, runtime error checks
- TC-INFERENCE-001: Add model loading logs, layer offload verification
- TC-INFERENCE-002: Add inference error checking (CUBLAS/CUDA errors)
- TC-INFERENCE-003: Add API request log verification, response time display

Workflow enhancements:
- Add judge_mode input (simple/llm/dual) to all workflows
- Add judge_model input to specify LLM model for judging
- Configurable via GitHub Actions UI without code changes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
41
.github/workflows/runtime.yml
vendored
41
.github/workflows/runtime.yml
vendored
@@ -11,6 +11,20 @@ on:
|
||||
options:
|
||||
- 'true'
|
||||
- 'false'
|
||||
judge_mode:
|
||||
description: 'Test judge mode'
|
||||
required: false
|
||||
default: 'simple'
|
||||
type: choice
|
||||
options:
|
||||
- 'simple'
|
||||
- 'llm'
|
||||
- 'dual'
|
||||
judge_model:
|
||||
description: 'LLM model for judging (if llm/dual mode)'
|
||||
required: false
|
||||
default: 'gemma3:4b'
|
||||
type: string
|
||||
workflow_call: # Called by other workflows
|
||||
inputs:
|
||||
keep_container:
|
||||
@@ -18,6 +32,16 @@ on:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
judge_mode:
|
||||
description: 'Test judge mode (simple, llm, dual)'
|
||||
required: false
|
||||
default: 'simple'
|
||||
type: string
|
||||
judge_model:
|
||||
description: 'LLM model for judging'
|
||||
required: false
|
||||
default: 'gemma3:4b'
|
||||
type: string
|
||||
outputs:
|
||||
result:
|
||||
description: "Runtime test result"
|
||||
@@ -53,8 +77,23 @@ jobs:
|
||||
id: runtime-tests
|
||||
run: |
|
||||
cd tests
|
||||
|
||||
# Build judge flags based on input
|
||||
JUDGE_FLAGS=""
|
||||
if [ "${{ inputs.judge_mode }}" = "simple" ] || [ -z "${{ inputs.judge_mode }}" ]; then
|
||||
JUDGE_FLAGS="--no-llm"
|
||||
elif [ "${{ inputs.judge_mode }}" = "dual" ]; then
|
||||
JUDGE_FLAGS="--dual-judge --judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
|
||||
else
|
||||
# llm mode
|
||||
JUDGE_FLAGS="--judge-model ${{ inputs.judge_model || 'gemma3:4b' }}"
|
||||
fi
|
||||
|
||||
echo "Judge mode: ${{ inputs.judge_mode || 'simple' }}"
|
||||
echo "Judge flags: $JUDGE_FLAGS"
|
||||
|
||||
# Progress goes to stderr (visible), JSON results go to file
|
||||
npm run --silent dev -- run --suite runtime --no-llm --output json > /tmp/runtime-results.json || true
|
||||
npm run --silent dev -- run --suite runtime $JUDGE_FLAGS --output json > /tmp/runtime-results.json || true
|
||||
|
||||
echo "--- JSON Results ---"
|
||||
cat /tmp/runtime-results.json
|
||||
|
||||
Reference in New Issue
Block a user