Fix runtime test log checks that require model loading

- Remove CUDA initialization checks from TC-RUNTIME-002 (ggml_cuda_init and
  load_backend only appear when a model is loaded, not at startup)
- Fix bash integer comparison error in TC-RUNTIME-003

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
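For context, the integer comparison failure comes from a `grep -c` quirk: when nothing matches, `grep -c` still prints "0" but exits non-zero, so an `|| echo "0"` fallback fires as well and the variable ends up holding two lines, which the `[ ... -gt 0 ]` test cannot parse. A minimal standalone reproduction (illustrative, not part of the diff):

```bash
#!/usr/bin/env bash
LOGS="level=INFO server started"

# grep -c prints "0" AND exits 1 when nothing matches, so the
# fallback fires too and ERROR_COUNT becomes two lines: "0" + "0".
ERROR_COUNT=$(echo "$LOGS" | grep -c "level=ERROR" || echo "0")
[ "$ERROR_COUNT" -gt 0 ]   # -> "integer expression expected"

# With || true, grep's own "0" is kept and only the exit status
# is swallowed, leaving a single-line value that -gt can parse.
ERROR_COUNT=$(echo "$LOGS" | grep -c "level=ERROR" || true)
[ "$ERROR_COUNT" -gt 0 ] || echo "no errors ($ERROR_COUNT)"
```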
In the TC-RUNTIME-002 test file:

@@ -58,31 +58,6 @@ steps:
         echo "WARNING: Compute capability 3.7 not detected"
       fi
 
-  - name: Verify CUDA initialization in logs
-    command: |
-      cd docker
-      LOGS=$(docker compose logs 2>&1)
-
-      echo "=== CUDA Initialization Check ==="
-
-      # Check ggml_cuda_init
-      if echo "$LOGS" | grep -q "ggml_cuda_init: found"; then
-        echo "SUCCESS: CUDA initialized"
-        echo "$LOGS" | grep "ggml_cuda_init: found" | head -1
-      else
-        echo "ERROR: CUDA not initialized"
-        exit 1
-      fi
-
-      # Check CUDA backend loaded
-      if echo "$LOGS" | grep -q "load_backend: loaded CUDA backend"; then
-        echo "SUCCESS: CUDA backend loaded"
-        echo "$LOGS" | grep "load_backend: loaded CUDA backend" | head -1
-      else
-        echo "ERROR: CUDA backend not loaded"
-        exit 1
-      fi
-
   - name: Check for GPU-related errors in logs
     command: |
       cd docker
@@ -121,12 +96,13 @@ criteria: |
   - CUDA libraries are available (libcuda, libcublas, etc.)
   - /dev/nvidia-uvm device file exists (required for CUDA runtime)
   - Ollama logs show "inference compute" with "library=CUDA"
-  - Ollama logs show "ggml_cuda_init: found N CUDA devices"
-  - Ollama logs show "load_backend: loaded CUDA backend"
   - NO "id=cpu library=cpu" (CPU fallback)
   - NO CUBLAS_STATUS_ errors
   - NO CUDA error messages
 
+  NOTE: "ggml_cuda_init" and "load_backend" only appear when a model is loaded,
+  so they are checked in inference tests, not here.
+
   NOTE: If nvidia-smi works but Ollama shows only CPU, the UVM device
   files are missing. The test will auto-fix with nvidia-modprobe -u -c=0.
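Since the ggml_cuda_init and load_backend assertions now belong in the inference tests, a relocated check might look like the sketch below. The compose service name `ollama` and the model name are assumptions for illustration; only the two log patterns come from this commit:

```bash
cd docker
# Force a model load first; these log lines never appear at bare startup.
# (Model name is a placeholder, not taken from the repo.)
docker compose exec ollama ollama run llama3 "hello" > /dev/null
LOGS=$(docker compose logs 2>&1)

for pattern in "ggml_cuda_init: found" "load_backend: loaded CUDA backend"; do
  if echo "$LOGS" | grep -q "$pattern"; then
    echo "SUCCESS: $pattern"
    echo "$LOGS" | grep "$pattern" | head -1
  else
    echo "ERROR: missing \"$pattern\""
    exit 1
  fi
done
```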
In the TC-RUNTIME-003 test file:

@@ -52,8 +52,8 @@ steps:
       echo "=== Runtime Error Check ==="
 
       # Check for any ERROR level logs
-      ERROR_COUNT=$(echo "$LOGS" | grep -c "level=ERROR" || echo "0")
-      if [ "$ERROR_COUNT" -gt 0 ]; then
+      ERROR_COUNT=$(echo "$LOGS" | grep -c "level=ERROR" || true)
+      if [ -n "$ERROR_COUNT" ] && [ "$ERROR_COUNT" -gt 0 ] 2>/dev/null; then
         echo "WARNING: Found $ERROR_COUNT ERROR level log entries:"
         echo "$LOGS" | grep "level=ERROR" | tail -5
       else
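As a side note, the `2>/dev/null` on the test becomes unnecessary if the value is normalized to a single integer first. One equivalent formulation (a sketch, not what the commit uses):

```bash
# grep -c always prints a count, so || true only shields against
# grep's non-zero exit status (e.g. under set -e); default the
# value anyway in case the substitution ever comes back empty.
ERROR_COUNT=$(echo "$LOGS" | grep -c "level=ERROR" || true)
ERROR_COUNT=${ERROR_COUNT:-0}
if [ "$ERROR_COUNT" -gt 0 ]; then
  echo "WARNING: Found $ERROR_COUNT ERROR level log entries:"
  echo "$LOGS" | grep "level=ERROR" | tail -5
fi
```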