diff --git a/tests/testcases/inference/TC-INFERENCE-004.yml b/tests/testcases/inference/TC-INFERENCE-004.yml
deleted file mode 100644
index c2ee5e33..00000000
--- a/tests/testcases/inference/TC-INFERENCE-004.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-id: TC-INFERENCE-004
-name: CUBLAS Fallback Verification
-suite: inference
-priority: 4
-timeout: 120000
-
-dependencies:
-  - TC-INFERENCE-002
-
-steps:
-  - name: Check for CUBLAS errors in logs
-    command: cd docker && docker compose logs 2>&1 | grep -i "CUBLAS_STATUS" | grep -v "SUCCESS" | head -10 || echo "No CUBLAS errors"
-
-  - name: Check compute capability detection
-    command: cd docker && docker compose logs 2>&1 | grep -iE "compute|capability|cc.*3" | head -10 || echo "No compute capability logs"
-
-  - name: Verify no GPU errors
-    command: cd docker && docker compose logs 2>&1 | grep -iE "error|fail" | grep -i gpu | head -10 || echo "No GPU errors"
-
-criteria: |
-  CUBLAS should work correctly on Tesla K80 using legacy fallback.
-
-  Expected:
-  - No CUBLAS_STATUS_ARCH_MISMATCH errors
-  - No CUBLAS_STATUS_NOT_SUPPORTED errors
-  - Compute capability 3.7 may be mentioned in debug logs
-  - No fatal GPU-related errors
-
-  The K80 uses legacy CUBLAS functions (cublasSgemmBatched)
-  instead of modern Ex variants. This should work transparently.
-
-  Accept warnings. Only fail on actual CUBLAS errors.
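
For context on the fallback behavior the deleted criteria describe, below is a minimal sketch (not taken from this repository; the matrix sizes, single-batch setup, and uninitialized inputs are illustrative assumptions) of the legacy cublasSgemmBatched call that compute capability 3.7 hardware such as the Tesla K80 is expected to complete without CUBLAS_STATUS_ARCH_MISMATCH or CUBLAS_STATUS_NOT_SUPPORTED, in contrast to the modern GemmEx-style variants. Only the returned status matters here, mirroring what the removed log checks were looking for.

```c
/* Illustrative sketch only, not repository code: exercise the legacy
 * cublasSgemmBatched path and report its status. Inputs are left
 * uninitialized because only the cuBLAS status code is of interest. */
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <stdio.h>

int main(void) {
    const int n = 64;                  /* m = n = k, arbitrary small size */
    const float alpha = 1.0f, beta = 0.0f;

    float *dA, *dB, *dC;
    cudaMalloc((void **)&dA, n * n * sizeof(float));
    cudaMalloc((void **)&dB, n * n * sizeof(float));
    cudaMalloc((void **)&dC, n * n * sizeof(float));

    /* The batched API takes device arrays of device pointers. */
    const float *hA[] = { dA };
    const float *hB[] = { dB };
    float *hC[] = { dC };
    const float **dAarr;
    const float **dBarr;
    float **dCarr;
    cudaMalloc((void **)&dAarr, sizeof(hA));
    cudaMalloc((void **)&dBarr, sizeof(hB));
    cudaMalloc((void **)&dCarr, sizeof(hC));
    cudaMemcpy(dAarr, hA, sizeof(hA), cudaMemcpyHostToDevice);
    cudaMemcpy(dBarr, hB, sizeof(hB), cudaMemcpyHostToDevice);
    cudaMemcpy(dCarr, hC, sizeof(hC), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);

    /* Legacy FP32 batched GEMM: expected to succeed on cc 3.7,
     * unlike Ex variants that may require newer architectures. */
    cublasStatus_t st = cublasSgemmBatched(handle,
                                           CUBLAS_OP_N, CUBLAS_OP_N,
                                           n, n, n,
                                           &alpha,
                                           dAarr, n,
                                           dBarr, n,
                                           &beta,
                                           dCarr, n,
                                           1 /* batchCount */);
    printf("cublasSgemmBatched status: %d\n", (int)st);

    cublasDestroy(handle);
    cudaFree(dA); cudaFree(dB); cudaFree(dC);
    cudaFree(dAarr); cudaFree(dBarr); cudaFree(dCarr);
    return st == CUBLAS_STATUS_SUCCESS ? 0 : 1;
}
```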