From 82ab6cc96e9a2f1c0edfcbcedc388bd7c2eed0ab Mon Sep 17 00:00:00 2001 From: Shang Chieh Tseng Date: Wed, 17 Dec 2025 17:20:44 +0800 Subject: [PATCH] Refactor model unload: each test cleans up its own model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TC-INFERENCE-003: Add unload step for gemma3:4b at end - TC-INFERENCE-004: Remove redundant 4b unload at start - TC-INFERENCE-005: Remove redundant 12b unload at start Each model size test now handles its own VRAM cleanup. Workflow-level unload remains as safety fallback for failures. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/testcases/inference/TC-INFERENCE-003.yml | 7 +++++++ tests/testcases/inference/TC-INFERENCE-004.yml | 7 ------- tests/testcases/inference/TC-INFERENCE-005.yml | 7 ------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/testcases/inference/TC-INFERENCE-003.yml b/tests/testcases/inference/TC-INFERENCE-003.yml index 30a33faa..81616e01 100644 --- a/tests/testcases/inference/TC-INFERENCE-003.yml +++ b/tests/testcases/inference/TC-INFERENCE-003.yml @@ -79,6 +79,13 @@ steps: echo "Recent API requests:" echo "$LOGS" | grep '\[GIN\]' | tail -5 + - name: Unload model after 4b tests complete + command: | + echo "Unloading gemma3:4b from VRAM..." + curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true + sleep 2 + echo "Model unloaded" + criteria: | Ollama REST API should handle inference requests. diff --git a/tests/testcases/inference/TC-INFERENCE-004.yml b/tests/testcases/inference/TC-INFERENCE-004.yml index 8623942f..5898895d 100644 --- a/tests/testcases/inference/TC-INFERENCE-004.yml +++ b/tests/testcases/inference/TC-INFERENCE-004.yml @@ -8,13 +8,6 @@ dependencies: - TC-INFERENCE-003 steps: - - name: Unload previous model from VRAM - command: | - echo "Unloading any loaded models..." - curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true - sleep 2 - echo "Previous model unloaded" - - name: Check if gemma3:12b model exists command: docker exec ollama37 ollama list | grep -q "gemma3:12b" && echo "Model exists" || echo "Model not found" diff --git a/tests/testcases/inference/TC-INFERENCE-005.yml b/tests/testcases/inference/TC-INFERENCE-005.yml index 84ec6fb6..43ddbb07 100644 --- a/tests/testcases/inference/TC-INFERENCE-005.yml +++ b/tests/testcases/inference/TC-INFERENCE-005.yml @@ -8,13 +8,6 @@ dependencies: - TC-INFERENCE-004 steps: - - name: Unload previous model from VRAM - command: | - echo "Unloading any loaded models..." - curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:12b","keep_alive":0}' || true - sleep 2 - echo "Previous model unloaded" - - name: Verify dual GPU availability command: | echo "=== GPU Configuration ==="