From 82ab6cc96e9a2f1c0edfcbcedc388bd7c2eed0ab Mon Sep 17 00:00:00 2001
From: Shang Chieh Tseng <shangchieh.tseng@tsengsyu.com>
Date: Wed, 17 Dec 2025 17:20:44 +0800
Subject: [PATCH] Refactor model unload: each test cleans up its own model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- TC-INFERENCE-003: Add unload step for gemma3:4b at end
- TC-INFERENCE-004: Remove redundant 4b unload at start
- TC-INFERENCE-005: Remove redundant 12b unload at start

Each model size test now handles its own VRAM cleanup.
Workflow-level unload remains as safety fallback for failures.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 tests/testcases/inference/TC-INFERENCE-003.yml | 7 +++++++
 tests/testcases/inference/TC-INFERENCE-004.yml | 7 -------
 tests/testcases/inference/TC-INFERENCE-005.yml | 7 -------
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/tests/testcases/inference/TC-INFERENCE-003.yml b/tests/testcases/inference/TC-INFERENCE-003.yml
index 30a33faa..81616e01 100644
--- a/tests/testcases/inference/TC-INFERENCE-003.yml
+++ b/tests/testcases/inference/TC-INFERENCE-003.yml
@@ -79,6 +79,13 @@ steps:
       echo "Recent API requests:"
       echo "$LOGS" | grep '\[GIN\]' | tail -5
 
+  - name: Unload model after 4b tests complete
+    command: |
+      echo "Unloading gemma3:4b from VRAM..."
+      curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+      sleep 2
+      echo "Model unloaded"
+
 criteria: |
   Ollama REST API should handle inference requests.
 
diff --git a/tests/testcases/inference/TC-INFERENCE-004.yml b/tests/testcases/inference/TC-INFERENCE-004.yml
index 8623942f..5898895d 100644
--- a/tests/testcases/inference/TC-INFERENCE-004.yml
+++ b/tests/testcases/inference/TC-INFERENCE-004.yml
@@ -8,13 +8,6 @@ dependencies:
   - TC-INFERENCE-003
 
 steps:
-  - name: Unload previous model from VRAM
-    command: |
-      echo "Unloading any loaded models..."
-      curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
-      sleep 2
-      echo "Previous model unloaded"
-
   - name: Check if gemma3:12b model exists
     command: docker exec ollama37 ollama list | grep -q "gemma3:12b" && echo "Model exists" || echo "Model not found"
 
diff --git a/tests/testcases/inference/TC-INFERENCE-005.yml b/tests/testcases/inference/TC-INFERENCE-005.yml
index 84ec6fb6..43ddbb07 100644
--- a/tests/testcases/inference/TC-INFERENCE-005.yml
+++ b/tests/testcases/inference/TC-INFERENCE-005.yml
@@ -8,13 +8,6 @@ dependencies:
   - TC-INFERENCE-004
 
 steps:
-  - name: Unload previous model from VRAM
-    command: |
-      echo "Unloading any loaded models..."
-      curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:12b","keep_alive":0}' || true
-      sleep 2
-      echo "Previous model unloaded"
-
   - name: Verify dual GPU availability
     command: |
       echo "=== GPU Configuration ==="