# Patch summary: moves the "unload model" step for three inference test cases
# under tests/testcases/inference/.
#
#   TC-INFERENCE-003.yml  adds a step "Unload model after 4b tests complete"
#                         ahead of the `criteria:` key. The step POSTs
#                         {"model":"gemma3:4b","keep_alive":0} to
#                         http://localhost:11434/api/generate, sleeps 2 s,
#                         then echoes "Model unloaded".
#   TC-INFERENCE-004.yml  removes its first step "Unload previous model from
#                         VRAM" (which sent the same request for gemma3:4b).
#   TC-INFERENCE-005.yml  removes its first step "Unload previous model from
#                         VRAM" (which sent the request for gemma3:12b).
#
# NOTE(review): in the added step `|| true` discards curl's exit status and the
#   final echo runs unconditionally, so "Model unloaded" is printed even when
#   the request fails. Looks like deliberate best-effort; confirm the message
#   is acceptable.
# NOTE(review): the 005 hunk drops the gemma3:12b unload, but no hunk visible
#   here adds an equivalent step to TC-INFERENCE-004. Confirm gemma3:12b is
#   unloaded elsewhere before 005 runs.
# NOTE(review): this patch appears to have been whitespace-collapsed onto a
#   single line; the original line breaks and YAML indentation presumably need
#   to be restored before it can be applied. TODO confirm against the commit.
diff --git a/tests/testcases/inference/TC-INFERENCE-003.yml b/tests/testcases/inference/TC-INFERENCE-003.yml index 30a33faa..81616e01 100644 --- a/tests/testcases/inference/TC-INFERENCE-003.yml +++ b/tests/testcases/inference/TC-INFERENCE-003.yml @@ -79,6 +79,13 @@ steps: echo "Recent API requests:" echo "$LOGS" | grep '\[GIN\]' | tail -5 + - name: Unload model after 4b tests complete + command: | + echo "Unloading gemma3:4b from VRAM..." + curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true + sleep 2 + echo "Model unloaded" + criteria: | Ollama REST API should handle inference requests. diff --git a/tests/testcases/inference/TC-INFERENCE-004.yml b/tests/testcases/inference/TC-INFERENCE-004.yml index 8623942f..5898895d 100644 --- a/tests/testcases/inference/TC-INFERENCE-004.yml +++ b/tests/testcases/inference/TC-INFERENCE-004.yml @@ -8,13 +8,6 @@ dependencies: - TC-INFERENCE-003 steps: - - name: Unload previous model from VRAM - command: | - echo "Unloading any loaded models..." - curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true - sleep 2 - echo "Previous model unloaded" - - name: Check if gemma3:12b model exists command: docker exec ollama37 ollama list | grep -q "gemma3:12b" && echo "Model exists" || echo "Model not found" diff --git a/tests/testcases/inference/TC-INFERENCE-005.yml b/tests/testcases/inference/TC-INFERENCE-005.yml index 84ec6fb6..43ddbb07 100644 --- a/tests/testcases/inference/TC-INFERENCE-005.yml +++ b/tests/testcases/inference/TC-INFERENCE-005.yml @@ -8,13 +8,6 @@ dependencies: - TC-INFERENCE-004 steps: - - name: Unload previous model from VRAM - command: | - echo "Unloading any loaded models..." - curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:12b","keep_alive":0}' || true - sleep 2 - echo "Previous model unloaded" - - name: Verify dual GPU availability command: | echo "=== GPU Configuration ==="