diff --git a/.github/workflows/full-pipeline.yml b/.github/workflows/full-pipeline.yml
index f21e2ae9..bc84b321 100644
--- a/.github/workflows/full-pipeline.yml
+++ b/.github/workflows/full-pipeline.yml
@@ -165,6 +165,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/.github/workflows/inference.yml b/.github/workflows/inference.yml
index 175fe01f..097de288 100644
--- a/.github/workflows/inference.yml
+++ b/.github/workflows/inference.yml
@@ -111,6 +111,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/tests/src/cli.ts b/tests/src/cli.ts
index cfca48d2..beb79a40 100644
--- a/tests/src/cli.ts
+++ b/tests/src/cli.ts
@@ -169,6 +169,11 @@ program
         }
       }
 
+      // Unload judge model from VRAM if LLM was used
+      if (options.llm !== false) {
+        await judge.unloadModel();
+      }
+
       // Create reports (with separate verdicts in dual-judge mode)
       const reports = Reporter.createReports(
         results,
diff --git a/tests/src/judge.ts b/tests/src/judge.ts
index 8aafc559..71a47fd4 100644
--- a/tests/src/judge.ts
+++ b/tests/src/judge.ts
@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
     }
   }
 
+  // Unload the judge model from VRAM to free memory for other tests
+  async unloadModel(): Promise<void> {
+    try {
+      process.stderr.write(
+        ` Unloading judge model ${this.model} from VRAM...\n`,
+      );
+      await axios.post(
+        `${this.ollamaUrl}/api/generate`,
+        {
+          model: this.model,
+          keep_alive: 0,
+        },
+        {
+          timeout: 30000,
+        },
+      );
+      process.stderr.write(` Judge model unloaded.\n`);
+    } catch (error) {
+      process.stderr.write(
+        ` Warning: Failed to unload judge model: ${error}\n`,
+      );
+    }
+  }
+
   // Fallback: Simple rule-based judgment (no LLM)
   simpleJudge(result: TestResult): Judgment {
     const allStepsPassed = result.steps.every((s) => s.exitCode === 0);