diff --git a/.github/workflows/full-pipeline.yml b/.github/workflows/full-pipeline.yml
index f21e2ae9..bc84b321 100644
--- a/.github/workflows/full-pipeline.yml
+++ b/.github/workflows/full-pipeline.yml
@@ -165,6 +165,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/.github/workflows/inference.yml b/.github/workflows/inference.yml
index 175fe01f..097de288 100644
--- a/.github/workflows/inference.yml
+++ b/.github/workflows/inference.yml
@@ -111,6 +111,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
diff --git a/tests/src/cli.ts b/tests/src/cli.ts
index cfca48d2..beb79a40 100644
--- a/tests/src/cli.ts
+++ b/tests/src/cli.ts
@@ -169,6 +169,11 @@ program
         }
       }
 
+      // Unload judge model from VRAM if LLM was used
+      if (options.llm !== false) {
+        await judge.unloadModel();
+      }
+
       // Create reports (with separate verdicts in dual-judge mode)
       const reports = Reporter.createReports(
         results,
diff --git a/tests/src/judge.ts b/tests/src/judge.ts
index 8aafc559..71a47fd4 100644
--- a/tests/src/judge.ts
+++ b/tests/src/judge.ts
@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
     }
   }
 
+  // Unload the judge model from VRAM to free memory for other tests
+  async unloadModel(): Promise<void> {
+    try {
+      process.stderr.write(
+        ` Unloading judge model ${this.model} from VRAM...\n`,
+      );
+      await axios.post(
+        `${this.ollamaUrl}/api/generate`,
+        {
+          model: this.model,
+          keep_alive: 0,
+        },
+        {
+          timeout: 30000,
+        },
+      );
+      process.stderr.write(` Judge model unloaded.\n`);
+    } catch (error) {
+      process.stderr.write(
+        ` Warning: Failed to unload judge model: ${error}\n`,
+      );
+    }
+  }
+
   // Fallback: Simple rule-based judgment (no LLM)
   simpleJudge(result: TestResult): Judgment {
     const allStepsPassed = result.steps.every((s) => s.exitCode === 0);