Unload models from VRAM after use to free GPU memory

- Add unloadModel() method to LLMJudge class
- CLI calls unloadModel() after judging completes
- Workflows unload gemma3:4b after inference tests
- Uses the Ollama /api/generate endpoint with keep_alive: 0 to trigger the unload
This commit is contained in:
Shang Chieh Tseng
2025-12-17 16:51:12 +08:00
parent 7bb050f146
commit 22e77e0dde
4 changed files with 43 additions and 0 deletions

View File

@@ -165,6 +165,13 @@ jobs:
exit 1
fi
- name: Unload test model from VRAM
  # Runs even when earlier steps failed so the GPU memory is always released.
  if: always()
  run: |
    # Best-effort cleanup: a request with keep_alive set to 0 asks Ollama to
    # drop the model. A failure here must never fail the job, but it is
    # reported honestly instead of always printing "Model unloaded".
    echo "Unloading gemma3:4b from VRAM..."
    # -f turns HTTP errors into a non-zero exit, -S still prints the error
    # while -s hides the progress meter, and --max-time keeps a hung server
    # from stalling the workflow.
    if curl -sSf --max-time 30 http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}'; then
      echo "Model unloaded"
    else
      echo "Warning: failed to unload gemma3:4b (continuing)"
    fi
- name: Upload inference results
uses: actions/upload-artifact@v4
if: always()

View File

@@ -111,6 +111,13 @@ jobs:
exit 1
fi
- name: Unload test model from VRAM
  # Runs even when earlier steps failed so the GPU memory is always released.
  if: always()
  run: |
    # Best-effort cleanup: a request with keep_alive set to 0 asks Ollama to
    # drop the model. A failure here must never fail the job, but it is
    # reported honestly instead of always printing "Model unloaded".
    echo "Unloading gemma3:4b from VRAM..."
    # -f turns HTTP errors into a non-zero exit, -S still prints the error
    # while -s hides the progress meter, and --max-time keeps a hung server
    # from stalling the workflow.
    if curl -sSf --max-time 30 http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}'; then
      echo "Model unloaded"
    else
      echo "Warning: failed to unload gemma3:4b (continuing)"
    fi
- name: Upload inference results
uses: actions/upload-artifact@v4
if: always()

View File

@@ -169,6 +169,11 @@ program
}
}
// Release the judge model from VRAM once judging is done.
// The unload is skipped only when options.llm is exactly false; any other
// value (including undefined) triggers it. The call is awaited, so report
// creation below starts only after the unload attempt has finished.
if (options.llm !== false) {
await judge.unloadModel();
}
// Create reports (with separate verdicts in dual-judge mode)
const reports = Reporter.createReports(
results,

View File

@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
}
}
/**
 * Asks the Ollama server to evict the judge model from VRAM so the memory
 * is available to other tests.
 *
 * Sends a POST to `/api/generate` carrying only the model name and
 * `keep_alive: 0`, with a 30 second request timeout. Progress is written to
 * stderr. Any failure is reported there as a warning and is not rethrown,
 * so callers can treat this as best-effort cleanup.
 */
async unloadModel(): Promise<void> {
  try {
    process.stderr.write(` Unloading judge model ${this.model} from VRAM...\n`);

    const endpoint = `${this.ollamaUrl}/api/generate`;
    // keep_alive of 0 is what triggers the unload.
    const payload = { model: this.model, keep_alive: 0 };
    const requestOptions = { timeout: 30000 };
    await axios.post(endpoint, payload, requestOptions);

    process.stderr.write(" Judge model unloaded.\n");
  } catch (unloadError) {
    // Deliberately swallowed: failing to free VRAM must not abort the run.
    const warning = ` Warning: Failed to unload judge model: ${unloadError}\n`;
    process.stderr.write(warning);
  }
}
// Fallback: Simple rule-based judgment (no LLM)
simpleJudge(result: TestResult): Judgment {
const allStepsPassed = result.steps.every((s) => s.exitCode === 0);