Unload models from VRAM after use to free GPU memory
- Add unloadModel() method to LLMJudge class
- CLI calls unloadModel() after judging completes
- Workflows unload gemma3:4b after inference tests
- Uses Ollama API with keep_alive: 0 to trigger the unload (sketched below)
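
For reference, the unload mechanism here is standard Ollama API behavior: a generate request with keep_alive set to 0 tells the server to evict the model immediately instead of keeping it resident for the default few minutes. A minimal standalone sketch of that call, assuming a local Ollama instance; the endpoint URL and helper name are illustrative, not from the commit:

    import axios from "axios";

    // Assumed local Ollama endpoint; adjust for your setup.
    const ollamaUrl = "http://localhost:11434";

    async function unloadFromVram(model: string): Promise<void> {
      // A generate request with keep_alive: 0 and no prompt evicts the model immediately.
      await axios.post(`${ollamaUrl}/api/generate`, { model, keep_alive: 0 });
    }

    unloadFromVram("gemma3:4b").catch(console.error);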
@@ -169,6 +169,11 @@ program
     }
   }

+  // Unload judge model from VRAM if LLM was used
+  if (options.llm !== false) {
+    await judge.unloadModel();
+  }
+
   // Create reports (with separate verdicts in dual-judge mode)
   const reports = Reporter.createReports(
     results,
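
One observation on this call site (an editorial note, not part of the commit): the unload runs in straight-line code, so an error thrown during judging would skip it and leave the model resident. A try/finally variant would guarantee cleanup; a schematic sketch, where judge and options stand in for the CLI's real objects so the control flow type-checks in isolation:

    // Schematic only: declarations stand in for the CLI's real objects.
    declare const judge: { unloadModel(): Promise<void> };
    declare const options: { llm?: boolean };

    async function judgeWithCleanup(runJudging: () => Promise<void>): Promise<void> {
      try {
        await runJudging();
      } finally {
        // Runs even if judging throws, so the model never stays loaded by accident.
        if (options.llm !== false) {
          await judge.unloadModel();
        }
      }
    }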
@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
     }
   }

+  // Unload the judge model from VRAM to free memory for other tests
+  async unloadModel(): Promise<void> {
+    try {
+      process.stderr.write(
+        ` Unloading judge model ${this.model} from VRAM...\n`,
+      );
+      await axios.post(
+        `${this.ollamaUrl}/api/generate`,
+        {
+          model: this.model,
+          keep_alive: 0,
+        },
+        {
+          timeout: 30000,
+        },
+      );
+      process.stderr.write(` Judge model unloaded.\n`);
+    } catch (error) {
+      process.stderr.write(
+        ` Warning: Failed to unload judge model: ${error}\n`,
+      );
+    }
+  }
+
   // Fallback: Simple rule-based judgment (no LLM)
   simpleJudge(result: TestResult): Judgment {
     const allStepsPassed = result.steps.every((s) => s.exitCode === 0);
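
Note that the catch block deliberately downgrades failures to a warning: the unload is best-effort, and a failed eviction should not fail the test run. To confirm an eviction took effect, Ollama lists currently loaded models via the ollama ps CLI command and the /api/ps endpoint; a quick check, assuming a local instance (the helper below is hypothetical, and gemma3:4b matches the model named in the commit message):

    import axios from "axios";

    (async () => {
      // /api/ps returns the models currently loaded into memory.
      const { data } = await axios.get("http://localhost:11434/api/ps");
      const resident = (data.models ?? []).some(
        (m: { name: string }) => m.name === "gemma3:4b",
      );
      console.log(resident ? "judge model still resident" : "judge model unloaded");
    })();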