Unload models from VRAM after use to free GPU memory
- Add unloadModel() method to LLMJudge class
- CLI calls unloadModel() after judging completes
- Workflows unload gemma3:4b after inference tests
- Uses Ollama API with keep_alive: 0 to trigger unload
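For context: Ollama's keep_alive parameter controls how long a model stays resident in VRAM after a request, and a generate call with keep_alive: 0 and no prompt asks the server to evict the model immediately. A minimal TypeScript sketch of that call, equivalent to the curl and axios requests in the diffs below (the helper name is ours; endpoint and payload follow the public Ollama API, and fetch assumes Node 18+):

// Ask Ollama to evict a model from VRAM by issuing a prompt-less
// generate request with keep_alive: 0.
async function unloadFromVram(
  model: string,
  ollamaUrl = "http://localhost:11434",
): Promise<void> {
  const res = await fetch(`${ollamaUrl}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, keep_alive: 0 }),
  });
  if (!res.ok) {
    throw new Error(`Unload request failed: HTTP ${res.status}`);
  }
}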
.github/workflows/full-pipeline.yml (vendored) | 7 +++++++
@@ -165,6 +165,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
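Two details in this step do the heavy lifting: if: always() makes the cleanup run even when the preceding inference steps fail, and the trailing || true keeps a cleanup hiccup (for example, the Ollama server already being down) from failing an otherwise green job.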
.github/workflows/inference.yml (vendored) | 7 +++++++
@@ -111,6 +111,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
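Both workflow steps are fire-and-forget. Where a job needs to confirm the eviction actually happened, Ollama also exposes GET /api/ps, which lists the models currently loaded; a sketch under that assumption (the helper name and response typing are ours):

// Shape of the /api/ps response, reduced to the field we need.
interface PsResponse {
  models: Array<{ name: string }>;
}

// Returns true if the given model still appears in Ollama's list of
// loaded models.
async function isStillLoaded(
  model: string,
  ollamaUrl = "http://localhost:11434",
): Promise<boolean> {
  const res = await fetch(`${ollamaUrl}/api/ps`);
  const body = (await res.json()) as PsResponse;
  return body.models.some((m) => m.name === model);
}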
@@ -169,6 +169,11 @@ program
     }
   }
 
+  // Unload judge model from VRAM if LLM was used
+  if (options.llm !== false) {
+    await judge.unloadModel();
+  }
+
   // Create reports (with separate verdicts in dual-judge mode)
   const reports = Reporter.createReports(
     results,
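The options.llm !== false guard reads like a Commander negated boolean flag: --no-llm defaults options.llm to true and flips it to false when the flag is passed. A hedged sketch of that wiring (only the flag name is inferred from the guard; the rest is illustrative):

import { Command } from "commander";

const program = new Command();
// A --no-llm option makes Commander default options.llm to true, so the
// unload path runs unless the user explicitly opts out of LLM judging.
program.option("--no-llm", "skip LLM judging and fall back to rule-based checks");
program.parse(process.argv);

const options = program.opts();
console.log(options.llm); // true by default, false when --no-llm is given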
@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
     }
   }
 
+  // Unload the judge model from VRAM to free memory for other tests
+  async unloadModel(): Promise<void> {
+    try {
+      process.stderr.write(
+        ` Unloading judge model ${this.model} from VRAM...\n`,
+      );
+      await axios.post(
+        `${this.ollamaUrl}/api/generate`,
+        {
+          model: this.model,
+          keep_alive: 0,
+        },
+        {
+          timeout: 30000,
+        },
+      );
+      process.stderr.write(` Judge model unloaded.\n`);
+    } catch (error) {
+      process.stderr.write(
+        ` Warning: Failed to unload judge model: ${error}\n`,
+      );
+    }
+  }
+
   // Fallback: Simple rule-based judgment (no LLM)
   simpleJudge(result: TestResult): Judgment {
     const allStepsPassed = result.steps.every((s) => s.exitCode === 0);
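unloadModel() deliberately swallows errors and only logs a warning, so callers can treat it as best-effort cleanup. A usage sketch, not from this commit, pairing it with try/finally so the judge model is evicted even when judging throws (the constructor arguments and the judgeAll method are hypothetical; only unloadModel comes from this change):

const judge = new LLMJudge(/* model and Ollama URL: hypothetical arguments */);
try {
  // judgeAll stands in for whatever judging entry point the class exposes
  await judge.judgeAll(results);
} finally {
  // Best-effort VRAM cleanup; unloadModel never throws, it only warns
  await judge.unloadModel();
}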