Unload models from VRAM after use to free GPU memory

- Add unloadModel() method to LLMJudge class
- CLI calls unloadModel() after judging completes
- Workflows unload gemma3:4b after inference tests
- Uses Ollama API with keep_alive: 0 to trigger unload (sketched below)
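The mechanism is a generate request with no prompt whose `keep_alive` field is zero, which tells the Ollama server to evict the model immediately. A minimal standalone sketch of the same call, using the endpoint and model name from this commit; the `/api/ps` verification step is from the public Ollama API and is not part of this diff:

```typescript
import axios from "axios";

// Ask Ollama to evict gemma3:4b from VRAM: a generate request with no prompt
// loads nothing and keep_alive: 0 sets the residency timer to zero.
async function unloadTestModel(): Promise<void> {
  await axios.post(
    "http://localhost:11434/api/generate",
    { model: "gemma3:4b", keep_alive: 0 },
    { timeout: 30000 },
  );

  // Optional check: GET /api/ps lists the models currently loaded; after the
  // unload, gemma3:4b should no longer appear in the list.
  const { data } = await axios.get("http://localhost:11434/api/ps");
  const stillLoaded = (data.models ?? []).some(
    (m: { name: string }) => m.name === "gemma3:4b",
  );
  console.log(stillLoaded ? "still resident" : "unloaded");
}

unloadTestModel().catch(console.error);
```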
.github/workflows/full-pipeline.yml (vendored): +7
```diff
@@ -165,6 +165,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
```
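Two details of this step matter for CI hygiene: `if: always()` runs the unload even when the preceding inference tests fail, and the trailing `|| true` keeps a failed curl (for example, when the Ollama server has already stopped) from failing an otherwise green job.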
.github/workflows/inference.yml (vendored): +7
```diff
@@ -111,6 +111,13 @@ jobs:
             exit 1
           fi
 
+      - name: Unload test model from VRAM
+        if: always()
+        run: |
+          echo "Unloading gemma3:4b from VRAM..."
+          curl -s http://localhost:11434/api/generate -d '{"model":"gemma3:4b","keep_alive":0}' || true
+          echo "Model unloaded"
+
       - name: Upload inference results
         uses: actions/upload-artifact@v4
         if: always()
```
```diff
@@ -169,6 +169,11 @@ program
       }
     }
 
+    // Unload judge model from VRAM if LLM was used
+    if (options.llm !== false) {
+      await judge.unloadModel();
+    }
+
     // Create reports (with separate verdicts in dual-judge mode)
     const reports = Reporter.createReports(
       results,
```
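The `options.llm !== false` guard reads naturally under commander's negatable-option convention: a `--no-<name>` flag defaults the option to `true` and sets it to `false` only when the flag is passed. A minimal sketch, assuming the CLI defines a `--no-llm` flag (the flag name is inferred, not shown in this diff):

```typescript
import { Command } from "commander";

const program = new Command();

// commander treats --no-llm as a negatable boolean: options.llm defaults to
// true and becomes false only when the user passes --no-llm.
program.option("--no-llm", "skip LLM judging and use the rule-based fallback");
program.parse(process.argv);

const options = program.opts();

// Matches the guard in the diff: unloadModel() is only worth calling when a
// judge model was actually loaded.
if (options.llm !== false) {
  console.log("LLM judging enabled; judge model will be unloaded afterwards");
} else {
  console.log("--no-llm passed; nothing to unload");
}
```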
```diff
@@ -175,6 +175,30 @@ Respond ONLY with the JSON array, no other text.`;
     }
   }
 
+  // Unload the judge model from VRAM to free memory for other tests
+  async unloadModel(): Promise<void> {
+    try {
+      process.stderr.write(
+        ` Unloading judge model ${this.model} from VRAM...\n`,
+      );
+      await axios.post(
+        `${this.ollamaUrl}/api/generate`,
+        {
+          model: this.model,
+          keep_alive: 0,
+        },
+        {
+          timeout: 30000,
+        },
+      );
+      process.stderr.write(` Judge model unloaded.\n`);
+    } catch (error) {
+      process.stderr.write(
+        ` Warning: Failed to unload judge model: ${error}\n`,
+      );
+    }
+  }
+
   // Fallback: Simple rule-based judgment (no LLM)
   simpleJudge(result: TestResult): Judgment {
     const allStepsPassed = result.steps.every((s) => s.exitCode === 0);
```
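Per the Ollama API documentation, `keep_alive: 0` is the mechanism for immediate eviction; the same field also accepts a duration string such as `"5m"` (the server default) to keep a model resident after a request, or a negative value to pin it indefinitely. A hypothetical helper generalizing unloadModel() over those values; the name and signature here are illustrative, not from this repo:

```typescript
import axios from "axios";

// Set a model's residency in Ollama. keep_alive: 0 unloads immediately, a
// duration string like "5m" keeps the model loaded for that long after the
// request, and a negative value keeps it loaded indefinitely.
async function setKeepAlive(
  ollamaUrl: string,
  model: string,
  keepAlive: number | string,
): Promise<void> {
  await axios.post(
    `${ollamaUrl}/api/generate`,
    { model, keep_alive: keepAlive },
    { timeout: 30000 },
  );
}

// Equivalent to unloadModel() in the diff above:
// await setKeepAlive("http://localhost:11434", "gemma3:4b", 0);
```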