improve api error handling (#781)
- remove new lines from llama.cpp error messages relayed to client
- check api option types and return error on wrong type
- change num layers from 95% VRAM to 92% VRAM
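The option type check itself is not visible in the hunks below. As a rough sketch of the idea only (the function name and option names here are hypothetical, not ollama's actual code), a request option whose JSON value has the wrong dynamic type can be rejected with an error instead of being silently coerced or ignored:

package main

import "fmt"

// checkOptionType is a hypothetical illustration of "check api option types
// and return error on wrong type": validate the dynamic type of a decoded
// JSON option value before applying it.
func checkOptionType(name string, value any) error {
	switch name {
	case "num_gpu", "num_ctx": // numeric options
		if _, ok := value.(float64); !ok { // JSON numbers decode to float64
			return fmt.Errorf("option %q must be a number, got %T", name, value)
		}
	case "stop": // list-of-strings option
		if _, ok := value.([]any); !ok {
			return fmt.Errorf("option %q must be an array, got %T", name, value)
		}
	}
	return nil
}

func main() {
	// Simulates a client sending {"num_gpu": "all"}: wrong type for a numeric option.
	if err := checkOptionType("num_gpu", "all"); err != nil {
		fmt.Println(err) // option "num_gpu" must be a number, got string
	}
}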
@@ -238,8 +238,8 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 	// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
 	bytesPerLayer := fileSizeBytes / numLayer
 
-	// max number of layers we can fit in VRAM, subtract 5% to prevent consuming all available VRAM and running out of memory
-	layers := int(freeVramBytes/bytesPerLayer) * 95 / 100
+	// max number of layers we can fit in VRAM, subtract 8% to prevent consuming all available VRAM and running out of memory
+	layers := int(freeVramBytes/bytesPerLayer) * 92 / 100
 	log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
 
 	return layers
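As a worked example of the heuristic above, the following standalone sketch (the function name and the clamp to numLayer are assumptions, not the exact ollama code) splits the model file size evenly across layers and then fills 92% of free VRAM, leaving an 8% safety margin:

package main

import "fmt"

// estimateGPULayers mirrors the 92% heuristic from the hunk above: assume each
// layer costs fileSizeBytes/numLayer bytes and offload as many layers as fit
// into 92% of the free VRAM.
func estimateGPULayers(numLayer, fileSizeBytes, freeVramBytes int64) int {
	bytesPerLayer := fileSizeBytes / numLayer
	layers := int(freeVramBytes/bytesPerLayer) * 92 / 100
	if int64(layers) > numLayer {
		layers = int(numLayer) // never report more layers than the model has
	}
	return layers
}

func main() {
	const gib = int64(1) << 30
	// Hypothetical model: 80 layers, 16 GiB on disk, 8 GiB of free VRAM.
	fmt.Println(estimateGPULayers(80, 16*gib, 8*gib)) // 36 layers offloaded
}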
@@ -261,8 +261,7 @@ func NewStatusWriter() *StatusWriter {
 
 func (w *StatusWriter) Write(b []byte) (int, error) {
 	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		err := fmt.Errorf("llama runner: %s", after)
-		w.ErrCh <- err
+		w.ErrCh <- fmt.Errorf("llama runner: %s", bytes.TrimSpace(after))
 	}
 	return os.Stderr.Write(b)
 }
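The change in the second hunk drops the trailing newline that llama.cpp writes after its stderr message, so the error relayed to the client is a single clean line. A minimal sketch of the behavior, assuming a typical stderr line:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	// llama.cpp stderr lines end with a newline; relaying the text after
	// "error:" verbatim would embed that newline (and a leading space) in the
	// API response. bytes.TrimSpace removes both before wrapping the error.
	line := []byte("llama.cpp: error: unable to load model\n")

	if _, after, ok := bytes.Cut(line, []byte("error:")); ok {
		err := fmt.Errorf("llama runner: %s", bytes.TrimSpace(after))
		fmt.Printf("%q\n", err.Error()) // "llama runner: unable to load model"
	}
}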