llama: Improve error handling
Check for NULL return values from llama.cpp in more places and convert them into Go errors. Failures then surface as explicit errors at the call site rather than as hidden nil values in our data structures, which should make debugging easier in the future.
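Every hunk applies the same pattern: a cgo wrapper that used to hand back whatever pointer llama.cpp returned now checks that pointer for NULL and converts it into a Go error. A minimal self-contained sketch of the pattern, with hypothetical type and helper names (the real wrappers call into llama.cpp through cgo):

	package llama

	import "errors"

	// cBatch stands in for the C-side allocation owned by llama.cpp.
	type cBatch struct{}

	// Batch wraps the C-side batch state.
	type Batch struct{ c *cBatch }

	// newCBatch models the cgo call into llama.cpp: like the C API, it
	// returns nil (NULL) when allocation fails. Hypothetical helper.
	func newCBatch(size, maxSeq, embedSize int) *cBatch {
		if size <= 0 {
			return nil // stand-in for an allocation failure
		}
		return &cBatch{}
	}

	// NewBatch converts a NULL from the C side into a Go error, so the
	// caller sees the failure immediately instead of storing a nil
	// pointer that crashes somewhere else later.
	func NewBatch(size, maxSeq, embedSize int) (*Batch, error) {
		cb := newCBatch(size, maxSeq, embedSize)
		if cb == nil {
			return nil, errors.New("unable to allocate batch")
		}
		return &Batch{c: cb}, nil
	}

The callers in the diff below follow this two-value shape, either propagating the error or, where no error path exists, panicking.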
@@ -63,9 +63,9 @@ func (c *ImageContext) Free(modelPath string) {
 	}
 }
 
-func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) [][]float32 {
+func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) ([][]float32, error) {
 	if c == nil {
-		return nil
+		return nil, nil
 	}
 
 	hash := c.hashImage(data)
@@ -76,17 +76,23 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
 	embed, err := c.findImage(hash)
 	if err != nil {
 		if c.mllama != nil {
-			embed = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+			embed, err = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+			if err != nil {
+				return nil, err
+			}
 		} else if c.clip != nil {
-			embed = c.clip.NewEmbed(llamaContext, data)
+			embed, err = c.clip.NewEmbed(llamaContext, data)
+			if err != nil {
+				return nil, err
+			}
 		} else {
-			return nil
+			return nil, errors.New("received image but vision model not loaded")
 		}
 		c.addImage(hash, embed)
 	}
 
-	return embed
+	return embed, nil
 }
 
 func (c *ImageContext) BatchSize(configuredBatchSize int) int {
 
@@ -131,7 +131,10 @@ func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequen
 
 	var sc *llama.SamplingContext
 	if params.samplingParams != nil {
-		sc = llama.NewSamplingContext(s.model, *params.samplingParams)
+		sc, err = llama.NewSamplingContext(s.model, *params.samplingParams)
+		if err != nil {
+			return nil, err
+		}
 		for _, input := range inputs {
 			if input.embed == nil {
 				sc.Accept(input.token, false)
@@ -194,7 +197,11 @@ func (s *Server) inputs(prompt string, images []ImageData) ([]input, error) {
 			return nil, fmt.Errorf("invalid image index: %d", n)
 		}
 
-		embed := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+		embed, err := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+		if err != nil {
+			return nil, err
+		}
+
 		for _, e := range embed {
 			inputs = append(inputs, input{embed: e})
 		}
@@ -305,13 +312,19 @@ func (s *Server) run(ctx context.Context) {
 
 	// Logically these batches are used only within the context of processBatch
 	// but it is better for performance to allocate them once here
-	tokenBatch := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+	tokenBatch, err := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+	if err != nil {
+		panic(err)
+	}
 	defer tokenBatch.Free()
 
 	var embedBatch *llama.Batch
 	embedBatchSize := s.image.BatchSize(s.batchSize)
 	if embedBatchSize != 0 {
-		embedBatch = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+		embedBatch, err = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+		if err != nil {
+			panic(err)
+		}
 		defer embedBatch.Free()
 	} else {
 		embedBatch = &llama.Batch{}
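A design note on the last hunk: inputs() and NewSequence() can return the error up to the request handler, but run() has no error return, so a failed batch allocation there panics; since these batches are allocated once up front, that failure indicates a broken setup rather than a bad request. On the calling side the change is mechanical. A hedged example against the new NewEmbed signature (imgCtx, lc, and img are illustrative names, assumed in scope):

	embed, err := imgCtx.NewEmbed(lc, img.Data, img.AspectRatioID)
	if err != nil {
		// e.g. the request supplied an image but no vision model is loaded
		return nil, fmt.Errorf("image embedding failed: %w", err)
	}
	for _, e := range embed {
		inputs = append(inputs, input{embed: e})
	}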