llama: Improve error handling

Check for NULL return values from llama.cpp in more places and
convert them into Go errors, which should make debugging easier
in the future rather than having hidden surprises in our data
structures.
Jesse Gross, 2024-11-01 15:50:53 -07:00 (committed by Jesse Gross)
parent a103dae01e
commit 312d9de1d1
4 changed files with 98 additions and 40 deletions
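
For context, the shape of the change inside the cgo wrappers: where a llama.cpp allocator can return NULL, the wrapper now reports a Go error at the boundary instead of handing back a nil handle that fails later. A minimal self-contained sketch of the pattern, with hypothetical samplingContext/cNewSamplingContext stand-ins for the real cgo types and call:

package main

import (
	"errors"
	"fmt"
)

// samplingContext models the C-allocated handle; nil plays the role of a
// NULL return from llama.cpp. Both names are hypothetical stand-ins.
type samplingContext struct{}

// cNewSamplingContext mimics a cgo allocator that can fail and return NULL.
func cNewSamplingContext(fail bool) *samplingContext {
	if fail {
		return nil
	}
	return &samplingContext{}
}

// newSamplingContext applies the commit's pattern: check the pointer at the
// boundary and convert NULL into a Go error rather than storing a nil handle.
func newSamplingContext(fail bool) (*samplingContext, error) {
	sc := cNewSamplingContext(fail)
	if sc == nil {
		return nil, errors.New("unable to create sampling context")
	}
	return sc, nil
}

func main() {
	if _, err := newSamplingContext(true); err != nil {
		fmt.Println("allocation failure surfaces immediately:", err)
	}
}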

llama/runner/image.go

@@ -63,9 +63,9 @@ func (c *ImageContext) Free(modelPath string) {
 	}
 }
 
-func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) [][]float32 {
+func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspectRatioId int) ([][]float32, error) {
 	if c == nil {
-		return nil
+		return nil, nil
 	}
 
 	hash := c.hashImage(data)
@@ -76,17 +76,23 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
 	embed, err := c.findImage(hash)
 	if err != nil {
 		if c.mllama != nil {
-			embed = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+			embed, err = c.mllama.NewEmbed(llamaContext, data, aspectRatioId)
+			if err != nil {
+				return nil, err
+			}
 		} else if c.clip != nil {
-			embed = c.clip.NewEmbed(llamaContext, data)
+			embed, err = c.clip.NewEmbed(llamaContext, data)
+			if err != nil {
+				return nil, err
+			}
 		} else {
-			return nil
+			return nil, errors.New("received image but vision model not loaded")
 		}
 
 		c.addImage(hash, embed)
 	}
 
-	return embed
+	return embed, nil
 }
 
 func (c *ImageContext) BatchSize(configuredBatchSize int) int {
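
The hunk above also shows why the error propagation matters for the embedding cache: on a failed encode, NewEmbed returns before c.addImage runs, so a nil embedding is never cached by hash. A minimal sketch of that memoization pattern under assumed simplifications (embedCache and its fields are hypothetical, not the real ImageContext):

package main

import (
	"crypto/sha256"
	"fmt"
	"sync"
)

// embedCache memoizes computed embeddings by content hash so a repeated
// image is only encoded once (hypothetical simplification of the
// findImage/addImage pair in the diff above).
type embedCache struct {
	mu    sync.Mutex
	cache map[[32]byte][][]float32
}

func (c *embedCache) get(data []byte, compute func([]byte) ([][]float32, error)) ([][]float32, error) {
	hash := sha256.Sum256(data)

	c.mu.Lock()
	defer c.mu.Unlock()

	if embed, ok := c.cache[hash]; ok {
		return embed, nil
	}

	embed, err := compute(data)
	if err != nil {
		// propagate the failure; a nil embedding is never cached
		return nil, err
	}

	c.cache[hash] = embed
	return embed, nil
}

func main() {
	c := &embedCache{cache: make(map[[32]byte][][]float32)}
	encode := func(data []byte) ([][]float32, error) {
		fmt.Println("encoding image")
		return [][]float32{{0.1, 0.2}}, nil
	}
	c.get([]byte("img"), encode) // prints "encoding image"
	c.get([]byte("img"), encode) // cache hit: no second encode
}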

llama/runner/runner.go

@@ -131,7 +131,10 @@ func (s *Server) NewSequence(prompt string, images []ImageData, params NewSequen
 
 	var sc *llama.SamplingContext
 	if params.samplingParams != nil {
-		sc = llama.NewSamplingContext(s.model, *params.samplingParams)
+		sc, err = llama.NewSamplingContext(s.model, *params.samplingParams)
+		if err != nil {
+			return nil, err
+		}
 		for _, input := range inputs {
 			if input.embed == nil {
 				sc.Accept(input.token, false)
@@ -194,7 +197,11 @@ func (s *Server) inputs(prompt string, images []ImageData) ([]input, error) {
 				return nil, fmt.Errorf("invalid image index: %d", n)
 			}
 
-			embed := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+			embed, err := s.image.NewEmbed(s.lc, images[imageIndex].Data, images[imageIndex].AspectRatioID)
+			if err != nil {
+				return nil, err
+			}
+
 			for _, e := range embed {
 				inputs = append(inputs, input{embed: e})
 			}
@@ -305,13 +312,19 @@ func (s *Server) run(ctx context.Context) {
 	// Logically these batches are used only within the context of processBatch
 	// but it is better for performance to allocate them once here
-	tokenBatch := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+	tokenBatch, err := llama.NewBatch(s.batchSize, len(s.seqs), 0)
+	if err != nil {
+		panic(err)
+	}
 	defer tokenBatch.Free()
 
 	var embedBatch *llama.Batch
 	embedBatchSize := s.image.BatchSize(s.batchSize)
 	if embedBatchSize != 0 {
-		embedBatch = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+		embedBatch, err = llama.NewBatch(embedBatchSize, len(s.seqs), s.image.EmbedSize(s.lc))
+		if err != nil {
+			panic(err)
+		}
 		defer embedBatch.Free()
 	} else {
 		embedBatch = &llama.Batch{}
 	}
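
One design choice worth noting in this last hunk: Server.run has no error return because it runs as a long-lived goroutine, so batch allocation failures at startup panic rather than propagate, while request-path code (NewSequence, inputs) returns the error to fail only that request. A small sketch of the two shapes, using hypothetical batch/newBatch stand-ins for llama.Batch and llama.NewBatch:

package main

import (
	"errors"
	"fmt"
)

type batch struct{ size int }

func (b *batch) free() {}

// newBatch is a hypothetical stand-in for llama.NewBatch: the underlying
// allocation can fail, and the failure is reported as an error.
func newBatch(size int) (*batch, error) {
	if size <= 0 {
		return nil, errors.New("unable to allocate batch")
	}
	return &batch{size: size}, nil
}

// newSequence mirrors the request path: the error propagates to the caller,
// which can fail the single request.
func newSequence(size int) error {
	b, err := newBatch(size)
	if err != nil {
		return err
	}
	defer b.free()
	return nil
}

// runLoop mirrors the shape of Server.run: no error return, so a startup
// allocation failure fails fast with a panic.
func runLoop(size int) {
	b, err := newBatch(size)
	if err != nil {
		panic(err)
	}
	defer b.free()
	fmt.Println("batch ready:", b.size)
}

func main() {
	if err := newSequence(0); err != nil {
		fmt.Println("request path:", err)
	}
	runLoop(512)
}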