mirror of https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
sample: temporarily use grammars for constrained generation in new engine (#9586)
@@ -254,6 +254,12 @@ type Server struct {
 	// multimodalHash generates hashes for comparing equality
 	// of non-text data
 	multimodalHash maphash.Hash
+
+	// vocab is a llama.cpp vocab required for grammar-based
+	// constrained generation (json mode, structured outputs)
+	// TODO: this is temporary until Ollama sampling supports
+	// constrained generation
+	vocab *sample.Vocab
 }
 
 func (s *Server) allNil() bool {
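The vocab field is populated in loadModel (third hunk below) with nothing but a model path, and only consumed when a request actually asks for a grammar, which implies the expensive vocabulary parse is deferred. A minimal sketch of what such a lazily-loaded wrapper could look like; the llama.LoadVocabFromFile helper and the exact field layout are assumptions for illustration, not the commit's actual code:

package sample

import (
	"sync"

	"github.com/ollama/ollama/llama"
)

// Vocab defers loading the llama.cpp vocabulary until a grammar needs it,
// so models that never use constrained generation pay nothing. This is a
// sketch; llama.LoadVocabFromFile is assumed, not taken from the commit.
type Vocab struct {
	once  sync.Once
	vocab *llama.Vocab
	err   error
	path  string
}

// NewVocab only records the model path; no file I/O happens here.
func NewVocab(path string) *Vocab {
	return &Vocab{path: path}
}

// Load parses the vocabulary exactly once and caches the result (or the
// error) for all subsequent callers.
func (v *Vocab) Load() (*llama.Vocab, error) {
	v.once.Do(func() {
		v.vocab, v.err = llama.LoadVocabFromFile(v.path)
	})
	return v.vocab, v.err
}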
@@ -574,18 +580,25 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	var grammar *sample.Grammar
+	var err error
+	if req.Grammar != "" {
+		grammar, err = sample.NewGrammar(s.vocab, req.Grammar)
+		if err != nil {
+			http.Error(w, "failed to load model vocabulary required for format", http.StatusInternalServerError)
+			return
+		}
+	}
+
 	sampler := sample.NewSampler(
 		req.Temperature,
 		req.TopK,
 		req.TopP,
 		req.MinP,
 		req.Seed,
+		grammar,
 	)
 
-	if req.Grammar != "" {
-		panic("grammars are not yet supported")
-	}
-
 	seq, err := s.NewSequence(req.Prompt, req.Images, NewSequenceParams{
 		numPredict: req.NumPredict,
 		stop: req.Stop,
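Why the grammar has to be an argument to sample.NewSampler rather than a post-hoc filter: grammar-constrained decoding participates in every step, masking tokens that would violate the grammar before sampling and advancing its parser state afterwards. A rough sketch of that per-step loop; the Apply and Accept method names are illustrative assumptions following the usual llama.cpp pattern, not ollama's actual Grammar API:

package sample

import "math"

// Grammar stands in for the llama.cpp-backed grammar state; Apply and
// Accept are assumed names used only for this sketch.
type Grammar struct{ /* parser state elided */ }

func (g *Grammar) Apply(logits []float32) { /* set invalid tokens to -Inf */ }
func (g *Grammar) Accept(token int32)     { /* advance the parser */ }

// sampleToken shows how constrained decoding interleaves with sampling:
// mask first, pick a token, then feed the choice back into the grammar.
func sampleToken(logits []float32, grammar *Grammar) int32 {
	if grammar != nil {
		grammar.Apply(logits) // disallowed continuations become -Inf
	}

	// Greedy argmax stands in for the temperature/top-k/top-p/min-p
	// pipeline that sample.NewSampler actually configures.
	best, bestVal := int32(0), float32(math.Inf(-1))
	for i, v := range logits {
		if v > bestVal {
			best, bestVal = int32(i), v
		}
	}

	if grammar != nil {
		grammar.Accept(best) // keeps the next step's mask consistent
	}
	return best
}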
@@ -797,6 +810,8 @@ func (s *Server) loadModel(
 		panic(err)
 	}
 
+	s.vocab = sample.NewVocab(mpath)
+
 	// TODO(jessegross): LoRA loading
 	if lpath.String() != "" {
 		panic("loras are not yet implemented")
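To make the new surface concrete: req.Grammar carries a llama.cpp GBNF grammar string. A hedged end-to-end sketch using the same calls the diff introduces; the yes/no grammar, the placeholder model path, and the literal sampler arguments are illustrative only:

package main

import (
	"fmt"

	"github.com/ollama/ollama/sample"
)

func main() {
	// GBNF constraining the model to answer exactly "yes" or "no";
	// an illustrative grammar, not one from the commit.
	const gbnf = `root ::= "yes" | "no"`

	// Mirrors loadModel: presumably just records the path, deferring
	// the vocabulary parse (the path here is a placeholder).
	vocab := sample.NewVocab("/path/to/model.gguf")

	// Mirrors the completion handler: building the grammar is the step
	// that can fail if the vocabulary can't be loaded.
	grammar, err := sample.NewGrammar(vocab, gbnf)
	if err != nil {
		fmt.Println("grammar setup failed:", err)
		return
	}

	// Same constructor call the handler now makes; the numeric values
	// (temperature, top-k, top-p, min-p, seed) are arbitrary examples.
	sampler := sample.NewSampler(0.7, 40, 0.9, 0.05, 42, grammar)
	_ = sampler
}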