mirror of https://github.com/dogkeeper886/ollama37.git
pass model and predict options
api/types.go (+86)
@@ -31,6 +31,92 @@ type PullProgress struct {

type GenerateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`

	ModelOptions   `json:"model_opts"`
	PredictOptions `json:"predict_opts"`
}

type ModelOptions struct {
	ContextSize int    `json:"context_size"`
	Seed        int    `json:"seed"`
	NBatch      int    `json:"n_batch"`
	F16Memory   bool   `json:"memory_f16"`
	MLock       bool   `json:"mlock"`
	MMap        bool   `json:"mmap"`
	VocabOnly   bool   `json:"vocab_only"`
	LowVRAM     bool   `json:"low_vram"`
	Embeddings  bool   `json:"embeddings"`
	NUMA        bool   `json:"numa"`
	NGPULayers  int    `json:"gpu_layers"`
	MainGPU     string `json:"main_gpu"`
	TensorSplit string `json:"tensor_split"`
}

type PredictOptions struct {
	Seed        int     `json:"seed"`
	Threads     int     `json:"threads"`
	Tokens      int     `json:"tokens"`
	TopK        int     `json:"top_k"`
	Repeat      int     `json:"repeat"`
	Batch       int     `json:"batch"`
	NKeep       int     `json:"nkeep"`
	TopP        float64 `json:"top_p"`
	Temperature float64 `json:"temp"`
	Penalty     float64 `json:"penalty"`
	F16KV       bool
	DebugMode   bool
	StopPrompts []string
	IgnoreEOS   bool `json:"ignore_eos"`

	TailFreeSamplingZ float64 `json:"tfs_z"`
	TypicalP          float64 `json:"typical_p"`
	FrequencyPenalty  float64 `json:"freq_penalty"`
	PresencePenalty   float64 `json:"pres_penalty"`
	Mirostat          int     `json:"mirostat"`
	MirostatETA       float64 `json:"mirostat_lr"`
	MirostatTAU       float64 `json:"mirostat_ent"`
	PenalizeNL        bool    `json:"penalize_nl"`
	LogitBias         string  `json:"logit_bias"`

	PathPromptCache string
	MLock           bool `json:"mlock"`
	MMap            bool `json:"mmap"`
	PromptCacheAll  bool
	PromptCacheRO   bool
	MainGPU         string
	TensorSplit     string
}

var DefaultModelOptions ModelOptions = ModelOptions{
	ContextSize: 128,
	Seed:        0,
	F16Memory:   true,
	MLock:       false,
	Embeddings:  true,
	MMap:        true,
	LowVRAM:     false,
}

var DefaultPredictOptions PredictOptions = PredictOptions{
	Seed:              -1,
	Threads:           -1,
	Tokens:            512,
	Penalty:           1.1,
	Repeat:            64,
	Batch:             512,
	NKeep:             64,
	TopK:              90,
	TopP:              0.86,
	TailFreeSamplingZ: 1.0,
	TypicalP:          1.0,
	Temperature:       0.8,
	FrequencyPenalty:  0.0,
	PresencePenalty:   0.0,
	Mirostat:          0,
	MirostatTAU:       5.0,
	MirostatETA:       0.1,
	MMap:              true,
	StopPrompts:       []string{"llama"},
}

type GenerateResponse struct {
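As a rough illustration of what this commit enables, the sketch below builds a GenerateRequest starting from DefaultModelOptions and DefaultPredictOptions and marshals it to JSON. Because the embedded ModelOptions and PredictOptions fields carry their own json tags, encoding/json serializes them as nested "model_opts" and "predict_opts" objects. The import path is an assumption inferred from the mirror URL above; the type names, field names, and default values are taken directly from the diff.

package main

import (
	"encoding/json"
	"fmt"
	"log"

	// Assumed import path, inferred from the mirror URL above.
	"github.com/dogkeeper886/ollama37/api"
)

func main() {
	// Start from the package defaults and override a few predict options.
	predict := api.DefaultPredictOptions
	predict.Temperature = 0.5
	predict.Tokens = 256

	req := api.GenerateRequest{
		Model:          "llama",
		Prompt:         "Why is the sky blue?",
		ModelOptions:   api.DefaultModelOptions,
		PredictOptions: predict,
	}

	// The tagged embedded structs marshal as nested objects:
	// {"model":"llama","prompt":"...","model_opts":{...},"predict_opts":{...}}
	b, err := json.MarshalIndent(req, "", "  ")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(b))
}

Note that Seed, MLock, MMap, MainGPU, and TensorSplit appear in both option structs, some with identical json tags; since each struct marshals under its own model_opts or predict_opts key, the keys do not collide in the resulting JSON.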