model: handle multiple eos tokens (#10577)

* get eos_token_id from generation_config.json

* refactor

* include both ids and strings in trace

* comments

* remove special case for gemma3 special vocab (#10743)
This commit is contained in:
Michael Yang
2025-05-16 13:40:23 -07:00
committed by GitHub
parent 27da2cddc5
commit 333e360422
18 changed files with 282 additions and 182 deletions

17
model/textprocessor.go Normal file
View File

@@ -0,0 +1,17 @@
package model
// Token type identifiers.
//
// Numbering starts at 1: the zero slot of the iota sequence is discarded, so
// an integer left at its zero value does not equal any of these constants.
//
// NOTE(review): the numeric values presumably mirror token-type codes stored
// in model vocabulary metadata — confirm before reordering or renumbering.
const (
	_ = iota // 0 is intentionally not assigned to any token type

	TOKEN_TYPE_NORMAL       // 1
	TOKEN_TYPE_UNKNOWN      // 2
	TOKEN_TYPE_CONTROL      // 3
	TOKEN_TYPE_USER_DEFINED // 4
	TOKEN_TYPE_UNUSED       // 5
	TOKEN_TYPE_BYTE         // 6
)
// TextProcessor converts between strings and sequences of int32 ids and
// exposes the Vocabulary associated with that conversion.
type TextProcessor interface {
	// Encode converts s into a sequence of int32 ids, or returns an error.
	//
	// NOTE(review): addSpecial presumably controls whether special tokens
	// (such as beginning/end-of-sequence markers) are added to the result —
	// confirm against the implementations.
	Encode(s string, addSpecial bool) ([]int32, error)

	// Decode converts a sequence of int32 ids into a string, or returns an
	// error.
	Decode(ids []int32) (string, error)

	// Is returns a boolean for the given id and Special value.
	//
	// NOTE(review): presumably reports whether id is the special token
	// identified by special — confirm against the Special type's definition.
	Is(id int32, special Special) bool

	// Vocabulary returns a pointer to a Vocabulary.
	//
	// NOTE(review): presumably the vocabulary backing this processor; whether
	// callers may mutate it is not visible here — confirm.
	Vocabulary() *Vocabulary
}