model: add bos token if configured

This commit is contained in:
Michael Yang
2025-02-26 11:20:51 -08:00
parent e185c08ad9
commit 53d2990d9b
5 changed files with 32 additions and 2 deletions

View File

@@ -30,7 +30,8 @@ type Vocabulary struct {
Scores []uint32
Merges []string
BOS, EOS int32
BOS, EOS int32
AddBOS, AddEOS bool
specialOnce sync.Once
special []string
@@ -281,6 +282,26 @@ func (bpe BytePairEncoding) Encode(s string) ([]int32, error) {
}
}
if len(ids) > 0 {
if bpe.vocab.AddBOS {
if ids[0] == bpe.vocab.BOS {
slog.Warn("adding bos token to prompt which already has it", "id", bpe.vocab.BOS)
}
slog.Debug("adding bos token to prompt", "id", bpe.vocab.BOS)
ids = append([]int32{bpe.vocab.BOS}, ids...)
}
if bpe.vocab.AddEOS {
if ids[len(ids)-1] == bpe.vocab.EOS {
slog.Warn("adding eos token to prompt which already has it", "id", bpe.vocab.EOS)
}
slog.Debug("adding eos token to prompt", "id", bpe.vocab.EOS)
ids = append(ids, bpe.vocab.EOS)
}
}
return ids, nil
}