move tokenizers to separate package (#13825)

This commit is contained in:
Michael Yang
2026-02-05 17:44:11 -08:00
committed by GitHub
parent 8a4b77f9da
commit f1373193dc
79 changed files with 200 additions and 6720 deletions

17
tokenizer/tokenizer.go Normal file
View File

@@ -0,0 +1,17 @@
package tokenizer
// Token type identifiers. The values are consecutive untyped integer
// constants starting at 1; zero is deliberately not assigned to any type.
//
// NOTE(review): the names and numbering look like they mirror the token
// type enumeration stored in GGUF model metadata — confirm before
// renumbering or inserting new entries.
const (
	TOKEN_TYPE_NORMAL       = 1
	TOKEN_TYPE_UNKNOWN      = 2
	TOKEN_TYPE_CONTROL      = 3
	TOKEN_TYPE_USER_DEFINED = 4
	TOKEN_TYPE_UNUSED       = 5
	TOKEN_TYPE_BYTE         = 6
)
// Tokenizer is the common interface implemented by the tokenizers in this
// package. It maps between strings and sequences of int32 token IDs and
// exposes the vocabulary backing that mapping.
type Tokenizer interface {
	// Encode turns s into a sequence of token IDs, or returns an error.
	// NOTE(review): addSpecial presumably controls whether special tokens
	// (such as beginning/end-of-sequence markers) are added to the
	// result — confirm against the implementations.
	Encode(s string, addSpecial bool) ([]int32, error)

	// Decode turns a sequence of token IDs back into a string, or returns
	// an error.
	Decode(ids []int32) (string, error)

	// Is reports a boolean for a token ID and a Special value.
	// NOTE(review): presumably true when id is the token designated for
	// the given special role — confirm against the implementations.
	Is(id int32, special Special) bool

	// Vocabulary returns a pointer to the tokenizer's Vocabulary.
	Vocabulary() *Vocabulary
}