gpt-tokenizer
Version:
A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models
9 lines (8 loc) • 348 B
text/typescript
// Marker strings, each wrapped in `<|` ... `|>`. They are declared first and
// exported together as named bindings in the list at the bottom.
// NOTE(review): these look like the tokenizer's special-token literals, and
// "fim" presumably abbreviates fill-in-the-middle — confirm against the encoder.

/** End-of-text marker. */
const EndOfText = '<|endoftext|>'

/** Prefix / middle / suffix markers of the `fim_*` family. */
const FimPrefix = '<|fim_prefix|>'
const FimMiddle = '<|fim_middle|>'
const FimSuffix = '<|fim_suffix|>'

/**
 * Start / end / separator markers of the `im_*` family.
 * The trailing numbers are carried over from the original annotations;
 * presumably they are the corresponding token ids — TODO confirm.
 */
const ImStart = '<|im_start|>' // 100264
const ImEnd = '<|im_end|>' // 100265
const ImSep = '<|im_sep|>' // 100266

/** End-of-prompt marker. */
const EndOfPrompt = '<|endofprompt|>'

export {
  EndOfText,
  FimPrefix,
  FimMiddle,
  FimSuffix,
  ImStart,
  ImEnd,
  ImSep,
  EndOfPrompt,
}