// gpt-tokenizer — o200k_base encoding parameters
// A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder)
// for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
import { EndOfPrompt, EndOfText, FimMiddle, FimPrefix, FimSuffix, ImEnd, ImSep, ImStart, } from '../specialTokens.js';
import { CL_AND_O_TOKEN_SPLIT_PATTERN } from './constants.js';
/**
 * Builds the encoding parameters for the `o200k_base` encoding.
 *
 * @param bytePairRankDecoder - Decoded byte-pair merge ranks for o200k_base;
 *   passed through unchanged to the returned parameter object.
 * @returns {{tokenSplitRegex: RegExp, bytePairRankDecoder: *, specialTokensEncoder: Map<string, number>}}
 *   Encoding parameters: the CL100K/O200K token-split pattern, the rank
 *   decoder, and the special-token → id map.
 */
export function O200KBase(bytePairRankDecoder) {
    // Special-token ids: <|endoftext|> (199999) and <|endofprompt|> (200018)
    // follow tiktoken's o200k_base registry; the FIM and chat-markup tokens
    // occupy 200000-200005 in between.
    const specialTokenMapping = new Map([
        [EndOfText, 199999],
        [FimPrefix, 200000],
        [FimMiddle, 200001],
        [FimSuffix, 200002],
        [ImStart, 200003],
        [ImEnd, 200004],
        [ImSep, 200005],
        [EndOfPrompt, 200018],
    ]);
    return {
        tokenSplitRegex: CL_AND_O_TOKEN_SPLIT_PATTERN,
        bytePairRankDecoder,
        specialTokensEncoder: specialTokenMapping,
    };
}
//# sourceMappingURL=o200k_base.js.map