/**
 * gpt-tokenizer
 * A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder)
 * for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
 */
import {} from '../modelParams.js';
import { EndOfText } from '../specialTokens.js';
import { R50K_TOKEN_SPLIT_REGEX } from './constants.js';
/**
 * Builds the model-parameter object for the `p50k_base` encoding.
 *
 * @param bytePairRankDecoder - decoder mapping BPE merge ranks back to byte sequences
 * @returns the p50k_base configuration: expected vocabulary size, token-split
 *   regex, the given rank decoder, and the special-token encoder map
 */
export function P50KBase(bytePairRankDecoder) {
  // p50k reuses the r50k token-split pattern (see the constants import);
  // only the vocabulary and special tokens differ between the two encodings.
  const specialTokensEncoder = new Map();
  specialTokensEncoder.set(EndOfText, 50_256);
  return {
    expectedVocabularySize: 50_281,
    tokenSplitRegex: R50K_TOKEN_SPLIT_REGEX,
    bytePairRankDecoder,
    specialTokensEncoder,
  };
}
//# sourceMappingURL=p50k_base.js.map