gpt-tokenizer

A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models

cl100k_base.js
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Cl100KBase = Cl100KBase; const specialTokens_js_1 = require("../specialTokens.js"); const constants_js_1 = require("./constants.js"); function Cl100KBase(bytePairRankDecoder) { const specialTokenMapping = new Map([ [specialTokens_js_1.EndOfText, 100_257], [specialTokens_js_1.FimPrefix, 100_258], [specialTokens_js_1.FimMiddle, 100_259], [specialTokens_js_1.FimSuffix, 100_260], [specialTokens_js_1.ImStart, 100_264], [specialTokens_js_1.ImEnd, 100_265], [specialTokens_js_1.ImSep, 100_266], [specialTokens_js_1.EndOfPrompt, 100_276], ]); return { tokenSplitRegex: constants_js_1.CL_AND_O_TOKEN_SPLIT_PATTERN, bytePairRankDecoder, specialTokensEncoder: specialTokenMapping, }; } //# sourceMappingURL=cl100k_base.js.map