/**
 * gpt-tokenizer — util.js
 * A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder)
 * for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
 */
;
// CommonJS boilerplate emitted by the TypeScript compiler: flag the module as
// a transpiled ES module and attach the public helpers to `exports`.
// The function declarations below are hoisted, so assigning them here is safe.
Object.defineProperty(exports, "__esModule", { value: true });
exports.getMaxValueFromMap = getMaxValueFromMap;
exports.escapeRegExp = escapeRegExp;
exports.getSpecialTokenRegex = getSpecialTokenRegex;
/**
 * Returns the largest numeric value stored in a Map.
 *
 * The accumulator starts at 0, so an empty Map yields 0 and any values
 * below 0 are ignored — intended for non-negative token ranks/IDs.
 *
 * @param {Map<unknown, number>} map - Map whose values are compared.
 * @returns {number} The maximum value, or 0 when the Map is empty.
 */
function getMaxValueFromMap(map) {
    let highest = 0;
    for (const value of map.values()) {
        if (value > highest) {
            highest = value;
        }
    }
    return highest;
}
/**
 * Escapes every regular-expression metacharacter in `string` so the result
 * matches the input literally when passed to `new RegExp`.
 *
 * @param {string} string - Raw text that may contain regex metacharacters.
 * @returns {string} The input with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(string) {
    // Same character class as MDN's canonical escapeRegExp recipe.
    const metacharacters = /[$()*+.?[\\\]^{|}]/g;
    return string.replace(metacharacters, (match) => `\\${match}`);
}
/**
 * Builds a regex that matches any one of the given special tokens literally.
 *
 * Each token is escaped so metacharacters in token text (e.g. `<|endoftext|>`)
 * match verbatim, then the alternatives are joined into one capturing group.
 *
 * @param {Iterable<string>} tokens - Special-token strings (e.g. a Set).
 * @returns {RegExp} A flagless regex of the form `(tok1|tok2|...)`.
 */
function getSpecialTokenRegex(tokens) {
    const pattern = Array.from(tokens, (token) => escapeRegExp(token)).join('|');
    return new RegExp(`(${pattern})`);
}
//# sourceMappingURL=util.js.map