gpt-tokenizer
A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models
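As a quick orientation, here is a minimal usage sketch of the package's top-level API, assuming the default entry point exposes encode and decode as named exports (treat the exact names as an assumption, not a guarantee):

import { encode, decode } from 'gpt-tokenizer';

// Encode a string into BPE token ids, then decode them back to text.
const tokens = encode('Hello, world!');
console.log(tokens);          // array of token ids
console.log(decode(tokens));  // 'Hello, world!'

The file shown below contains small internal helpers from the package's compiled output.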
util.js (JavaScript)
export function getMaxValueFromMap(map) {
  // Return the largest value stored in the map (0 if the map is empty).
  let max = 0;
  map.forEach((val) => {
    max = Math.max(max, val);
  });
  return max;
}

export function escapeRegExp(string) {
  // Escape regex metacharacters so the string can be embedded literally in a RegExp.
  return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&'); // $& means the whole matched string
}

export function getSpecialTokenRegex(tokens) {
  // Build a regex that matches (and captures) any of the given special tokens.
  const escapedTokens = [...tokens].map(escapeRegExp);
  const inner = escapedTokens.join('|');
  return new RegExp(`(${inner})`);
}
//# sourceMappingURL=util.js.map
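A short sketch of how these helpers might be used together. The relative import path is hypothetical (they are internal utilities, not part of the documented public API), and the special-token strings are just examples:

import { escapeRegExp, getSpecialTokenRegex, getMaxValueFromMap } from './util.js';

// getSpecialTokenRegex builds a capturing alternation, so String.prototype.split
// keeps the matched special token in the output.
const splitter = getSpecialTokenRegex(new Set(['<|endoftext|>', '<|fim_prefix|>']));
console.log('Hello<|endoftext|>world'.split(splitter));
// -> [ 'Hello', '<|endoftext|>', 'world' ]

// escapeRegExp neutralizes regex metacharacters such as '|' before interpolation.
console.log(escapeRegExp('<|endoftext|>')); // -> <\|endoftext\|>

// getMaxValueFromMap scans the map's values, e.g. to find the highest value
// in a hypothetical token-rank map.
const ranks = new Map([['ab', 0], ['cd', 1], ['ef', 2]]);
console.log(getMaxValueFromMap(ranks)); // -> 2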