UNPKG

gpt-tokenizer

Version:

A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models

18 lines (15 loc) 501 B
/**
 * Returns the largest value stored in `map`.
 *
 * The running maximum starts at 0, so an empty map, or a map whose values
 * are all negative, yields 0.
 *
 * @param map - Map whose numeric values are scanned; keys are ignored.
 * @returns The greatest value found, never less than 0.
 */
export function getMaxValueFromMap(map: Map<unknown, number>): number {
  let max = 0
  for (const value of map.values()) {
    max = Math.max(max, value)
  }
  return max
}

/**
 * Escapes every regular-expression metacharacter in `string` so the result
 * can be embedded in a pattern and matched literally.
 *
 * @param string - Raw text to escape.
 * @returns The text with each metacharacter prefixed by a backslash.
 */
export function escapeRegExp(string: string): string {
  return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&') // $& means the whole matched string
}

/**
 * Builds a regular expression that matches any one of the given special
 * tokens literally, exposing the matched token as capture group 1.
 *
 * @param tokens - Special tokens to recognise.
 * @returns A non-global RegExp matching the first occurrence of any token.
 *   For an empty set the returned expression never matches.
 */
export function getSpecialTokenRegex(tokens: Set<string>): RegExp {
  if (tokens.size === 0) {
    // An empty alternation `()` would match the empty string at every
    // position; an empty negative lookahead never matches instead.
    return new RegExp('(?!)')
  }

  // Alternation takes the first branch that matches, so longer tokens go
  // first: a token that is a prefix of another must not shadow it.
  const longestFirst = [...tokens].sort((a, b) => b.length - a.length)
  const inner = longestFirst.map(escapeRegExp).join('|')
  return new RegExp(`(${inner})`)
}