/**
 * gpt-tokenizer
 * A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder)
 * for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
 */
/* eslint-disable import/extensions */
import bpeRanks from '../bpeRanks/cl100k_base.js'
import { GptEncoding } from '../GptEncoding.js'
export * from '../constants.js'
export * from '../specialTokens.js'
// Build the encoding API for this specific model, lazily supplying the
// cl100k_base merge ranks imported above.
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => bpeRanks)

// Expose each helper as a named export in one step (destructure + export
// combined), so consumers can import individual functions directly.
export const {
  clearMergeCache,
  countTokens,
  decode,
  decodeAsyncGenerator,
  decodeGenerator,
  encode,
  encodeChat,
  encodeChatGenerator,
  encodeGenerator,
  estimateCost,
  isWithinTokenLimit,
  setMergeCacheSize,
  vocabularySize,
} = api

// The whole API object is also available as the default export.
// eslint-disable-next-line import/no-default-export
export default api