// gpt-tokenizer — a pure JavaScript implementation of a BPE tokenizer
// (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
/**
* @vitest-environment edge-runtime
*/
// eslint-disable-next-line import/no-extraneous-dependencies
import { describe, expect, test } from 'vitest'
import { GptEncoding } from './GptEncoding.js'
import { resolveEncoding } from './resolveEncoding.js'
describe('edge-runtime', () => {
const encoding = GptEncoding.getEncodingApi('o200k_base', resolveEncoding)
const { decode, encode, isWithinTokenLimit } = encoding
test('simple text', () => {
const str = 'This is some text'
const encoded = [2_500, 382, 1_236, 2_201]
expect(encode(str)).toEqual(encoded)
expect(decode(encode(str))).toEqual(str)
expect(isWithinTokenLimit(str, 3)).toBe(false)
expect(isWithinTokenLimit(str, 5)).toBe(encoded.length)
})
})