// gpt-tokenizer — a pure JavaScript implementation of a BPE tokenizer
// (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models.
/**
* @vitest-environment edge-runtime
*/
// eslint-disable-next-line import/no-extraneous-dependencies
import { describe, expect, test } from 'vitest'
import { GptEncoding } from './GptEncoding.js'
import { resolveEncoding } from './resolveEncoding.js'
describe('edge-runtime', () => {
const encoding = GptEncoding.getEncodingApi('o200k_base', resolveEncoding)
const { decode, encode, isWithinTokenLimit } = encoding
test('simple text', () => {
const str = 'This is some text'
const encoded = [2_500, 382, 1_236, 2_201]
expect(encode(str)).toEqual(encoded)
expect(decode(encode(str))).toEqual(str)
expect(isWithinTokenLimit(str, 3)).toBe(false)
expect(isWithinTokenLimit(str, 5)).toBe(encoded.length)
})
})