llm-tokenizer
Version:
A lightweight tokenizer for OpenAI's GPT model series. Uses OpenAI's tiktoken Python package.
34 lines (24 loc) • 1.04 kB
TypeScript
// eslint-disable-next-line quotes
declare module "llm-tokenizer" {
  /**
   * Tokenizer for OpenAI's GPT model series.
   *
   * Most operations are asynchronous and return a `Promise`. Wherever an
   * `encoding` argument appears it is optional.
   * NOTE(review): presumably the default encoding (from the constructor or
   * `setEncoding`) is used when `encoding` is omitted — confirm against the
   * implementation.
   */
  export class Tokenizer {
    /**
     * @param encoding Optional. Specify your default encoding
     */
    constructor(encoding?: EncodingOptions);

    /**
     * @param text Input string.
     * @param encoding Optional encoding to use for this call.
     * @returns A promise resolving to a number — presumably the token count
     *   of `text` (inferred from the method name; confirm).
     */
    countToken(text: string, encoding?: EncodingOptions): Promise<number>;

    /**
     * @param text Input string.
     * @param encoding Optional encoding to use for this call.
     * @returns A promise resolving to an array of numbers — presumably the
     *   token ids of `text` (inferred from the method name; confirm).
     */
    encodeText(text: string, encoding?: EncodingOptions): Promise<number[]>;

    /**
     * @param encodedText Array of numbers, presumably token ids such as those
     *   produced by `encodeText` (confirm).
     * @param encoding Optional encoding to use for this call.
     * @returns A promise resolving to a string.
     */
    decodeText(encodedText: number[], encoding?: EncodingOptions): Promise<string>;

    /**
     * @param conversation List of chat messages; each has a `role` of
     *   `system`, `user` or `assistant` and a string `content`.
     * @param encoding Optional encoding to use for this call.
     * @returns A promise resolving to a number — presumably the token count
     *   of the whole conversation (inferred from the method name; confirm).
     */
    countChatToken(
      conversation: Array<{
        role: `system` | `user` | `assistant`;
        content: string;
      }>,
      encoding?: EncodingOptions
    ): Promise<number>;

    /**
     * @param modelName Name of a model.
     * @returns A promise resolving to a string — presumably the name of the
     *   encoding associated with that model (confirm).
     */
    getEncodingByModel(modelName: string): Promise<string>;

    /**
     * The only synchronous method in this declaration.
     *
     * @returns An array of strings — presumably the supported encoding names
     *   (confirm).
     */
    getEncodingList(): string[];

    /**
     * @param options Object with two optional fields, `encodingName` and
     *   `modelName`.
     *   NOTE(review): precedence when both are supplied is not visible from
     *   this declaration — confirm against the implementation.
     * @returns A promise that resolves with no value.
     */
    setEncoding(options: {
      encodingName?: EncodingOptions;
      modelName?: string;
    }): Promise<void>;
  }
}
/**
 * Encoding names accepted by the `encoding` / `encodingName` parameters of
 * the `Tokenizer` declaration.
 */
type EncodingOptions =
  | `cl100k_base`
  | `p50k_base`
  | `r50k_base`;