/**
 * tokenx
 * Fast and lightweight token estimation for any LLM without requiring a full tokenizer.
 * (Bundled type declarations — version unknown; 37 lines, 1.44 kB, TypeScript.)
 */
//#region src/types.d.ts
/**
 * Configuration options for token estimation.
 */
interface TokenEstimationOptions {
  /** Fallback average characters per token, used when no language-specific rule applies. */
  defaultCharsPerToken?: number;
  /** Custom language configurations; entries here override the built-in defaults. */
  languageConfigs?: LanguageConfig[];
}
/**
 * A language-specific token estimation rule: when `pattern` detects the
 * language, `averageCharsPerToken` supplies the chars-per-token ratio.
 */
interface LanguageConfig {
  /** Regular expression used to detect the language in the input text. */
  pattern: RegExp;
  /** Average number of characters per token for this language. */
  averageCharsPerToken: number;
}
//#endregion
//#region src/index.d.ts
/**
 * Checks if a text string is within a specified token limit.
 *
 * @param text - The text to measure.
 * @param tokenLimit - The maximum number of tokens allowed.
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns `true` if the text is within the token limit, `false` otherwise.
 *   NOTE(review): whether the boundary is inclusive (estimate === limit) is not
 *   visible from this declaration — confirm in the implementation.
 */
declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: TokenEstimationOptions): boolean;
/**
 * Legacy alias for {@link estimateTokenCount}, kept for backward compatibility.
 *
 * @deprecated Use `estimateTokenCount` instead.
 */
declare const approximateTokenSize: typeof estimateTokenCount;
/**
 * Estimates the number of tokens in a text string using heuristic rules
 * (character-per-token averages) rather than a full tokenizer.
 *
 * @param text - The text to estimate. Optional; presumably an omitted or empty
 *   text yields 0 — confirm in the implementation.
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns The estimated token count.
 */
declare function estimateTokenCount(text?: string, options?: TokenEstimationOptions): number;
/**
 * Extracts a portion of text based on token positions, similar to
 * `Array.prototype.slice()`.
 *
 * @param text - The source text to slice.
 * @param start - Token index at which to begin extraction. NOTE(review): the
 *   `slice()` analogy suggests negative indices count from the end — not
 *   verifiable from this declaration; confirm in the implementation.
 * @param end - Token index before which to end extraction (exclusive, if the
 *   `slice()` analogy holds).
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns The extracted portion of the text.
 */
declare function sliceByTokens(text: string, start?: number, end?: number, options?: TokenEstimationOptions): string;
//#endregion
export { LanguageConfig, TokenEstimationOptions, approximateTokenSize, estimateTokenCount, isWithinTokenLimit, sliceByTokens };