/**
 * tokenx
 * Fast and lightweight token estimation for any LLM without requiring a full tokenizer.
 * (Bundled type declarations — version unknown; 37 lines, 1.44 kB, TypeScript.)
 */
//#region src/types.d.ts
/**
 * Configuration options for token estimation.
 */
interface TokenEstimationOptions {
  /** Fallback average characters per token, used when no language-specific rule applies. */
  defaultCharsPerToken?: number;
  /** Custom language configurations; entries here override the built-in defaults. */
  languageConfigs?: LanguageConfig[];
}
/**
 * A language-specific token estimation rule: when `pattern` detects the
 * language, `averageCharsPerToken` supplies the chars-per-token ratio.
 */
interface LanguageConfig {
  /** Regular expression used to detect the language in the input text. */
  pattern: RegExp;
  /** Average number of characters per token for this language. */
  averageCharsPerToken: number;
}
//#endregion
//#region src/index.d.ts
/**
 * Checks if a text string is within a specified token limit.
 *
 * @param text - The text to measure.
 * @param tokenLimit - The maximum number of tokens allowed.
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns `true` if the text is within the token limit, `false` otherwise.
 *   NOTE(review): whether the boundary is inclusive (estimate === limit) is not
 *   visible from this declaration — confirm in the implementation.
 */
declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: TokenEstimationOptions): boolean;
/**
 * Legacy alias for {@link estimateTokenCount}, kept for backward compatibility.
 *
 * @deprecated Use `estimateTokenCount` instead.
 */
declare const approximateTokenSize: typeof estimateTokenCount;
/**
 * Estimates the number of tokens in a text string using heuristic rules
 * (character-per-token averages) rather than a full tokenizer.
 *
 * @param text - The text to estimate. Optional; presumably an omitted or empty
 *   text yields 0 — confirm in the implementation.
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns The estimated token count.
 */
declare function estimateTokenCount(text?: string, options?: TokenEstimationOptions): number;
/**
 * Extracts a portion of text based on token positions, similar to
 * `Array.prototype.slice()`.
 *
 * @param text - The source text to slice.
 * @param start - Token index at which to begin extraction. NOTE(review): the
 *   `slice()` analogy suggests negative indices count from the end — not
 *   verifiable from this declaration; confirm in the implementation.
 * @param end - Token index before which to end extraction (exclusive, if the
 *   `slice()` analogy holds).
 * @param options - Optional estimation settings (see {@link TokenEstimationOptions}).
 * @returns The extracted portion of the text.
 */
declare function sliceByTokens(text: string, start?: number, end?: number, options?: TokenEstimationOptions): string;
//#endregion
export { LanguageConfig, TokenEstimationOptions, approximateTokenSize, estimateTokenCount, isWithinTokenLimit, sliceByTokens };