UNPKG

@yogesh0333/yogiway-prompt

Version:

Free & Open Source Prompt Optimization Library - Save 30-50% on AI API costs. Multi-language, multi-platform support.

69 lines (68 loc) 2.82 kB
"use strict";
/**
 * Heuristic token counter.
 *
 * Estimates how many tokens a text would consume by blending a word-based
 * estimate with a character-based one. No real tokenizer is involved, so all
 * results are approximations.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.countTokensImproved = countTokensImproved;
exports.getTokenCountRange = getTokenCountRange;

// Provider whose ratios are used when the requested provider is not listed.
const FALLBACK_PROVIDER = 'custom';

// Approximate characters per token for each provider. These values are used as
// divisors (chars / ratio), so a larger ratio produces a smaller token estimate.
const TOKEN_RATIOS = {
    openai: { min: 3.5, avg: 4.0, max: 4.5 }, // GPT models
    anthropic: { min: 3.0, avg: 3.5, max: 4.0 }, // Claude models
    google: { min: 3.5, avg: 4.0, max: 4.5 }, // Gemini models
    cohere: { min: 3.5, avg: 4.0, max: 4.5 },
    mistral: { min: 3.5, avg: 4.0, max: 4.5 },
    custom: { min: 3.5, avg: 4.0, max: 4.5 },
};

// Approximate tokens per whitespace-separated word for each provider.
const WORD_TOKEN_RATIOS = {
    openai: 1.3,
    anthropic: 1.4,
    google: 1.3,
    cohere: 1.3,
    mistral: 1.3,
    custom: 1.3,
};

/**
 * Map a caller-supplied provider onto a key that exists in the ratio tables.
 *
 * Uses an own-property check so that unknown values (and inherited names such
 * as "constructor") fall back to FALLBACK_PROVIDER instead of producing
 * `undefined` ratios, which previously caused a TypeError or NaN results.
 *
 * @param {*} provider - Requested provider name.
 * @returns {string} A key present in both TOKEN_RATIOS and WORD_TOKEN_RATIOS.
 */
function resolveProvider(provider) {
    return Object.prototype.hasOwnProperty.call(TOKEN_RATIOS, provider)
        ? provider
        : FALLBACK_PROVIDER;
}

/**
 * Count whitespace-separated words in a string.
 *
 * @param {string} text - Text to inspect.
 * @returns {number} Number of non-empty words.
 */
function countWords(text) {
    // The filter drops the single empty string produced by whitespace-only input.
    return text.trim().split(/\s+/).filter(w => w.length > 0).length;
}

/**
 * Estimate the token count of a text.
 *
 * The estimate is a 70/30 weighted blend of a word-based and a character-based
 * count, plus 0.5 token (rounded up in total) per character that is neither a
 * word character (`\w`, i.e. ASCII letters, digits, underscore) nor whitespace.
 *
 * @param {string} text - Text to measure. Falsy values yield 0.
 * @param {string} [provider='openai'] - Provider key; unknown values use the
 *   'custom' ratios.
 * @returns {number} 0 for empty input, otherwise an integer >= 1.
 */
function countTokensImproved(text, provider = 'openai') {
    if (!text)
        return 0;
    const key = resolveProvider(provider);
    const ratio = TOKEN_RATIOS[key];
    const wordRatio = WORD_TOKEN_RATIOS[key];
    // Method 1: word-based estimate.
    const wordBasedTokens = Math.ceil(countWords(text) * wordRatio);
    // Method 2: character-based estimate.
    const charBasedTokens = Math.ceil(text.length / ratio.avg);
    // Method 3: hybrid, weighting the word-based estimate higher.
    const hybridTokens = Math.ceil((wordBasedTokens * 0.7) + (charBasedTokens * 0.3));
    // Method 4: surcharge for punctuation and other non-word characters.
    const specialChars = (text.match(/[^\w\s]/g) || []).length;
    const specialCharTokens = Math.ceil(specialChars * 0.5);
    // Non-empty text always counts as at least one token.
    return Math.max(1, hybridTokens + specialCharTokens);
}

/**
 * Estimate a (min, avg, max) token range for a text.
 *
 * `min` and `max` are the extremes of the word-based estimate and the
 * character-based estimates computed with the provider's max/min ratios;
 * `avg` is the 70/30 word/character blend. Unlike countTokensImproved, no
 * special-character surcharge is applied here.
 *
 * @param {string} text - Text to measure. Falsy values yield all zeros.
 * @param {string} [provider='openai'] - Provider key; unknown values use the
 *   'custom' ratios.
 * @returns {{min: number, avg: number, max: number}} Integer estimates.
 */
function getTokenCountRange(text, provider = 'openai') {
    if (!text)
        return { min: 0, avg: 0, max: 0 };
    const key = resolveProvider(provider);
    const ratio = TOKEN_RATIOS[key];
    const wordRatio = WORD_TOKEN_RATIOS[key];
    const chars = text.length;
    const wordBased = Math.ceil(countWords(text) * wordRatio);
    // Dividing by the largest chars-per-token ratio gives the smallest count.
    const charBasedMin = Math.ceil(chars / ratio.max);
    const charBasedAvg = Math.ceil(chars / ratio.avg);
    const charBasedMax = Math.ceil(chars / ratio.min);
    const min = Math.min(wordBased, charBasedMin);
    const avg = Math.ceil((wordBased * 0.7) + (charBasedAvg * 0.3));
    const max = Math.max(wordBased, charBasedMax);
    return { min, avg, max };
}