/*
 * @yogesh0333/yogiway-prompt
 * Version: (not specified)
 * Free & Open Source Prompt Optimization Library - Save 30-50% on AI API costs. Multi-language, multi-platform support.
 * 69 lines (68 loc) • 2.82 kB
 * JavaScript
 */
;
/**
* Improved Token Counter with Better Accuracy
* Uses multiple estimation methods for better accuracy
*/
// Set the `__esModule` flag on the CommonJS exports object (the marker
// transpilers conventionally emit for ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. Both names are function declarations later in this file, so
// they are hoisted and already defined when these assignments run.
exports.countTokensImproved = countTokensImproved;
exports.getTokenCountRange = getTokenCountRange;
// Characters-per-token ratios, keyed by provider. Text length is divided by
// these values, so a larger ratio produces a smaller token estimate.
// `min`/`avg`/`max` give the spread used for high/typical/low estimates.
// Frozen (including each entry) so the shared table cannot be mutated by accident.
const TOKEN_RATIOS = Object.freeze({
    openai: Object.freeze({ min: 3.5, avg: 4.0, max: 4.5 }), // GPT models
    anthropic: Object.freeze({ min: 3.0, avg: 3.5, max: 4.0 }), // Claude models
    google: Object.freeze({ min: 3.5, avg: 4.0, max: 4.5 }), // Gemini models
    cohere: Object.freeze({ min: 3.5, avg: 4.0, max: 4.5 }),
    mistral: Object.freeze({ min: 3.5, avg: 4.0, max: 4.5 }),
    custom: Object.freeze({ min: 3.5, avg: 4.0, max: 4.5 }),
});
// Tokens-per-word multipliers, keyed by provider (tuned for English text).
// The word count is multiplied by these values.
// Frozen so the shared table cannot be mutated by accident.
const WORD_TOKEN_RATIOS = Object.freeze({
    openai: 1.3, // ~1.3 tokens per word
    anthropic: 1.4, // ~1.4 tokens per word
    google: 1.3,
    cohere: 1.3,
    mistral: 1.3,
    custom: 1.3,
});
/**
 * Estimate the number of tokens in `text` for a given provider.
 *
 * Blends a word-based estimate (weight 0.7) with a character-based estimate
 * (weight 0.3), then adds half a token for every character that is neither
 * an ASCII word character nor whitespace.
 *
 * @param {string} text - Text to measure. Falsy values (empty string, null,
 *   undefined) yield 0.
 * @param {string} [provider='openai'] - Key into TOKEN_RATIOS and
 *   WORD_TOKEN_RATIOS. A provider without its own entry in both tables uses
 *   the 'custom' ratios instead of throwing or producing NaN.
 * @returns {number} Estimated token count; at least 1 for any non-empty text.
 */
function countTokensImproved(text, provider = 'openai') {
    if (!text)
        return 0;
    // Own-property check: an unknown key would make `ratio` undefined (TypeError
    // below) and an inherited key such as 'constructor' would yield NaN.
    const hasOwn = Object.prototype.hasOwnProperty;
    const key = hasOwn.call(TOKEN_RATIOS, provider) && hasOwn.call(WORD_TOKEN_RATIOS, provider)
        ? provider
        : 'custom';
    const ratio = TOKEN_RATIOS[key];
    const wordRatio = WORD_TOKEN_RATIOS[key];
    // Method 1: word-based. The filter drops the single empty string that
    // split() returns for whitespace-only text.
    const words = text.trim().split(/\s+/).filter(w => w.length > 0);
    const wordBasedTokens = Math.ceil(words.length * wordRatio);
    // Method 2: character-based, using the provider's average chars-per-token.
    const charBasedTokens = Math.ceil(text.length / ratio.avg);
    // Method 3: hybrid, weighting the word-based estimate higher.
    const hybridTokens = Math.ceil((wordBasedTokens * 0.7) + (charBasedTokens * 0.3));
    // Method 4: surcharge for punctuation/symbols (anything outside \w and \s).
    const specialChars = (text.match(/[^\w\s]/g) || []).length;
    const specialCharTokens = Math.ceil(specialChars * 0.5);
    // Final estimate: hybrid + special chars, never below 1 for non-empty text.
    return Math.max(1, hybridTokens + specialCharTokens);
}
/**
 * Estimate a low / typical / high token count for `text`.
 *
 * The bounds combine a word-based estimate with character-based estimates
 * taken at the provider's max, avg and min chars-per-token ratios. Unlike
 * countTokensImproved, no special-character surcharge is added here.
 *
 * @param {string} text - Text to measure. Falsy values (empty string, null,
 *   undefined) yield `{ min: 0, avg: 0, max: 0 }`.
 * @param {string} [provider='openai'] - Key into TOKEN_RATIOS and
 *   WORD_TOKEN_RATIOS. A provider without its own entry in both tables uses
 *   the 'custom' ratios instead of throwing or producing NaN.
 * @returns {{min: number, avg: number, max: number}} Estimated token range.
 */
function getTokenCountRange(text, provider = 'openai') {
    if (!text)
        return { min: 0, avg: 0, max: 0 };
    // Own-property check: an unknown key would make `ratio` undefined (TypeError
    // below) and an inherited key such as 'constructor' would yield NaN.
    const hasOwn = Object.prototype.hasOwnProperty;
    const key = hasOwn.call(TOKEN_RATIOS, provider) && hasOwn.call(WORD_TOKEN_RATIOS, provider)
        ? provider
        : 'custom';
    const ratio = TOKEN_RATIOS[key];
    const wordRatio = WORD_TOKEN_RATIOS[key];
    // The filter drops the single empty string split() returns for
    // whitespace-only text.
    const words = text.trim().split(/\s+/).filter(w => w.length > 0);
    const chars = text.length;
    const wordBased = Math.ceil(words.length * wordRatio);
    // More chars per token means fewer tokens, so `max` ratio gives the low bound.
    const charBasedMin = Math.ceil(chars / ratio.max);
    const charBasedAvg = Math.ceil(chars / ratio.avg);
    const charBasedMax = Math.ceil(chars / ratio.min);
    const min = Math.min(wordBased, charBasedMin);
    const avg = Math.ceil((wordBased * 0.7) + (charBasedAvg * 0.3));
    const max = Math.max(wordBased, charBasedMax);
    return { min, avg, max };
}