@yogesh0333/yogiway-prompt
Version:
Free & Open Source Prompt Optimization Library - Save 30-50% on AI API costs. Multi-language, multi-platform support.
88 lines (87 loc) • 3.07 kB
JavaScript
;
/**
 * Universal Token Counter
 * Supports multiple LLM providers with accurate token counting
 */
// CommonJS interop marker emitted by the TypeScript compiler: tells bundlers
// and interop helpers that this module was authored as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. Function declarations are hoisted, so these bindings are valid
// even though the definitions appear later in the file.
exports.countTokens = countTokens;
exports.getTokenCount = getTokenCount;
exports.estimateOutputTokens = estimateOutputTokens;
// Approximate characters-per-token ratios by provider (varies by model and
// tokenizer; these are coarse heuristics, not exact tokenizer counts).
// Frozen so shared estimation assumptions cannot be mutated by callers.
const TOKEN_RATIOS = Object.freeze({
  openai: 4, // ~4 chars per token (GPT-3.5/4)
  anthropic: 3.5, // ~3.5 chars per token (Claude)
  google: 4, // ~4 chars per token (Gemini)
  cohere: 4, // ~4 chars per token
  mistral: 4, // ~4 chars per token
  meta: 4, // ~4 chars per token
  xai: 4, // ~4 chars per token
  perplexity: 4, // ~4 chars per token
  custom: 4, // Default
});
// Input/output pricing in USD per 1M tokens (as of 2024; update as needed —
// the specific model each row reflects is noted inline).
// Frozen so shared pricing data cannot be mutated by callers.
const PRICING = Object.freeze({
  openai: { input: 0.5, output: 1.5 }, // GPT-4
  anthropic: { input: 3.0, output: 15.0 }, // Claude 3 Opus
  google: { input: 0.25, output: 0.5 }, // Gemini Pro
  cohere: { input: 0.15, output: 0.6 },
  mistral: { input: 0.2, output: 0.6 },
  meta: { input: 0.1, output: 0.3 }, // Llama models
  xai: { input: 0.3, output: 0.6 }, // Grok models
  perplexity: { input: 0.2, output: 0.4 }, // Perplexity models
  custom: { input: 0.5, output: 1.5 },
});
/**
 * Count tokens approximately for a given provider.
 *
 * Averages a word-based estimate (~1.3 tokens per English word) with a
 * character-based estimate (provider-specific chars-per-token ratio).
 *
 * @param {string} text - Text to measure; empty or whitespace-only yields 0.
 * @param {string} [provider='openai'] - Provider key from TOKEN_RATIOS;
 *   unrecognized keys fall back to the 'custom' ratio.
 * @returns {number} Estimated token count (non-negative integer).
 */
function countTokens(text, provider = 'openai') {
  if (!text)
    return 0;
  // Fall back to the generic ratio for unrecognized providers
  // (previously an unknown key made `ratio` undefined and the result NaN).
  const ratio = TOKEN_RATIOS[provider] ?? TOKEN_RATIOS.custom;
  const trimmed = text.trim();
  // Whitespace-only input has no tokens; without this guard,
  // ''.split(/\s+/) reports one "word" and the count comes out nonzero.
  if (trimmed === '')
    return 0;
  // Count words (better approximation)
  const words = trimmed.split(/\s+/).length;
  // Count characters
  const chars = text.length;
  // Average English word is ~4.5 chars, ~1.3 tokens
  const wordBasedTokens = Math.ceil(words * 1.3);
  // Use char-based estimation as fallback
  const charBasedTokens = Math.ceil(chars / ratio);
  // Return average of the two estimates for better accuracy
  return Math.ceil((wordBasedTokens + charBasedTokens) / 2);
}
/**
 * Produce a detailed token report for `text`: token/word/character counts,
 * estimated input cost for the chosen provider, and a per-provider cost
 * comparison across all entries in PRICING.
 *
 * @param {string} text - Text to analyze.
 * @param {string} [provider='openai'] - Provider used for `estimatedCost`.
 * @param {string} [model] - Accepted for API compatibility; currently unused.
 * @returns {{tokens: number, characters: number, words: number,
 *   estimatedCost: number,
 *   breakdown: Array<{provider: string, tokens: number, cost: number}>}}
 */
function getTokenCount(text, provider = 'openai', model) {
  const PER_MILLION = 1000000;
  const tokens = countTokens(text, provider);
  // Input-side cost for the selected provider, priced per 1M tokens.
  const estimatedCost = (tokens / PER_MILLION) * PRICING[provider].input;
  // Re-estimate against every known provider so callers can compare costs.
  const breakdown = Object.entries(PRICING).map(([name, rates]) => {
    const providerTokens = countTokens(text, name);
    return {
      provider: name,
      tokens: providerTokens,
      cost: (providerTokens / PER_MILLION) * rates.input,
    };
  });
  return {
    tokens,
    characters: text.length,
    words: text.trim().split(/\s+/).length,
    estimatedCost,
    breakdown,
  };
}
/**
 * Estimate how many tokens a completion will produce for a given input size.
 * Most models generate roughly 20-30% of the input token count, so 25% is
 * used as a middle-ground heuristic.
 *
 * @param {number} inputTokens - Token count of the prompt.
 * @param {string} [provider='openai'] - Accepted for API symmetry; the ratio
 *   is currently provider-independent.
 * @returns {number} Estimated output token count (rounded up).
 */
function estimateOutputTokens(inputTokens, provider = 'openai') {
  const OUTPUT_RATIO = 0.25; // middle of the observed 20-30% range
  return Math.ceil(inputTokens * OUTPUT_RATIO);
}