@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
143 lines • 5.47 kB
JavaScript
/**
* Token Estimation Utilities
*
* Provides character-based token estimation with per-provider adjustment
* multipliers. Uses the same approach as Continue (gpt-tokenizer baseline
* + provider multipliers) but without requiring a tokenizer dependency.
*
* Multiplier sources: Continue project's getAdjustedTokenCount.ts
* - Anthropic: 1.23x (Anthropic tokenizer produces ~23% more tokens)
* - Google (AI Studio / Vertex): 1.18x
* - Mistral/Codestral: 1.26x
* - OpenAI/GPT: 1.0x (baseline)
*/
/** Characters per token for English text */
export const CHARS_PER_TOKEN = 4;
/** Characters per token for code */
export const CODE_CHARS_PER_TOKEN = 3;
/**
* Safety margin: additive fraction of baseTokens added to the provider-adjusted estimate.
* Using additive margin prevents compounding with provider multipliers.
*
* Old behavior: baseTokens * providerMultiplier * 1.15 (compounding)
* e.g. Anthropic: baseTokens * 1.23 * 1.15 = baseTokens * 1.4145
* New behavior: baseTokens * providerMultiplier + baseTokens * 0.05 (additive)
* e.g. Anthropic: baseTokens * 1.23 + baseTokens * 0.05 = baseTokens * 1.28
*/
export const TOKEN_SAFETY_MARGIN_ADDITIVE = 0.05;
/** @deprecated Use TOKEN_SAFETY_MARGIN_ADDITIVE instead. Kept for backward compatibility. */
export const TOKEN_SAFETY_MARGIN = 1.15;
/** Message framing overhead in tokens (role + delimiters) */
export const TOKENS_PER_MESSAGE = 4;
/** Conversation-level overhead in tokens */
export const TOKENS_PER_CONVERSATION = 24;
/** Image token estimate (flat) */
export const IMAGE_TOKEN_ESTIMATE = 1_024;
/**
* Per-provider token multipliers.
* Applied on top of the base GPT-style character estimate.
*/
const PROVIDER_MULTIPLIERS = {
anthropic: 1.23,
"google-ai": 1.18,
vertex: 1.18,
mistral: 1.26,
openai: 1.0,
azure: 1.0,
bedrock: 1.23, // Bedrock is mostly Anthropic models
ollama: 1.0,
litellm: 1.0,
huggingface: 1.0,
sagemaker: 1.0,
};
/**
* Get the token multiplier for a given provider.
*/
export function getProviderMultiplier(provider) {
if (!provider) {
return 1.0;
}
return PROVIDER_MULTIPLIERS[provider] ?? 1.0;
}
/**
* Estimate token count for a string.
*
* @param text - Input text
* @param provider - Optional provider for multiplier adjustment
* @param isCode - Whether the text is code (uses CODE_CHARS_PER_TOKEN)
* @returns Estimated token count
*/
export function estimateTokens(text, provider, isCode) {
if (!text || text.length === 0) {
return 0;
}
const charsPerToken = isCode ? CODE_CHARS_PER_TOKEN : CHARS_PER_TOKEN;
const baseTokens = Math.ceil(text.length / charsPerToken);
const multiplier = getProviderMultiplier(provider);
// Apply provider multiplier and additive safety margin separately
// This prevents compounding (e.g. Anthropic: 1.23 * 1.15 = 1.41x was too aggressive)
const providerAdjusted = baseTokens * multiplier;
const safetyBuffer = baseTokens * TOKEN_SAFETY_MARGIN_ADDITIVE;
return Math.ceil(providerAdjusted + safetyBuffer);
}
/**
* Estimate token count for a single ChatMessage.
* Includes message framing overhead.
*/
export function estimateMessageTokens(message, provider) {
let contentStr = "";
if (message.content) {
if (typeof message.content === "string") {
contentStr = message.content;
}
else {
try {
contentStr = JSON.stringify(message.content);
}
catch {
// Fallback for circular references or non-serializable content
contentStr = String(message.content);
}
}
}
const contentTokens = estimateTokens(contentStr, provider);
return contentTokens + TOKENS_PER_MESSAGE;
}
/**
* Estimate total token count for an array of messages.
* Includes conversation-level overhead.
*/
export function estimateMessagesTokens(messages, provider) {
if (!messages || messages.length === 0) {
return 0;
}
const messageTokens = messages.reduce((sum, msg) => sum + estimateMessageTokens(msg, provider), 0);
return messageTokens + TOKENS_PER_CONVERSATION;
}
/**
* Truncate text to fit within a token budget.
* Tries to cut at sentence or word boundaries.
*
* @param text - Input text
* @param maxTokens - Maximum tokens allowed
* @param provider - Optional provider for multiplier
* @returns Truncated text with "..." suffix if truncated
*/
export function truncateToTokenBudget(text, maxTokens, provider) {
if (estimateTokens(text, provider) <= maxTokens) {
return { text, truncated: false };
}
const multiplier = getProviderMultiplier(provider);
// Use additive safety margin: effective multiplier = multiplier + additive margin
const effectiveMultiplier = multiplier + TOKEN_SAFETY_MARGIN_ADDITIVE;
const maxChars = Math.floor((maxTokens / effectiveMultiplier) * CHARS_PER_TOKEN);
if (maxChars <= 0) {
return { text: "", truncated: true };
}
// Try to cut at sentence boundary
const candidate = text.slice(0, maxChars);
const lastSentence = Math.max(candidate.lastIndexOf(". "), candidate.lastIndexOf("! "), candidate.lastIndexOf("? "), candidate.lastIndexOf("\n"));
const cutPoint = lastSentence > maxChars * 0.5 ? lastSentence + 1 : maxChars;
return { text: text.slice(0, cutPoint) + "...", truncated: true };
}
//# sourceMappingURL=tokenEstimation.js.map