// lynkr
// Version:
// Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
// 218 lines (189 loc) • 6.26 kB
// JavaScript
const logger = require("../logger");
/**
 * Estimate the token count of a value (rough approximation: 4 chars ≈ 1 token).
 * For production, consider using @anthropic-ai/tokenizer for exact counts.
 *
 * @param {*} text - String to measure. Any other truthy value is measured by
 *   the length of its JSON serialization.
 * @returns {number} Estimated token count; 0 for falsy input and for values
 *   that have no JSON representation.
 * @throws {TypeError} If JSON.stringify itself throws (e.g. circular
 *   structure or BigInt).
 */
function estimateTokens(text) {
  if (!text) return 0;

  let serialized = text;
  if (typeof serialized !== 'string') {
    serialized = JSON.stringify(serialized);
    // JSON.stringify yields undefined (not a string) for functions, symbols
    // and objects whose toJSON() returns undefined; there is nothing to count.
    if (serialized === undefined) return 0;
  }

  return Math.ceil(serialized.length / 4);
}
/**
 * Estimate the tokens of a single message content block.
 *
 * @param {*} block - Content block; `type` selects which field is measured.
 * @returns {number} Estimated tokens; 0 for a null/undefined block.
 */
function estimateContentBlockTokens(block) {
  if (block == null) return 0;

  switch (block.type) {
    case 'text':
      return estimateTokens(block.text || '');
    case 'tool_result':
      return estimateTokens(block.content || '');
    case 'image':
      // Images: rough estimate based on the serialized source length
      return estimateTokens(JSON.stringify(block.source || {}));
    default:
      // Any other block type is measured by its full JSON serialization
      return estimateTokens(JSON.stringify(block));
  }
}

/**
 * Count estimated tokens in a full API payload, broken down by section.
 *
 * @param {object} payload - Request payload; its `system`, `tools` and
 *   `messages` fields are read. A null/undefined payload counts as empty.
 * @returns {{system: number, tools: number, messages: number, total: number}}
 *   Estimated tokens per section, with `total` being their sum.
 */
function countPayloadTokens(payload) {
  const breakdown = {
    system: 0,
    tools: 0,
    messages: 0,
    total: 0,
  };

  // Nothing to count without a payload
  if (payload == null) return breakdown;

  const { system, tools, messages } = payload;

  // System prompt: either a plain value or an array of blocks
  if (Array.isArray(system)) {
    for (const block of system) {
      if (block == null) continue;
      // `??` rather than `||`: an empty `text` must count as zero tokens,
      // not fall back to measuring the whole serialized block.
      breakdown.system += estimateTokens(block.text ?? block);
    }
  } else if (system) {
    breakdown.system = estimateTokens(system);
  }

  // Tools: measured as one serialized JSON document
  if (Array.isArray(tools)) {
    breakdown.tools = estimateTokens(JSON.stringify(tools));
  }

  // Messages
  if (Array.isArray(messages)) {
    for (const msg of messages) {
      if (msg == null) continue;

      // Message content: plain string or array of content blocks
      const { content } = msg;
      if (typeof content === 'string') {
        breakdown.messages += estimateTokens(content);
      } else if (Array.isArray(content)) {
        for (const block of content) {
          breakdown.messages += estimateContentBlockTokens(block);
        }
      }

      // Tool calls attached to the message itself
      if (msg.tool_calls) {
        breakdown.messages += estimateTokens(JSON.stringify(msg.tool_calls));
      }
    }
  }

  breakdown.total = breakdown.system + breakdown.tools + breakdown.messages;
  return breakdown;
}
/**
 * Pull the token usage counters out of an API response.
 *
 * @param {object} response - API response; only its `usage` field is read.
 * @returns {?{inputTokens: number, outputTokens: number,
 *   cacheCreationTokens: number, cacheReadTokens: number, totalTokens: number}}
 *   Normalized counters (missing ones default to 0), or null when the
 *   response carries no usage data.
 */
function extractUsageFromResponse(response) {
  const usage = response ? response.usage : undefined;
  if (!usage) {
    return null;
  }

  const inputTokens = usage.input_tokens || 0;
  const outputTokens = usage.output_tokens || 0;
  const cacheCreationTokens = usage.cache_creation_input_tokens || 0;
  const cacheReadTokens = usage.cache_read_input_tokens || 0;

  return {
    inputTokens,
    outputTokens,
    cacheCreationTokens,
    cacheReadTokens,
    // The total covers input + output only; cache counters are kept separate
    totalTokens: inputTokens + outputTokens,
  };
}
/**
 * Calculate cost based on token usage.
 * Prices are per million tokens, as of 2025 (update as needed).
 * NOTE(review): the rates below are hard-coded — verify them against the
 * providers' current price lists before relying on the totals.
 *
 * @param {object} usage - Token counters: `inputTokens`, `outputTokens`,
 *   `cacheCreationTokens`, `cacheReadTokens`. Missing counters count as 0.
 * @param {string} [model='claude-sonnet-4-5'] - Model id used to pick the
 *   price row; unknown ids fall back to the 'claude-sonnet-4-5' prices.
 * @returns {{input: number, output: number, cacheWrite: number,
 *   cacheRead: number, total: number}} Cost per category and their sum.
 */
function calculateCost(usage, model = 'claude-sonnet-4-5') {
  const FALLBACK_MODEL = 'claude-sonnet-4-5';
  const PRICES = {
    'claude-opus-4-5': { input: 15, output: 75, cache_write: 18.75, cache_read: 1.5 },
    'claude-sonnet-4-5': { input: 3, output: 15, cache_write: 3.75, cache_read: 0.3 },
    'claude-haiku-4': { input: 0.8, output: 4, cache_write: 1, cache_read: 0.08 },
    'databricks-claude-sonnet-4-5': { input: 3, output: 15, cache_write: 3.75, cache_read: 0.3 },
    'databricks-claude-haiku-4': { input: 0.8, output: 4, cache_write: 1, cache_read: 0.08 },
  };

  // Own-property lookup: a model id such as "toString" or "constructor" must
  // not resolve to a member inherited from Object.prototype, which would
  // yield NaN costs instead of the fallback prices.
  const price = Object.prototype.hasOwnProperty.call(PRICES, model)
    ? PRICES[model]
    : PRICES[FALLBACK_MODEL];

  // Every counter defaults to 0 so a partial usage object cannot produce NaN
  const costOf = (tokens, ratePerMillion) => ((tokens || 0) / 1_000_000) * ratePerMillion;

  const inputCost = costOf(usage.inputTokens, price.input);
  const outputCost = costOf(usage.outputTokens, price.output);
  const cacheWriteCost = costOf(usage.cacheCreationTokens, price.cache_write);
  const cacheReadCost = costOf(usage.cacheReadTokens, price.cache_read);

  return {
    input: inputCost,
    output: outputCost,
    cacheWrite: cacheWriteCost,
    cacheRead: cacheReadCost,
    total: inputCost + outputCost + cacheWriteCost + cacheReadCost,
  };
}
/**
 * Log token usage with a per-section breakdown of the estimate.
 *
 * @param {*} context - Value logged as-is under `context`.
 * @param {{system: number, tools: number, messages: number, total: number}} estimated
 *   Estimated token breakdown.
 * @param {?{totalTokens: number}} actual - Actual usage, when available.
 * @returns {void}
 */
function logTokenUsage(context, estimated, actual) {
  // Actual tokens as a percentage of the estimate. The ratio is only
  // meaningful when there is actual usage and a non-zero estimate; anything
  // else (NaN / Infinity) is reported as plain "N/A" rather than "N/A%".
  const ratio = actual ? (actual.totalTokens / estimated.total) * 100 : NaN;
  const estimateAccuracy = Number.isFinite(ratio) ? `${ratio.toFixed(1)}%` : 'N/A';

  logger.info({
    context,
    estimated: {
      system: estimated.system,
      tools: estimated.tools,
      messages: estimated.messages,
      total: estimated.total,
    },
    actual: actual || 'not available',
    estimateAccuracy,
  }, 'Token usage tracked');
}
/**
 * Append one turn's token usage to the session metadata and update the
 * running totals. Does nothing when the session or the actual usage is missing.
 *
 * @param {object} session - Session whose `metadata` is created/updated in place.
 * @param {*} turnId - Identifier stored as `turn` on the usage entry.
 * @param {object} estimated - Estimated token breakdown, stored as-is.
 * @param {object} actual - Actual usage; `totalTokens` feeds the running total.
 * @param {string} model - Model id, stored and forwarded to calculateCost.
 * @returns {void}
 */
function recordTokenUsage(session, turnId, estimated, actual, model) {
  if (!(session && actual)) {
    return;
  }

  // Lazily create the containers on first use
  if (!session.metadata) {
    session.metadata = {};
  }
  const meta = session.metadata;
  if (!meta.tokenUsage) {
    meta.tokenUsage = [];
  }

  const cost = calculateCost(actual, model);
  const entry = {
    turn: turnId,
    timestamp: Date.now(),
    estimated,
    actual,
    cost,
    model,
  };
  meta.tokenUsage.push(entry);

  // Running totals across all recorded turns
  meta.totalTokens = (meta.totalTokens || 0) + actual.totalTokens;
  meta.totalCost = (meta.totalCost || 0) + cost.total;
}
/**
 * Get token statistics for a session.
 *
 * @param {?object} session - Session whose `metadata.tokenUsage`,
 *   `metadata.totalTokens` and `metadata.totalCost` are read.
 * @returns {{turns: number, totalTokens: number, totalCost: number,
 *   averageTokensPerTurn: number, cacheHitRate: (number|string),
 *   breakdown: Array}} Aggregated statistics. `breakdown` is the session's
 *   own usage array (not a copy). A session without recorded usage yields
 *   the same shape with zeroed values.
 */
function getSessionTokenStats(session) {
  const usage = session?.metadata?.tokenUsage;

  if (!usage) {
    return {
      turns: 0,
      totalTokens: 0,
      totalCost: 0,
      averageTokensPerTurn: 0,
      // Present here too so both return paths expose the same keys
      cacheHitRate: 0,
      breakdown: [],
    };
  }

  const totalTokens = session.metadata.totalTokens || 0;
  const totalCost = session.metadata.totalCost || 0;

  return {
    turns: usage.length,
    totalTokens,
    totalCost,
    averageTokensPerTurn: usage.length > 0 ? Math.round(totalTokens / usage.length) : 0,
    cacheHitRate: calculateCacheHitRate(usage),
    breakdown: usage,
  };
}
/**
 * Calculate the cache hit rate from usage history: cache-read tokens as a
 * percentage of all prompt tokens (uncached input + cache writes + cache reads).
 *
 * NOTE(review): this assumes `inputTokens` excludes cached tokens, as in
 * Anthropic's usage schema, where cache reads/writes are reported in their
 * own counters — confirm this holds for every upstream provider. Dividing by
 * `inputTokens` alone would let the rate exceed 100%.
 *
 * @param {?Array<{actual: ?object}>} usageHistory - Recorded usage entries;
 *   entries without `actual` are ignored.
 * @returns {(string|number)} Percentage formatted with one decimal (e.g.
 *   "90.0"), or the number 0 when there is no history or no prompt tokens.
 */
function calculateCacheHitRate(usageHistory) {
  if (!usageHistory || usageHistory.length === 0) return 0;

  let promptTokens = 0;
  let cachedTokens = 0;

  for (const turn of usageHistory) {
    const actual = turn?.actual;
    if (!actual) continue;

    const cacheRead = actual.cacheReadTokens || 0;
    cachedTokens += cacheRead;
    promptTokens += (actual.inputTokens || 0) + (actual.cacheCreationTokens || 0) + cacheRead;
  }

  return promptTokens > 0
    ? ((cachedTokens / promptTokens) * 100).toFixed(1)
    : 0;
}
// Public API of this module. calculateCacheHitRate is not exported; within
// this file it is only called by getSessionTokenStats.
module.exports = {
estimateTokens,
countPayloadTokens,
extractUsageFromResponse,
calculateCost,
logTokenUsage,
recordTokenUsage,
getSessionTokenStats
};