lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
163 lines (151 loc) • 4.52 kB
JavaScript
/**
* Accurate token estimation using js-tiktoken.
*
* Replaces the chars/4 approximation across the routing path. Falls back to
* chars/4 if js-tiktoken is unavailable (graceful degradation — never throws).
*
* Phase 1.1 of the routing overhaul.
*
* @module routing/tokenizer
*/
const logger = require('../logger');
// Holds the js-tiktoken module once _loadTiktoken has required it; null otherwise.
let _tiktoken = null;
// Becomes true on the first load attempt so the require is attempted only once.
let _tiktokenLoaded = false;
// Cache keyed by encoding name, populated by _getEncoder.
const _encoderCache = new Map();
/**
 * Lazily resolve the js-tiktoken module, attempting the require only once.
 *
 * The result of the first attempt (the module, or null on failure) is kept in
 * module state, so a missing dependency is logged a single time.
 *
 * @returns {object|null} the js-tiktoken module, or null if it could not be loaded
 */
function _loadTiktoken() {
  if (!_tiktokenLoaded) {
    // Mark the attempt before requiring so a failed load is never retried.
    _tiktokenLoaded = true;
    try {
      _tiktoken = require('js-tiktoken');
    } catch (loadError) {
      logger.debug(
        { err: loadError.message },
        '[Tokenizer] js-tiktoken not available, falling back to chars/4'
      );
      _tiktoken = null;
    }
  }
  return _tiktoken;
}
// GPT families whose ids are distinctive enough for a plain substring match.
const _O200K_GPT_MARKERS = ['gpt-4o', 'gpt-4.1', 'gpt-5'];

// o-series ids (o1 / o3 / o4, optionally suffixed as in "o3-mini"). The "o"
// must start a name segment and must not be followed by another digit, so
// unrelated ids that merely contain the letters (e.g. "sao10k/...") do not
// match the way a bare substring test would.
const _O_SERIES_RE = /(?:^|[^a-z0-9])o[134](?!\d)/;

/**
 * Pick the tiktoken encoding name to use for a model id.
 *
 * @param {string|null|undefined} model - model id; matching is case-insensitive
 * @returns {string} 'o200k_base' for the GPT-4o / GPT-4.1 / GPT-5 families and
 *   the o-series, 'cl100k_base' for everything else (including missing or
 *   non-string input)
 */
function _encodingForModel(model) {
  if (!model || typeof model !== 'string') return 'cl100k_base';

  const lower = model.toLowerCase();
  const usesO200k =
    _O200K_GPT_MARKERS.some((marker) => lower.includes(marker)) ||
    _O_SERIES_RE.test(lower);

  // GPT-4 / GPT-3.5 / Anthropic / most others approximate well with cl100k_base
  return usesO200k ? 'o200k_base' : 'cl100k_base';
}
/**
 * Return the encoder for the encoding that matches `model`, or null when
 * js-tiktoken is unavailable or the encoding cannot be constructed.
 *
 * Both outcomes are cached per encoding name: a successful encoder is reused,
 * and a failed load is remembered as null so it is not re-attempted (and
 * re-logged) on every call.
 *
 * @param {string|null|undefined} model - model id used to select the encoding
 * @returns {object|null} the encoder, or null when callers should fall back
 */
function _getEncoder(model) {
  const tiktoken = _loadTiktoken();
  if (!tiktoken) return null;

  const encName = _encodingForModel(model);
  // has() rather than a truthiness check: a cached null records a failed load.
  if (_encoderCache.has(encName)) return _encoderCache.get(encName);

  let encoder = null;
  try {
    encoder = tiktoken.getEncoding(encName);
  } catch (err) {
    logger.debug(
      { err: err.message, encoding: encName },
      '[Tokenizer] Encoder load failed, using fallback'
    );
  }
  _encoderCache.set(encName, encoder);
  return encoder;
}
/**
 * Count tokens in a single string.
 *
 * Uses the encoder selected for `model` when one is available; otherwise, or
 * if encoding fails, falls back to the chars/4 approximation.
 *
 * @param {string} text - text to measure; empty or non-string input counts as 0
 * @param {string|null} model - optional model name for encoding selection
 * @returns {number} token count (exact when an encoder is used, approximate otherwise)
 */
function countTokens(text, model = null) {
  if (!text || typeof text !== 'string') return 0;

  const approximate = () => Math.ceil(text.length / 4);
  const encoder = _getEncoder(model);
  if (!encoder) return approximate();

  try {
    // Pass empty allowed/disallowed special-token lists so sequences such as
    // "<|endoftext|>" appearing in the text are encoded as ordinary text.
    // With js-tiktoken's default (disallowedSpecial = "all") encode() throws
    // on them, which would silently downgrade the count to chars/4.
    return encoder.encode(text, [], []).length;
  } catch (err) {
    logger.debug(
      { err: err?.message },
      '[Tokenizer] Encode failed, falling back to chars/4'
    );
    return approximate();
  }
}
/**
 * Flatten message content into one string suitable for token counting.
 *
 * A string is returned unchanged. For an array, each entry contributes a
 * fragment followed by a single space:
 *   - string entries and entries carrying text contribute that text;
 *   - `tool_use` entries contribute their JSON-serialized `input`;
 *   - `tool_result` entries contribute the flattened form of their `content`.
 * Any other entry, and any other kind of content, contributes nothing.
 *
 * @param {*} content - a string, an array of content blocks, or anything else
 * @returns {string} the combined text ('' when nothing could be extracted)
 */
function _extractText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const fragments = [];
  for (const item of content) {
    if (!item) continue;

    if (typeof item === 'string') {
      fragments.push(item + ' ');
      continue;
    }

    const { type, text } = item;
    if ((type === 'text' && text) || typeof text === 'string') {
      fragments.push(text + ' ');
    } else if (type === 'tool_use' && item.input) {
      try {
        fragments.push(JSON.stringify(item.input) + ' ');
      } catch {
        // ignore non-serializable input
      }
    } else if (type === 'tool_result' && item.content) {
      // tool results may nest further content blocks, so recurse
      fragments.push(_extractText(item.content) + ' ');
    }
  }
  return fragments.join('');
}
/**
 * Estimate the token cost of the inline images in a content array.
 *
 * Sums the length of `source.data` over every block whose type is 'image' and
 * charges one token per 6 characters, rounded down.
 *
 * @param {*} content - message content; anything that is not an array yields 0
 * @returns {number} estimated image tokens
 */
function _imageTokenEstimate(content) {
  if (!Array.isArray(content)) return 0;

  const base64Chars = content.reduce((sum, block) => {
    const hasInlineData = block?.type === 'image' && block.source?.data;
    return hasInlineData ? sum + block.source.data.length : sum;
  }, 0);

  // Rough heuristic mirroring previous behavior: ~1 token per 6 base64 chars
  return Math.floor(base64Chars / 6);
}
/**
 * Count tokens across a full Anthropic-format message array + optional system.
 *
 * The total is the system prompt's tokens, plus each message's text tokens and
 * image estimate, plus a flat 4-token allowance per message. When `messages`
 * is not an array only the system prompt is counted.
 *
 * @param {Array} messages - messages whose `content` is a string or block array
 * @param {string|Array|null} system - optional system prompt
 * @param {string|null} model - optional model name for encoding selection
 * @returns {number} estimated total token count
 */
function countMessagesTokens(messages = [], system = null, model = null) {
  const systemTokens = system ? countTokens(_extractText(system), model) : 0;
  if (!Array.isArray(messages)) return systemTokens;

  // Per-message structural overhead (~4 tokens per message in both Anthropic and OpenAI)
  const perMessageOverhead = 4;

  const contentTokens = messages.reduce((sum, message) => {
    const content = message?.content;
    return sum + countTokens(_extractText(content), model) + _imageTokenEstimate(content);
  }, 0);

  return systemTokens + contentTokens + messages.length * perMessageOverhead;
}
/**
 * Count tokens from a full payload object (Anthropic-style with .messages, .system, .model).
 *
 * @param {object|null|undefined} payload - request payload; a falsy payload counts as 0
 * @param {string|null} model - optional model name; when falsy, payload.model is used
 * @returns {number} estimated total token count for the payload
 */
function countPayloadTokens(payload, model = null) {
  if (payload) {
    const { messages, system, model: payloadModel } = payload;
    return countMessagesTokens(messages, system, model || payloadModel);
  }
  return 0;
}
// Public API: only the three counting entry points are exported; the
// underscore-prefixed helpers are not part of the exported object.
module.exports = {
countTokens,
countMessagesTokens,
countPayloadTokens,
};