// @tehreet/conduit
// Version:
// LLM API gateway with intelligent routing, robust process management, and health monitoring
// 389 lines • 12.8 kB
// JavaScript
"use strict";
// Flag this CommonJS module as having been produced from ES module syntax.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the named exports; the real values are assigned at the bottom of the file.
exports.tokenCounter = exports.TokenCounter = void 0;
// Provides `encoding_for_model`, which TokenCounter uses to build its encoders.
const tiktoken_1 = require("tiktoken");
// Provides `log`, which TokenCounter calls when counting or batch processing fails.
const log_1 = require("./log");
/**
 * Approximate token counter with result caching and request batching.
 *
 * Counts are produced by a tiktoken encoder (every model is currently mapped
 * to the `gpt-4` encoding) and fall back to a word/character heuristic when
 * the encoder cannot be created or throws.
 *
 * Result objects have the shape `{ count: number, method: string }` where
 * `method` is one of `'tiktoken'`, `'estimate'` or `'cached'`.
 */
class TokenCounter {
    constructor() {
        // tiktoken model name -> encoder instance (created lazily, released in cleanup()).
        this.encoders = new Map();
        // cache key -> { text, model, result, timestamp }; insertion order == write order.
        this.cache = new Map();
        // Requests waiting for the next batch run: { id, text, model }.
        this.batchQueue = [];
        this.batchTimer = null;
        // Request id -> function that settles the promise returned by countTokensBatched().
        this.batchResolvers = new Map();
        // Lookup counters backing getCacheStats().hitRate.
        this.cacheHits = 0;
        this.cacheMisses = 0;
        // Hard upper bound on the number of cached entries.
        this.maxCacheEntries = 1000;
        this.options = {
            useCache: true,
            cacheMaxAge: 5 * 60 * 1000, // 5 minutes
            enableBatching: true,
            batchSize: 10,
            batchDelay: 100 // 100ms
        };
    }
    /**
     * Count tokens for a given text and model.
     *
     * @param {string} text - Text to count; `null`/`undefined` are treated as empty text.
     * @param {string} [model] - Model name; also part of the cache key.
     * @param {object} [options] - Per-call overrides merged over the instance options.
     * @returns {Promise<{count: number, method: string}>} A fresh object the caller may
     *   mutate freely; it is never shared with the cache.
     */
    async countTokens(text, model = 'claude-3-5-sonnet-20241022', options = {}) {
        const opts = { ...this.options, ...options };
        const input = this.normalizeText(text);
        if (opts.useCache) {
            const cached = this.getCachedResult(input, model);
            if (cached) {
                this.cacheHits += 1;
                return { ...cached, method: 'cached' };
            }
            this.cacheMisses += 1;
        }
        let result;
        try {
            // Map Claude models to tiktoken models for approximation
            const tiktokenModel = this.mapToTiktokenModel(model);
            let encoder = this.encoders.get(tiktokenModel);
            if (!encoder) {
                encoder = (0, tiktoken_1.encoding_for_model)(tiktokenModel);
                this.encoders.set(tiktokenModel, encoder);
            }
            result = { count: encoder.encode(input).length, method: 'tiktoken' };
        }
        catch (error) {
            (0, log_1.log)('Token counting error, falling back to estimation:', error);
            // Fallback to simple estimation
            result = { count: this.estimateTokens(input), method: 'estimate' };
        }
        // Both exact and estimated results are cached; the cache keeps its own copy.
        if (opts.useCache) {
            this.setCachedResult(input, model, result);
        }
        return result;
    }
    /**
     * Count tokens for a message array.
     *
     * Each message is rendered as `"<role>: <content>"`, messages are joined with a
     * blank line, and 10% is added (rounded up) for message structure.
     *
     * NOTE(review): `content` is interpolated as-is, so non-string content relies on
     * its default string conversion - confirm callers only pass string content.
     *
     * @param {Array<{role: string, content: string}>} messages
     * @param {string} [model]
     * @returns {Promise<{count: number, method: string}>}
     */
    async countMessagesTokens(messages, model = 'claude-3-5-sonnet-20241022') {
        // Combine all message content
        const fullText = messages
            .map((msg) => `${msg.role}: ${msg.content}`)
            .join('\n\n');
        const base = await this.countTokens(fullText, model);
        // Integer arithmetic: `count * 1.1` is inexact in floating point
        // (10 * 1.1 === 11.000000000000002) and would round multiples of 10 up by one.
        // A new object is returned so the overhead never leaks into other results.
        return { ...base, count: Math.ceil((base.count * 11) / 10) };
    }
    /**
     * Heuristic token estimate: the mean of `words * 1.3` and `chars / 4`, rounded up.
     *
     * @param {string} text - `null`/`undefined` are treated as empty text.
     * @returns {number} 0 for empty text, otherwise a positive integer.
     */
    estimateTokens(text) {
        const input = this.normalizeText(text);
        if (input.length === 0) {
            return 0;
        }
        // Trim first so leading/trailing whitespace does not add phantom empty "words".
        const trimmed = input.trim();
        const words = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
        const wordEstimate = words * 1.3;
        const charEstimate = input.length / 4;
        return Math.ceil((wordEstimate + charEstimate) / 2);
    }
    /**
     * Map a model name to the tiktoken model used to approximate it.
     *
     * @param {string} claudeModel - Currently unused: every model maps to `gpt-4`.
     * @returns {string} tiktoken model name.
     */
    mapToTiktokenModel(claudeModel) {
        // GPT-4's encoding is used as the approximation for all models.
        return 'gpt-4';
    }
    /**
     * Create an incremental counter for streamed content.
     *
     * Chunks are buffered and estimated (via estimateTokens) one blank-line-separated
     * block at a time, so text is never counted twice.
     *
     * @param {string} [model] - Accepted for interface symmetry; the estimate does not use it.
     * @returns {{addChunk: function(string): void, getTotal: function(): number}}
     */
    createStreamCounter(model = 'claude-3-5-sonnet-20241022') {
        let countedTokens = 0;
        let pending = '';
        const estimate = (block) => this.estimateTokens(block);
        return {
            addChunk(chunk) {
                pending += chunk;
                const blocks = pending.split(/\n\n/);
                if (blocks.length > 1) {
                    // The last piece may still be growing; keep it buffered.
                    pending = blocks.pop() || '';
                    for (const block of blocks) {
                        countedTokens += estimate(block);
                    }
                }
            },
            getTotal() {
                // Pure read: the unfinished block is included but stays buffered, so
                // asking for the total mid-stream does not split that block in two.
                return countedTokens + estimate(pending);
            },
        };
    }
    /**
     * Count tokens for multiple texts.
     *
     * @param {Array<{id: string, text: string, model?: string}>} requests
     * @param {object} [options] - Per-call overrides merged over the instance options.
     *   With `enableBatching` off requests run one at a time; otherwise they run
     *   concurrently in groups of `batchSize`.
     * @returns {Promise<Array<{id: string, count: number, method: string}>>} Results in
     *   request order.
     */
    async batchCountTokens(requests, options = {}) {
        const opts = { ...this.options, ...options };
        const countOne = async (request) => {
            const result = await this.countTokens(request.text, request.model, opts);
            return { id: request.id, count: result.count, method: result.method };
        };
        const results = [];
        if (!opts.enableBatching) {
            // Process individually if batching is disabled
            for (const request of requests) {
                results.push(await countOne(request));
            }
            return results;
        }
        // Process in batches
        const batchSize = this.resolveBatchSize(opts.batchSize);
        for (let start = 0; start < requests.length; start += batchSize) {
            const group = requests.slice(start, start + batchSize);
            results.push(...(await Promise.all(group.map(countOne))));
        }
        return results;
    }
    /**
     * Count tokens through the shared batch queue.
     *
     * The request is queued and settled by the next processBatch() run, or with an
     * estimate if cleanup() is called first. With batching disabled this is
     * equivalent to countTokens().
     *
     * @param {string} text
     * @param {string} [model]
     * @param {object} [options] - Only `enableBatching` affects the queued path.
     * @returns {Promise<{count: number, method: string}>}
     */
    async countTokensBatched(text, model = 'claude-3-5-sonnet-20241022', options = {}) {
        const opts = { ...this.options, ...options };
        if (!opts.enableBatching) {
            return this.countTokens(text, model, opts);
        }
        return new Promise((resolve) => {
            const id = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
            this.batchResolvers.set(id, (result) => {
                resolve({
                    count: result.count,
                    method: result.method
                });
            });
            this.batchQueue.push({ id, text, model });
            this.scheduleBatchProcessing();
        });
    }
    /**
     * Get model-specific token counting configuration.
     *
     * @param {string} model
     * @returns {{multiplier: number, overhead: number}} `{0.9, 10}` for gpt-3.5 models,
     *   `{1.0, 0}` for everything else.
     */
    getModelConfig(model) {
        // These families are checked first and all use the default configuration, so a
        // name matching one of them never reaches the gpt-3.5 rule.
        const defaultFamilies = ['claude-3-5-sonnet', 'claude-3-5-haiku', 'claude-3-opus', 'gpt-4'];
        const usesDefault = defaultFamilies.some((family) => model.includes(family));
        if (!usesDefault && model.includes('gpt-3.5')) {
            return { multiplier: 0.9, overhead: 10 };
        }
        return { multiplier: 1.0, overhead: 0 };
    }
    /**
     * Update token counter options (shallow merge over the current options).
     */
    updateOptions(options) {
        this.options = { ...this.options, ...options };
    }
    /**
     * Get a copy of the current options.
     */
    getOptions() {
        return { ...this.options };
    }
    /**
     * Clear the token count cache and reset the hit/miss counters.
     */
    clearCache() {
        this.cache.clear();
        this.cacheHits = 0;
        this.cacheMisses = 0;
    }
    /**
     * Get cache statistics.
     *
     * @returns {{size: number, hitRate: number, oldestEntry: (Date|null), newestEntry: (Date|null)}}
     *   `hitRate` is hits / lookups made by countTokens since the last clearCache()
     *   (0 when there were no lookups).
     */
    getCacheStats() {
        const lookups = this.cacheHits + this.cacheMisses;
        const hitRate = lookups === 0 ? 0 : this.cacheHits / lookups;
        if (this.cache.size === 0) {
            return {
                size: 0,
                hitRate,
                oldestEntry: null,
                newestEntry: null
            };
        }
        // Plain loop instead of Math.min(...array): spreading a large array into call
        // arguments can exceed the engine's argument limit.
        let oldest = Infinity;
        let newest = -Infinity;
        for (const entry of this.cache.values()) {
            if (entry.timestamp < oldest) {
                oldest = entry.timestamp;
            }
            if (entry.timestamp > newest) {
                newest = entry.timestamp;
            }
        }
        return {
            size: this.cache.size,
            hitRate,
            oldestEntry: new Date(oldest),
            newestEntry: new Date(newest)
        };
    }
    /**
     * Schedule batch processing unless a run is already scheduled.
     */
    scheduleBatchProcessing() {
        if (this.batchTimer) {
            return;
        }
        this.batchTimer = setTimeout(() => {
            this.processBatch();
        }, this.options.batchDelay || 100);
    }
    /**
     * Process up to `batchSize` queued requests and settle their promises.
     * Reschedules itself while requests remain queued.
     */
    async processBatch() {
        this.batchTimer = null;
        if (this.batchQueue.length === 0) {
            return;
        }
        const batch = this.batchQueue.splice(0, this.resolveBatchSize(this.options.batchSize));
        try {
            const results = await this.batchCountTokens(batch);
            for (const result of results) {
                this.settleBatchRequest(result.id, result);
            }
        }
        catch (error) {
            (0, log_1.log)('Batch processing error:', error);
            // Settle whatever is still pending with an estimate rather than leaving it hanging.
            for (const request of batch) {
                this.settleBatchRequest(request.id, {
                    id: request.id,
                    count: this.estimateTokens(request.text),
                    method: 'estimate'
                });
            }
        }
        // Schedule next batch if queue is not empty
        if (this.batchQueue.length > 0) {
            this.scheduleBatchProcessing();
        }
    }
    /**
     * Get a cached result.
     *
     * @returns {({count: number, method: string}|null)} A copy of the cached result, or
     *   `null` when there is no entry, the entry belongs to a different text (hash
     *   collision), or the entry is older than `cacheMaxAge`.
     */
    getCachedResult(text, model) {
        const cacheKey = this.getCacheKey(text, model);
        const cached = this.cache.get(cacheKey);
        if (!cached) {
            return null;
        }
        // The key is only length + 32-bit hash; confirm it really is the same text.
        if (cached.text !== text) {
            return null;
        }
        // Check if cache entry is still valid
        const age = Date.now() - cached.timestamp;
        if (age > (this.options.cacheMaxAge || 5 * 60 * 1000)) {
            this.cache.delete(cacheKey);
            return null;
        }
        return { ...cached.result };
    }
    /**
     * Store a result in the cache.
     *
     * A copy of `result` is stored so later mutation by the caller cannot alter the
     * cached value.
     */
    setCachedResult(text, model, result) {
        const cacheKey = this.getCacheKey(text, model);
        // Delete first so a rewritten key moves to the end of the Map's insertion
        // order, which cleanupCache() relies on to find the oldest entries.
        this.cache.delete(cacheKey);
        this.cache.set(cacheKey, {
            text,
            model,
            result: { ...result },
            timestamp: Date.now()
        });
        if (this.cache.size > this.maxCacheEntries) {
            this.cleanupCache();
        }
    }
    /**
     * Generate the cache key: `<model>:<text length>:<hash of text>`.
     */
    getCacheKey(text, model) {
        return `${model}:${text.length}:${this.simpleHash(text)}`;
    }
    /**
     * Simple 32-bit string hash (hash * 31 + char code), rendered in base 36.
     */
    simpleHash(str) {
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            hash = ((hash << 5) - hash) + str.charCodeAt(i);
            hash |= 0; // Convert to 32bit integer
        }
        return hash.toString(36);
    }
    /**
     * Remove expired cache entries, then evict the oldest-written entries until the
     * cache is back within `maxCacheEntries`.
     */
    cleanupCache() {
        const now = Date.now();
        const maxAge = this.options.cacheMaxAge || 5 * 60 * 1000;
        for (const [key, entry] of this.cache.entries()) {
            if (now - entry.timestamp > maxAge) {
                this.cache.delete(key);
            }
        }
        // Expiry alone does not bound the cache within the max-age window.
        let excess = this.cache.size - this.maxCacheEntries;
        if (excess > 0) {
            for (const key of this.cache.keys()) {
                this.cache.delete(key);
                excess -= 1;
                if (excess <= 0) {
                    break;
                }
            }
        }
    }
    /**
     * Release encoders, clear the cache, cancel the batch timer and settle every
     * still-queued batched request with an estimate so no caller is left waiting.
     * Requests already being processed keep their resolvers and are settled by the
     * running processBatch() call.
     */
    cleanup() {
        for (const encoder of this.encoders.values()) {
            if (typeof encoder.free === 'function') {
                encoder.free();
            }
        }
        this.encoders.clear();
        this.clearCache();
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
        const abandoned = this.batchQueue.splice(0);
        for (const request of abandoned) {
            this.settleBatchRequest(request.id, {
                id: request.id,
                count: this.estimateTokens(request.text),
                method: 'estimate'
            });
        }
    }
    /**
     * Coerce input to a string; `null`/`undefined` become the empty string.
     */
    normalizeText(text) {
        if (typeof text === 'string') {
            return text;
        }
        return text == null ? '' : String(text);
    }
    /**
     * Turn a configured batch size into a usable positive count (default 10), so a
     * zero, negative or non-numeric setting cannot stall the batch loops.
     */
    resolveBatchSize(value) {
        const size = Math.floor(Number(value));
        return size >= 1 ? size : 10;
    }
    /**
     * Settle the pending batched request with the given id, if it is still pending.
     */
    settleBatchRequest(id, result) {
        const resolver = this.batchResolvers.get(id);
        if (resolver) {
            this.batchResolvers.delete(id);
            resolver(result);
        }
    }
}
// Export the class itself alongside a shared, ready-made instance.
exports.TokenCounter = TokenCounter;
// Singleton instance, constructed once when this module is first loaded.
exports.tokenCounter = new TokenCounter();
//# sourceMappingURL=token-counter.js.map