UNPKG

@aid-on/llm-throttle

Version:

高精度なLLMレート制限ライブラリ - Precise dual rate limiting for LLM APIs (RPM + TPM)

1,482 lines (1,470 loc) 47.2 kB
"use strict"; "use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __esm = (fn, res) => function __init() { return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // node_modules/@aid-on/fuzztok/dist/index.mjs var dist_exports = {}; __export(dist_exports, { CharacterClassifier: () => CharacterClassifier, FuzzyTokenEstimator: () => FuzzyTokenEstimator, SimpleModelConfigProvider: () => SimpleModelConfigProvider, TokenCostCalculator: () => TokenCostCalculator, TokenEstimationVisualizer: () => TokenEstimationVisualizer, createFuzzyEstimator: () => createFuzzyEstimator, createSimpleFuzzyEstimator: () => createSimpleFuzzyEstimator }); function createFuzzyEstimator(modelProvider, options) { return new FuzzyTokenEstimator(modelProvider, options); } function createSimpleFuzzyEstimator(modelConfigs, defaultModel) { const provider = new SimpleModelConfigProvider(modelConfigs, defaultModel); return new FuzzyTokenEstimator(provider, { defaultModel }); } var CharacterClassifier, SimpleModelConfigProvider, DEFAULT_FALLBACK_CONFIG, FuzzyTokenEstimator, TokenCostCalculator, TokenEstimationVisualizer; var init_dist = __esm({ "node_modules/@aid-on/fuzztok/dist/index.mjs"() { "use strict"; CharacterClassifier = class { /** * CJK(中国語・日本語・韓国語)文字およびマルチバイト文字の判定 */ static isCJKCharacter(char) { const code = char.charCodeAt(0); return ( // CJK統合漢字拡張A-G、互換漢字など code >= 11904 && code <= 12031 || // CJK部首補助 code >= 12288 && code <= 12351 || // CJK記号と句読点 code >= 12352 && code <= 12447 || // ひらがな code >= 12448 && code <= 12543 || // カタカナ code >= 12544 && code <= 12591 || // 注音符号 code >= 12592 && code <= 12687 || // ハングル互換字母 code >= 12688 && code <= 12703 || // 漢文用記号 code >= 12704 && code <= 12735 || // 注音字母拡張 code >= 12736 && code <= 12783 || // CJKストローク code >= 12784 && code <= 12799 || // カタカナ拡張 code >= 12800 && code <= 13055 || // 囲みCJK文字・月 code >= 13056 && code <= 13311 || // CJK互換 code >= 13312 && code <= 19903 || // CJK統合漢字拡張A code >= 19968 && code <= 40959 || // CJK統合漢字 code >= 40960 && code <= 42127 || // イ文字 code >= 42128 && code <= 42191 || // イ文字部首 code >= 44032 && code <= 55215 || // ハングル音節文字 code >= 63744 && code <= 64255 || // CJK互換漢字 code >= 65072 && code <= 65103 || // CJK互換形 code >= 65280 && code <= 65519 || // 半角・全角形 code >= 131072 && code <= 173791 || // CJK統合漢字拡張B code >= 173824 && code <= 177983 || // CJK統合漢字拡張C code >= 177984 && code <= 178207 || // CJK統合漢字拡張D code >= 178208 && code <= 183983 || // CJK統合漢字拡張E code >= 183984 && code <= 191471 || // CJK統合漢字拡張F code >= 196608 && code <= 201551 ); } /** * より詳細な文字種別の判定 */ static getCharacterType(char) { if (this.isCJKCharacter(char)) return "cjk"; if (/[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]/.test(char)) return "latin"; if (/[0-9\u0660-\u0669\u06f0-\u06f9]/.test(char)) return "digit"; if (/\s/.test(char)) return "whitespace"; return "symbol"; } /** * テキスト全体の言語構成を分析 */ static analyzeTextComposition(text) { const composition = { cjk: 0, latin: 0, digits: 0, symbols: 0, whitespace: 0, total: text.length }; for (const char of text) { const type = this.getCharacterType(char); if (type === "digit") { composition.digits++; } else if (type === "symbol") { composition.symbols++; } else { composition[type]++; } } return { ...composition, cjkRatio: composition.total > 0 ? composition.cjk / composition.total : 0 }; } }; SimpleModelConfigProvider = class { constructor(configs, defaultModel) { this.configs = new Map(Object.entries(configs)); this.defaultModel = defaultModel; } getConfig(modelName) { return this.configs.get(modelName); } getSupportedModels() { return Array.from(this.configs.keys()); } getDefaultModel() { return this.defaultModel; } }; DEFAULT_FALLBACK_CONFIG = { charsPerToken: 4, overhead: 10, cjkTokensPerChar: 1.5, mixedTextMultiplier: 1.05, numberTokensPerChar: 3.5, symbolTokensPerChar: 2.5, whitespaceHandling: "compress" }; FuzzyTokenEstimator = class { constructor(modelProvider, options) { this.modelProvider = modelProvider; this.fallbackConfig = options?.fallbackConfig || DEFAULT_FALLBACK_CONFIG; this.defaultModel = options?.defaultModel || modelProvider.getDefaultModel?.(); } /** * 現在のモデルプロバイダーを取得 */ getModelProvider() { return this.modelProvider; } /** * モデルプロバイダーを変更 */ setModelProvider(provider) { this.modelProvider = provider; } /** * 指定されたモデルの設定を取得(フォールバック付き) */ getModelConfig(modelName) { const model = modelName || this.defaultModel || "unknown"; const config = this.modelProvider.getConfig(model) || this.fallbackConfig; return { config, model }; } /** * 詳細な推定結果を返すメインメソッド */ estimateDetailed(text, modelName) { const { config, model } = this.getModelConfig(modelName); if (!text) { return { tokens: config.overhead, breakdown: { cjk: 0, latin: 0, digits: 0, symbols: 0, overhead: config.overhead }, textAnalysis: { totalChars: 0, cjkRatio: 0, adjustmentFactor: 1 }, confidence: "high", modelUsed: model }; } const composition = CharacterClassifier.analyzeTextComposition(text); const breakdown = { cjk: 0, latin: 0, digits: 0, symbols: 0, overhead: config.overhead }; let currentType = null; let currentGroup = ""; const processGroup = () => { if (!currentGroup || !currentType) return; switch (currentType) { case "cjk": breakdown.cjk += currentGroup.length * config.cjkTokensPerChar; break; case "latin": breakdown.latin += Math.ceil(currentGroup.length / config.charsPerToken); break; case "digit": breakdown.digits += Math.ceil(currentGroup.length / (config.numberTokensPerChar || 3.5)); break; case "symbol": breakdown.symbols += Math.ceil(currentGroup.length / (config.symbolTokensPerChar || 2.5)); break; case "whitespace": if (config.whitespaceHandling === "count") { breakdown.symbols += currentGroup.length * 0.3; } break; } currentGroup = ""; }; for (const char of text) { const type = CharacterClassifier.getCharacterType(char); if (type !== currentType) { processGroup(); currentType = type; } currentGroup += char; } processGroup(); let baseTokens = Object.values(breakdown).reduce((sum, val) => sum + val, 0); baseTokens *= config.mixedTextMultiplier; const adjustmentFactor = this.calculateAdjustmentFactor(composition.cjkRatio); const finalTokens = Math.ceil(baseTokens * adjustmentFactor); const confidence = this.calculateConfidence(composition); return { tokens: finalTokens, breakdown, textAnalysis: { totalChars: text.length, cjkRatio: composition.cjkRatio, adjustmentFactor }, confidence, modelUsed: model }; } /** * シンプルなトークン数のみを返すメソッド */ estimate(text, modelName) { return this.estimateDetailed(text, modelName).tokens; } /** * TextPayload形式での推定 */ estimatePayload(payload) { const promptTokens = typeof payload.prompt === "string" ? this.estimate(payload.prompt, payload.model) : this.getModelConfig(payload.model).config.overhead; const maxTokens = payload.maxTokens && payload.maxTokens > 0 ? payload.maxTokens : 500; return Math.ceil((promptTokens + maxTokens) * 1.1); } /** * 日本語比率に基づく調整係数を計算 */ calculateAdjustmentFactor(japaneseRatio) { if (japaneseRatio > 0.8) { return 0.6; } else if (japaneseRatio === 0) { return 1; } else if (japaneseRatio < 0.2) { return 0.7; } else { return 0.7 - (japaneseRatio - 0.2) / 0.6 * 0.1; } } /** * 推定の信頼度を計算 */ calculateConfidence(composition) { if (composition.total < 10) return "low"; if (composition.cjkRatio > 0.9 || composition.cjkRatio < 0.1) return "high"; if (composition.symbols / composition.total > 0.3) return "low"; return "medium"; } /** * バッチ推定 */ estimateBatch(texts, modelName) { return texts.map((text) => this.estimateDetailed(text, modelName)); } /** * ストリーミングテキストの推定 */ async *estimateStream(textStream, modelName) { let total = 0; for await (const chunk of textStream) { const tokens = this.estimate(chunk, modelName); total += tokens; yield { chunk, tokens, total }; } } /** * 利用可能なモデルの一覧を取得 */ getSupportedModels() { return this.modelProvider.getSupportedModels(); } }; TokenCostCalculator = class { constructor(costProvider) { this.costProvider = costProvider; } calculate(model, inputTokens, outputTokens) { const pricing = this.costProvider.getCost(model); if (!pricing) { return { inputCost: 0, outputCost: 0, totalCost: 0, formattedTotal: "N/A", available: false }; } const inputCost = inputTokens / 1e3 * pricing.input; const outputCost = outputTokens / 1e3 * pricing.output; const totalCost = inputCost + outputCost; return { inputCost, outputCost, totalCost, formattedTotal: `$${totalCost.toFixed(4)}`, available: true }; } }; TokenEstimationVisualizer = class { static visualize(text, result) { const bar = (value, max, width = 20) => { const filled = Math.round(value / max * width); return "\u2588".repeat(filled) + "\u2591".repeat(width - filled); }; const maxTokens = Math.max(...Object.values(result.breakdown)); return ` === Token Estimation Visualization === Model: ${result.modelUsed} Text: "${text.slice(0, 50)}${text.length > 50 ? "..." : ""}" Total Tokens: ${result.tokens} Confidence: ${result.confidence} Breakdown: CJK [${bar(result.breakdown.cjk, maxTokens)}] ${result.breakdown.cjk.toFixed(1)} Latin [${bar(result.breakdown.latin, maxTokens)}] ${result.breakdown.latin.toFixed(1)} Digits [${bar(result.breakdown.digits, maxTokens)}] ${result.breakdown.digits.toFixed(1)} Symbols [${bar(result.breakdown.symbols, maxTokens)}] ${result.breakdown.symbols.toFixed(1)} Overhead [${bar(result.breakdown.overhead, maxTokens)}] ${result.breakdown.overhead} Text Analysis: - Total Characters: ${result.textAnalysis.totalChars} - CJK Ratio: ${(result.textAnalysis.cjkRatio * 100).toFixed(1)}% - Adjustment Factor: ${result.textAnalysis.adjustmentFactor.toFixed(2)} `; } }; } }); // src/index.ts var index_exports = {}; __export(index_exports, { AsyncLock: () => AsyncLock, InMemoryStorage: () => InMemoryStorage, InvalidConfigError: () => InvalidConfigError, LLMThrottle: () => LLMThrottle, RateLimitError: () => RateLimitError, TokenBucket: () => TokenBucket, createLLMThrottle: () => createLLMThrottle, createLLMThrottleWithStorage: () => createLLMThrottleWithStorage, createMonotonicClock: () => createMonotonicClock, createOptimalClock: () => createOptimalClock, createStandardClock: () => createStandardClock, defaultValidationRules: () => defaultValidationRules, estimateTokens: () => estimateTokens, getClockInfo: () => getClockInfo, isFuzztokAvailable: () => isFuzztokAvailable, robustEstimateTokens: () => robustEstimateTokens, validateAndNormalizeConfig: () => validateAndNormalizeConfig, validateConfig: () => validateConfig }); module.exports = __toCommonJS(index_exports); // src/errors.ts var RateLimitError = class extends Error { constructor(message, reason, availableIn) { super(message); this.reason = reason; this.availableIn = availableIn; this.name = "RateLimitError"; } }; var InvalidConfigError = class extends Error { constructor(message) { super(message); this.name = "InvalidConfigError"; } }; // src/token-bucket.ts var TokenBucket = class { constructor(config, storage) { this._initialized = false; this.validateConfig(config); this._capacity = config.capacity; this._available = config.initialTokens ?? config.capacity; this._refillRate = config.refillRate; this._clock = config.clock ?? (() => Date.now()); this._lastRefill = this._clock(); this._storage = storage; this._storageKey = config.storageKey; } validateConfig(config) { if (config.capacity <= 0) { throw new InvalidConfigError("Capacity must be greater than 0"); } if (config.refillRate <= 0) { throw new InvalidConfigError("Refill rate must be greater than 0"); } if (config.initialTokens !== void 0 && config.initialTokens < 0) { throw new InvalidConfigError("Initial tokens cannot be negative"); } if (config.initialTokens !== void 0 && config.initialTokens > config.capacity) { throw new InvalidConfigError("Initial tokens cannot exceed capacity"); } } get capacity() { return this._capacity; } get available() { this.refill(); return this._available; } get refillRate() { return this._refillRate; } refill() { const now = this._clock(); const timePassed = (now - this._lastRefill) / 1e3; if (timePassed <= 0) return; const tokensToAdd = timePassed * this._refillRate; this._available = Math.min( this._capacity, this._available + tokensToAdd ); this._lastRefill = now; } hasTokens(count) { if (count < 0) return false; this.refill(); return this._available >= count; } consume(count) { if (count < 0) { throw new Error("Cannot consume negative tokens"); } this.refill(); if (this._available >= count) { this._available -= count; this.persistState(); return true; } return false; } refund(count) { if (count < 0) { throw new Error("Cannot refund negative tokens"); } this._available = Math.min(this._capacity, this._available + count); this.persistState(); } timeUntilNextToken() { this.refill(); if (this._available >= 1) return 0; return Math.ceil((1 - this._available) / this._refillRate * 1e3); } timeUntilTokens(count) { if (count <= 0) return 0; this.refill(); if (this._available >= count) return 0; const needed = count - this._available; return Math.ceil(needed / this._refillRate * 1e3); } reset() { this._available = this._capacity; this._lastRefill = this._clock(); this.persistState(); } /** * Get current internal state for snapshots */ getState() { this.refill(); return { available: this._available, capacity: this._capacity, lastRefill: this._lastRefill }; } /** * Restore state from snapshot */ restoreState(state) { if (state.available < 0 || state.available > state.capacity) { throw new Error("Invalid state: available tokens out of range"); } if (state.capacity !== this._capacity) { throw new Error("Invalid state: capacity mismatch"); } this._available = state.available; this._lastRefill = state.lastRefill; } /** * Validate internal consistency */ validateConsistency() { this.refill(); return this._available >= 0 && this._available <= this._capacity && this._capacity > 0 && this._refillRate > 0; } /** * Initialize from storage if available */ async initializeFromStorage() { if (!this._storage || !this._storageKey || this._initialized) { return; } try { const storedState = await this._storage.loadTokenBucketState(this._storageKey); if (storedState && storedState.capacity === this._capacity) { this._available = storedState.available; this._lastRefill = storedState.lastRefill; } } catch (error) { } finally { this._initialized = true; } } /** * Persist current state to storage */ persistState() { if (!this._storage || !this._storageKey) { return; } const state = this.getState(); this._storage.saveTokenBucketState(this._storageKey, state).catch(() => { }); } }; // src/utils/async-lock.ts var AsyncLock = class { constructor() { this.locked = false; this.queue = []; } /** * Acquire the lock */ async acquire() { return new Promise((resolve, reject) => { if (!this.locked) { this.locked = true; resolve(); } else { this.queue.push({ resolve, reject }); } }); } /** * Release the lock */ release() { if (!this.locked) { throw new Error("Cannot release a lock that is not acquired"); } if (this.queue.length > 0) { const next = this.queue.shift(); next.resolve(); } else { this.locked = false; } } /** * Execute a function with the lock acquired */ async withLock(fn) { await this.acquire(); try { return await fn(); } finally { this.release(); } } /** * Check if the lock is currently held */ isLocked() { return this.locked; } /** * Get the number of pending operations waiting for the lock */ getQueueLength() { return this.queue.length; } /** * Clear all pending operations (useful for cleanup) */ clear() { const error = new Error("AsyncLock cleared"); this.queue.forEach((item) => item.reject(error)); this.queue = []; this.locked = false; } }; // src/utils/clock.ts var hasNodeHrtime; var hasPerformanceNow; try { hasNodeHrtime = typeof process !== "undefined" && typeof process.hrtime !== "undefined" && typeof process.hrtime.bigint === "function"; } catch { hasNodeHrtime = false; } try { hasPerformanceNow = typeof performance !== "undefined" && typeof performance.now === "function"; } catch { hasPerformanceNow = false; } function createMonotonicClock() { if (hasNodeHrtime) { const startTime = process.hrtime.bigint(); return () => { const current = process.hrtime.bigint(); return Number(current - startTime) / 1e6; }; } else if (hasPerformanceNow) { const startTime = performance.now(); return () => performance.now() - startTime; } else { const startTime = Date.now(); return () => Date.now() - startTime; } } function createStandardClock() { return () => Date.now(); } function createOptimalClock(preferMonotonic = true) { if (preferMonotonic && (hasNodeHrtime || hasPerformanceNow)) { return createMonotonicClock(); } return createStandardClock(); } function getClockInfo() { return { hasNodeHrtime, hasPerformanceNow, recommendedClock: hasNodeHrtime || hasPerformanceNow ? "monotonic" : "standard" }; } // src/utils/validation.ts var defaultValidationRules = [ { name: "rpm_positive", validate: (config) => config.rpm > 0 || "RPM must be greater than 0", level: "error" }, { name: "tpm_positive", validate: (config) => config.tpm > 0 || "TPM must be greater than 0", level: "error" }, { name: "burst_rpm_valid", validate: (config) => { if (config.burstRPM !== void 0 && config.burstRPM < config.rpm) { return "Burst RPM cannot be less than RPM"; } return true; }, level: "error" }, { name: "burst_tpm_valid", validate: (config) => { if (config.burstTPM !== void 0 && config.burstTPM < config.tpm) { return "Burst TPM cannot be less than TPM"; } return true; }, level: "error" }, { name: "burst_rpm_limit", validate: (config) => { if (config.burstRPM !== void 0 && config.burstRPM > config.rpm * 10) { return "Burst RPM should not exceed 10x the base RPM for optimal performance"; } return true; }, level: "warn" }, { name: "burst_tpm_limit", validate: (config) => { if (config.burstTPM !== void 0 && config.burstTPM > config.tpm * 10) { return "Burst TPM should not exceed 10x the base TPM for optimal performance"; } return true; }, level: "warn" }, { name: "rpm_high_warning", validate: (config) => { if (config.rpm > 1e4) { return "RPM above 10,000 may impact performance and API stability"; } return true; }, level: "warn" }, { name: "tpm_high_warning", validate: (config) => { if (config.tpm > 1e6) { return "TPM above 1,000,000 may impact performance and memory usage"; } return true; }, level: "warn" }, { name: "history_retention_valid", validate: (config) => { if (config.historyRetentionMs !== void 0 && config.historyRetentionMs <= 0) { return "History retention must be positive"; } return true; }, level: "error" }, { name: "max_history_valid", validate: (config) => { if (config.maxHistoryRecords !== void 0 && config.maxHistoryRecords <= 0) { return "Max history records must be positive"; } return true; }, level: "error" }, { name: "efficiency_window_valid", validate: (config) => { if (config.efficiencyWindowSize !== void 0 && config.efficiencyWindowSize <= 0) { return "Efficiency window size must be positive"; } return true; }, level: "error" } ]; function validateConfig(config, customRules = [], logger) { const result = { valid: true, errors: [], warnings: [] }; const allRules = [...defaultValidationRules, ...customRules]; for (const rule of allRules) { try { const validationResult = rule.validate(config); if (validationResult !== true) { const message = typeof validationResult === "string" ? validationResult : `Validation failed for rule: ${rule.name}`; if (rule.level === "error") { result.errors.push(message); result.valid = false; } else { result.warnings.push(message); } } } catch (error) { const message = `Validation rule '${rule.name}' threw an error: ${error instanceof Error ? error.message : String(error)}`; result.errors.push(message); result.valid = false; } } if (logger && result.warnings.length > 0) { result.warnings.forEach((warning) => logger.warn(`Config validation warning: ${warning}`)); } return result; } function validateAndNormalizeConfig(config, customRules = [], logger) { if (!config || typeof config !== "object") { throw new Error("Config must be an object"); } const result = validateConfig(config, customRules, logger); if (!result.valid) { throw new Error(`Configuration validation failed: ${result.errors.join(", ")}`); } return { ...config, adjustmentFailureStrategy: config.adjustmentFailureStrategy || "warn", maxHistoryRecords: config.maxHistoryRecords || 1e4, historyRetentionMs: config.historyRetentionMs || 6e4, efficiencyWindowSize: config.efficiencyWindowSize || 50, logger: config.logger || console }; } // src/storage/in-memory.ts var InMemoryStorage = class { constructor() { this.tokenBucketStates = /* @__PURE__ */ new Map(); this.consumptionHistory = []; this.compensationDebt = 0; } async saveTokenBucketState(key, state) { this.tokenBucketStates.set(key, { ...state }); } async loadTokenBucketState(key) { const state = this.tokenBucketStates.get(key); return state ? { ...state } : null; } async saveConsumptionHistory(records) { this.consumptionHistory = [...records]; } async loadConsumptionHistory(limit) { if (limit && limit > 0) { return this.consumptionHistory.slice(-limit); } return [...this.consumptionHistory]; } async addConsumptionRecord(record) { this.consumptionHistory.push({ ...record }); } async cleanupConsumptionHistory(olderThan) { const originalLength = this.consumptionHistory.length; this.consumptionHistory = this.consumptionHistory.filter( (record) => record.timestamp > olderThan ); return originalLength - this.consumptionHistory.length; } async saveCompensationDebt(debt) { this.compensationDebt = debt; } async loadCompensationDebt() { return this.compensationDebt; } async clear() { this.tokenBucketStates.clear(); this.consumptionHistory = []; this.compensationDebt = 0; } async isAvailable() { return true; } }; // src/utils/fuzztok-integration.ts var fuzztokModule = null; var fuzztokAvailable = false; async function loadFuzztok() { if (fuzztokModule !== null) { return fuzztokAvailable; } try { fuzztokModule = await Promise.resolve().then(() => (init_dist(), dist_exports)); fuzztokAvailable = true; return true; } catch (error) { fuzztokAvailable = false; return false; } } async function estimateTokens(text) { const loaded = await loadFuzztok(); if (!loaded || !fuzztokModule) { return Math.ceil(text.length / 4); } try { return fuzztokModule.countTokens(text); } catch (error) { return Math.ceil(text.length / 4); } } async function isFuzztokAvailable() { return await loadFuzztok(); } function simpleFallbackEstimate(text) { if (!text) return 0; const words = text.split(/\s+/).length; const chars = text.length; const wordBasedEstimate = Math.ceil(words * 1.3); const charBasedEstimate = Math.ceil(chars / 4); return Math.max(1, Math.round((wordBasedEstimate + charBasedEstimate) / 2)); } async function robustEstimateTokens(text) { if (!text) return 0; try { return await estimateTokens(text); } catch (error) { return simpleFallbackEstimate(text); } } // src/index.ts var LLMThrottle = class { /** * Create a new LLMThrottle instance * @param config Configuration including optional storage implementation */ constructor(config) { this.consumptionHistory = []; // private _config: LLMThrottleConfig; // Kept for future use this.lock = new AsyncLock(); this.compensationDebt = 0; this.initialized = false; const legacyConfig = { ...config, storage: config.storage ? { enabled: true, implementation: config.storage } : void 0 }; const tempLogger = config.logger || console; const validatedConfig = validateAndNormalizeConfig(legacyConfig, config.validationRules, tempLogger); this.logger = validatedConfig.logger || console; this.historyRetentionMs = validatedConfig.historyRetentionMs || 6e4; this.maxHistoryRecords = validatedConfig.maxHistoryRecords || 1e4; this.efficiencyWindowSize = validatedConfig.efficiencyWindowSize || 50; this.adjustmentFailureStrategy = validatedConfig.adjustmentFailureStrategy || "warn"; this.storageEnabled = !!config.storage; this.storage = config.storage || new InMemoryStorage(); if (validatedConfig.clock) { this.clock = validatedConfig.clock; } else { this.clock = createOptimalClock(validatedConfig.monotonicClock !== false); } this.rpmBucket = new TokenBucket({ capacity: validatedConfig.burstRPM || validatedConfig.rpm, refillRate: validatedConfig.rpm / 60, // per second initialTokens: validatedConfig.burstRPM || validatedConfig.rpm, clock: this.clock, storageKey: "rpm" }, this.storageEnabled ? this.storage : void 0); this.tpmBucket = new TokenBucket({ capacity: validatedConfig.burstTPM || validatedConfig.tpm, refillRate: validatedConfig.tpm / 60, initialTokens: validatedConfig.burstTPM || validatedConfig.tpm, clock: this.clock, storageKey: "tpm" }, this.storageEnabled ? this.storage : void 0); } /** * Initialize from storage if available * Call this after creating the instance to restore persisted state */ async initialize() { if (this.initialized || !this.storageEnabled) { return; } try { await Promise.all([ this.rpmBucket.initializeFromStorage(), this.tpmBucket.initializeFromStorage() ]); const storedDebt = await this.storage.loadCompensationDebt(); if (storedDebt >= 0) { this.compensationDebt = storedDebt; } const history = await this.storage.loadConsumptionHistory(this.maxHistoryRecords); if (history.length > 0) { this.consumptionHistory = history; this.cleanupHistory(); } this.logger.info("Throttle state initialized from storage"); } catch (error) { this.logger.warn(`Failed to initialize from storage: ${error instanceof Error ? error.message : String(error)}`); } finally { this.initialized = true; } } canProcess(estimatedTokens) { if (estimatedTokens < 0) { throw new Error("Estimated tokens cannot be negative"); } if (!this.rpmBucket.hasTokens(1)) { return { allowed: false, reason: "rpm_limit", availableIn: this.rpmBucket.timeUntilNextToken(), availableTokens: { rpm: this.rpmBucket.available, tpm: this.tpmBucket.available } }; } if (!this.tpmBucket.hasTokens(estimatedTokens)) { return { allowed: false, reason: "tpm_limit", availableIn: this.tpmBucket.timeUntilTokens(estimatedTokens), availableTokens: { rpm: this.rpmBucket.available, tpm: this.tpmBucket.available } }; } return { allowed: true, availableTokens: { rpm: this.rpmBucket.available, tpm: this.tpmBucket.available } }; } // Synchronous version (backward compatibility) consume(requestId, estimatedTokens, metadata) { if (!requestId || requestId.trim() === "") { throw new Error("Request ID cannot be empty"); } const totalTokensNeeded = estimatedTokens + this.compensationDebt; const check = this.canProcess(totalTokensNeeded); if (!check.allowed) { return false; } this.rpmBucket.consume(1); this.tpmBucket.consume(totalTokensNeeded); const appliedCompensation = this.compensationDebt; this.compensationDebt = 0; const record = { timestamp: this.clock(), tokens: estimatedTokens, requestId, metadata, estimatedTokens, compensationDebt: appliedCompensation }; this.consumptionHistory.push(record); if (this.storageEnabled) { this.storage.addConsumptionRecord(record).catch(() => { }); } this.cleanupHistory(); return true; } // Async version for concurrent scenarios async consumeAsync(requestId, estimatedTokens, metadata) { if (!requestId || requestId.trim() === "") { throw new Error("Request ID cannot be empty"); } return await this.lock.withLock(async () => { const totalTokensNeeded = estimatedTokens + this.compensationDebt; const check = this.canProcess(totalTokensNeeded); if (!check.allowed) { return false; } this.rpmBucket.consume(1); this.tpmBucket.consume(totalTokensNeeded); const appliedCompensation = this.compensationDebt; this.compensationDebt = 0; const record = { timestamp: this.clock(), tokens: estimatedTokens, requestId, metadata, estimatedTokens, compensationDebt: appliedCompensation }; this.consumptionHistory.push(record); if (this.storageEnabled) { this.storage.addConsumptionRecord(record).catch(() => { }); } this.cleanupHistory(); return true; }); } // Synchronous version (backward compatibility) consumeOrThrow(requestId, estimatedTokens, metadata) { const consumed = this.consume(requestId, estimatedTokens, metadata); if (!consumed) { const check = this.canProcess(estimatedTokens + this.compensationDebt); throw new RateLimitError( `Rate limit exceeded: ${check.reason}`, check.reason, check.availableIn ); } } async consumeOrThrowAsync(requestId, estimatedTokens, metadata) { const consumed = await this.consumeAsync(requestId, estimatedTokens, metadata); if (!consumed) { const check = this.canProcess(estimatedTokens + this.compensationDebt); throw new RateLimitError( `Rate limit exceeded: ${check.reason}`, check.reason, check.availableIn ); } } // Synchronous version (backward compatibility) adjustConsumption(requestId, actualTokens) { if (actualTokens < 0) { throw new Error("Actual tokens cannot be negative"); } const record = this.consumptionHistory.find( (item) => item.requestId === requestId ); if (!record) { throw new Error(`Request ID '${requestId}' not found in consumption history`); } const difference = actualTokens - record.tokens; if (difference > 0) { const consumed = this.tpmBucket.consume(difference); if (!consumed) { this.handleAdjustmentFailureSync(requestId, difference); } } else if (difference < 0) { this.tpmBucket.refund(-difference); } record.tokens = actualTokens; record.actualTokens = actualTokens; } async adjustConsumptionAsync(requestId, actualTokens) { if (actualTokens < 0) { throw new Error("Actual tokens cannot be negative"); } return await this.lock.withLock(async () => { const record = this.consumptionHistory.find( (item) => item.requestId === requestId ); if (!record) { throw new Error(`Request ID '${requestId}' not found in consumption history`); } const difference = actualTokens - record.tokens; if (difference > 0) { const consumed = this.tpmBucket.consume(difference); if (!consumed) { await this.handleAdjustmentFailure(requestId, difference); } } else if (difference < 0) { this.tpmBucket.refund(-difference); } record.tokens = actualTokens; record.actualTokens = actualTokens; }); } handleAdjustmentFailureSync(requestId, additionalTokens) { const message = `Failed to consume additional ${additionalTokens} tokens for request ${requestId}`; switch (this.adjustmentFailureStrategy) { case "strict": throw new RateLimitError( message, "tpm_limit", this.tpmBucket.timeUntilTokens(additionalTokens) ); case "warn": this.logger.warn(message); break; case "compensate": this.compensationDebt += additionalTokens; this.logger.info(`Adding ${additionalTokens} tokens to compensation debt. Total debt: ${this.compensationDebt}`); this.persistCompensationDebt(); break; } } async handleAdjustmentFailure(requestId, additionalTokens) { const message = `Failed to consume additional ${additionalTokens} tokens for request ${requestId}`; switch (this.adjustmentFailureStrategy) { case "strict": throw new RateLimitError( message, "tpm_limit", this.tpmBucket.timeUntilTokens(additionalTokens) ); case "warn": this.logger.warn(message); break; case "compensate": this.compensationDebt += additionalTokens; this.logger.info(`Adding ${additionalTokens} tokens to compensation debt. Total debt: ${this.compensationDebt}`); this.persistCompensationDebt(); break; } } getMetrics() { this.cleanupHistory(); const rpmUsed = this.rpmBucket.capacity - this.rpmBucket.available; const tpmUsed = this.tpmBucket.capacity - this.tpmBucket.available; const historyStats = this.getHistoryStatistics(); const memoryStats = this.getMemoryMetrics(); return { rpm: { used: rpmUsed, available: this.rpmBucket.available, limit: this.rpmBucket.capacity, percentage: rpmUsed / this.rpmBucket.capacity * 100 }, tpm: { used: tpmUsed, available: this.tpmBucket.available, limit: this.tpmBucket.capacity, percentage: tpmUsed / this.tpmBucket.capacity * 100 }, efficiency: this.calculateEfficiency(), consumptionHistory: historyStats, memory: memoryStats, compensation: { totalDebt: this.compensationDebt, pendingCompensation: this.compensationDebt } }; } getConsumptionHistory() { this.cleanupHistory(); return [...this.consumptionHistory]; } // Synchronous version (backward compatibility) reset() { this.rpmBucket.reset(); this.tpmBucket.reset(); this.consumptionHistory = []; this.compensationDebt = 0; if (this.storageEnabled) { this.storage.clear().catch(() => { }); } } async resetAsync() { return await this.lock.withLock(async () => { this.rpmBucket.reset(); this.tpmBucket.reset(); this.consumptionHistory = []; this.compensationDebt = 0; if (this.storageEnabled) { await this.storage.clear(); } }); } setHistoryRetention(ms) { if (ms <= 0) { throw new Error("History retention must be positive"); } this.historyRetentionMs = ms; } setMaxHistoryRecords(count) { if (count <= 0) { throw new Error("Max history records must be positive"); } this.maxHistoryRecords = count; this.cleanupHistory(); } cleanupHistory() { const cutoff = this.clock() - this.historyRetentionMs; this.consumptionHistory = this.consumptionHistory.filter( (item) => item.timestamp > cutoff ); if (this.storageEnabled) { this.storage.cleanupConsumptionHistory(cutoff).catch(() => { }); } if (this.consumptionHistory.length > this.maxHistoryRecords) { const excess = this.consumptionHistory.length - this.maxHistoryRecords; this.consumptionHistory.splice(0, excess); if (excess > 10) { this.logger.warn(`Removed ${excess} old consumption records to stay within memory limit`); } } } getHistoryStatistics() { if (this.consumptionHistory.length === 0) { return { count: 0, averageTokensPerRequest: 0, totalTokens: 0, estimationAccuracy: 1 }; } const totalTokens = this.consumptionHistory.reduce( (sum, record) => sum + record.tokens, 0 ); const recordsWithActual = this.consumptionHistory.filter( (record) => record.estimatedTokens !== void 0 && record.actualTokens !== void 0 ); let estimationAccuracy = 1; if (recordsWithActual.length > 0) { const accuracySum = recordsWithActual.reduce((sum, record) => { const estimated = record.estimatedTokens; const actual = record.actualTokens; if (estimated === 0) return sum + 1; return sum + Math.min(estimated, actual) / Math.max(estimated, actual); }, 0); estimationAccuracy = accuracySum / recordsWithActual.length; } return { count: this.consumptionHistory.length, averageTokensPerRequest: totalTokens / this.consumptionHistory.length, totalTokens, estimationAccuracy }; } getMemoryMetrics() { const recordSize = 200; return { historyRecords: this.consumptionHistory.length, estimatedMemoryUsage: this.consumptionHistory.length * recordSize, maxHistoryRecords: this.maxHistoryRecords }; } calculateEfficiency() { const recentHistory = this.consumptionHistory.slice(-this.efficiencyWindowSize); if (recentHistory.length === 0) return 1; const recordsWithActual = recentHistory.filter( (record) => record.estimatedTokens !== void 0 && record.actualTokens !== void 0 ); if (recordsWithActual.length === 0) { return 0.85; } let totalAccuracy = 0; for (const record of recordsWithActual) { const estimated = record.estimatedTokens; const actual = record.actualTokens; if (estimated === 0 && actual === 0) { totalAccuracy += 1; } else if (estimated === 0 || actual === 0) { totalAccuracy += 0; } else { totalAccuracy += Math.min(estimated, actual) / Math.max(estimated, actual); } } return totalAccuracy / recordsWithActual.length; } /** * Validates internal state consistency */ validateState() { try { if (!this.rpmBucket.validateConsistency() || !this.tpmBucket.validateConsistency()) { return false; } if (this.compensationDebt < 0) { return false; } if (this.consumptionHistory.length > this.maxHistoryRecords * 1.1) { return false; } const now = this.clock(); const oldestAllowed = now - this.historyRetentionMs * 2; for (const record of this.consumptionHistory) { if (record.timestamp < oldestAllowed || record.timestamp > now + 1e3) { return false; } if (record.tokens < 0) { return false; } } return true; } catch { return false; } } /** * Creates a state snapshot for backup/restore */ createSnapshot() { return { timestamp: this.clock(), rpmBucketState: this.rpmBucket.getState(), tpmBucketState: this.tpmBucket.getState(), historyCount: this.consumptionHistory.length, compensationDebt: this.compensationDebt }; } /** * Restores state from a snapshot */ async restoreFromSnapshot(snapshot) { return await this.lock.withLock(async () => { try { this.rpmBucket.restoreState(snapshot.rpmBucketState); this.tpmBucket.restoreState(snapshot.tpmBucketState); this.compensationDebt = snapshot.compensationDebt; this.consumptionHistory = []; this.logger.info(`State restored from snapshot (timestamp: ${snapshot.timestamp})`); } catch (error) { this.logger.error(`Failed to restore from snapshot: ${error instanceof Error ? error.message : String(error)}`); throw error; } }); } /** * Attempts to repair inconsistent state */ async repairState() { return await this.lock.withLock(async () => { let repaired = false; if (this.compensationDebt < 0) { this.logger.warn(`Repairing negative compensation debt: ${this.compensationDebt}`); this.compensationDebt = 0; this.persistCompensationDebt(); repaired = true; } const originalLength = this.consumptionHistory.length; const now = this.clock(); const oldestAllowed = now - this.historyRetentionMs * 2; this.consumptionHistory = this.consumptionHistory.filter((record) => { return record.timestamp >= oldestAllowed && record.timestamp <= now + 1e3 && record.tokens >= 0; }); if (this.consumptionHistory.length !== originalLength) { this.logger.warn(`Removed ${originalLength - this.consumptionHistory.length} invalid history records`); repaired = true; } if (this.consumptionHistory.length > this.maxHistoryRecords) { const excess = this.consumptionHistory.length - this.maxHistoryRecords; this.consumptionHistory.splice(0, excess); this.logger.warn(`Removed ${excess} excess history records`); repaired = true; } return repaired; }); } /** * Persist compensation debt to storage */ persistCompensationDebt() { if (!this.storageEnabled) { return; } this.storage.saveCompensationDebt(this.compensationDebt).catch(() => { }); } }; function createLLMThrottle(config) { return new LLMThrottle(config); } function createLLMThrottleWithStorage(config, storage) { return new LLMThrottle({ ...config, storage }); } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { AsyncLock, InMemoryStorage, InvalidConfigError, LLMThrottle, RateLimitError, TokenBucket, createLLMThrottle, createLLMThrottleWithStorage, createMonotonicClock, createOptimalClock, createStandardClock, defaultValidationRules, estimateTokens, getClockInfo, isFuzztokAvailable, robustEstimateTokens, validateAndNormalizeConfig, validateConfig }); //# sourceMappingURL=index.js.map