@aid-on/llm-throttle
Version:
高精度なLLMレート制限ライブラリ - Precise dual rate limiting for LLM APIs (RPM + TPM)
1,445 lines (1,434 loc) • 45.6 kB
JavaScript
"use strict";
// Bundler runtime helpers (the shape matches esbuild's generated prelude).
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Wraps a lazily-initialised module. `fn` is an object whose first own key maps
// to the module's init function. On the first call that function is invoked
// (while `fn` is overwritten with 0 so it never runs again) and its result is
// cached in `res`; every later call just returns the cached `res`.
var __esm = (fn, res) => function __init() {
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines one enumerable getter on `target` for every key of `all`, using the
// function stored under that key as the getter. Because they are getters, the
// exported values are read at access time rather than at definition time.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// node_modules/@aid-on/fuzztok/dist/index.mjs
// Namespace object for the bundled fuzztok module. Each property is a getter,
// so it only yields a real value once init_dist() has assigned the class
// variables declared below.
var dist_exports = {};
__export(dist_exports, {
CharacterClassifier: () => CharacterClassifier,
FuzzyTokenEstimator: () => FuzzyTokenEstimator,
SimpleModelConfigProvider: () => SimpleModelConfigProvider,
TokenCostCalculator: () => TokenCostCalculator,
TokenEstimationVisualizer: () => TokenEstimationVisualizer,
createFuzzyEstimator: () => createFuzzyEstimator,
createSimpleFuzzyEstimator: () => createSimpleFuzzyEstimator
});
/**
 * Build a FuzzyTokenEstimator backed by the given model provider.
 * @param modelProvider object the estimator asks for per-model configs
 * @param options forwarded unchanged to the FuzzyTokenEstimator constructor
 * @returns the new estimator instance
 */
function createFuzzyEstimator(modelProvider, options) {
  const estimator = new FuzzyTokenEstimator(modelProvider, options);
  return estimator;
}
/**
 * Convenience factory: wraps a plain `{ modelName: config }` object in a
 * SimpleModelConfigProvider and returns an estimator that uses it.
 * @param modelConfigs plain object mapping model names to configs
 * @param defaultModel model name used when a call does not specify one
 * @returns the new estimator instance
 */
function createSimpleFuzzyEstimator(modelConfigs, defaultModel) {
  return new FuzzyTokenEstimator(
    new SimpleModelConfigProvider(modelConfigs, defaultModel),
    { defaultModel }
  );
}
var CharacterClassifier, SimpleModelConfigProvider, DEFAULT_FALLBACK_CONFIG, FuzzyTokenEstimator, TokenCostCalculator, TokenEstimationVisualizer;
var init_dist = __esm({
"node_modules/@aid-on/fuzztok/dist/index.mjs"() {
"use strict";
CharacterClassifier = class {
  /**
   * Tell whether a character belongs to one of the CJK / East-Asian
   * multi-byte Unicode blocks listed below.
   *
   * The full code point is read with `codePointAt`, not `charCodeAt`: callers
   * iterate text with `for...of`, which yields whole code points, and the
   * supplementary-plane ranges (Extension B and later, >= 0x20000) can only
   * match against a real code point, never against a lone surrogate unit.
   *
   * @param char a string whose first code point is inspected
   * @returns true when that code point falls in one of the ranges
   */
  static isCJKCharacter(char) {
    const code = char.codePointAt(0);
    return (
      // CJK Unified Ideographs Extensions A-G, compatibility ideographs, etc.
      code >= 11904 && code <= 12031 || // CJK Radicals Supplement
      code >= 12288 && code <= 12351 || // CJK Symbols and Punctuation
      code >= 12352 && code <= 12447 || // Hiragana
      code >= 12448 && code <= 12543 || // Katakana
      code >= 12544 && code <= 12591 || // Bopomofo
      code >= 12592 && code <= 12687 || // Hangul Compatibility Jamo
      code >= 12688 && code <= 12703 || // Kanbun
      code >= 12704 && code <= 12735 || // Bopomofo Extended
      code >= 12736 && code <= 12783 || // CJK Strokes
      code >= 12784 && code <= 12799 || // Katakana Phonetic Extensions
      code >= 12800 && code <= 13055 || // Enclosed CJK Letters and Months
      code >= 13056 && code <= 13311 || // CJK Compatibility
      code >= 13312 && code <= 19903 || // CJK Unified Ideographs Extension A
      code >= 19968 && code <= 40959 || // CJK Unified Ideographs
      code >= 40960 && code <= 42127 || // Yi Syllables
      code >= 42128 && code <= 42191 || // Yi Radicals
      code >= 44032 && code <= 55215 || // Hangul Syllables
      code >= 63744 && code <= 64255 || // CJK Compatibility Ideographs
      code >= 65072 && code <= 65103 || // CJK Compatibility Forms
      code >= 65280 && code <= 65519 || // Halfwidth and Fullwidth Forms
      code >= 131072 && code <= 173791 || // CJK Unified Ideographs Extension B
      code >= 173824 && code <= 177983 || // CJK Unified Ideographs Extension C
      code >= 177984 && code <= 178207 || // CJK Unified Ideographs Extension D
      code >= 178208 && code <= 183983 || // CJK Unified Ideographs Extension E
      code >= 183984 && code <= 191471 || // CJK Unified Ideographs Extension F
      code >= 196608 && code <= 201551 // CJK Unified Ideographs Extension G
    );
  }
  /**
   * Classify a single character.
   * @param char the character to classify
   * @returns "cjk", "latin", "digit", "whitespace" or "symbol"
   */
  static getCharacterType(char) {
    if (this.isCJKCharacter(char)) return "cjk";
    if (/[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]/.test(char)) return "latin";
    if (/[0-9\u0660-\u0669\u06f0-\u06f9]/.test(char)) return "digit";
    if (/\s/.test(char)) return "whitespace";
    return "symbol";
  }
  /**
   * Count how many characters of each category a text contains.
   *
   * `total` is counted in code points, the same unit as the per-category
   * counters, so the categories always add up to `total` and `cjkRatio`
   * reaches 1 for text made only of supplementary-plane ideographs.
   *
   * @param text the text to analyse
   * @returns `{ cjk, latin, digits, symbols, whitespace, total, cjkRatio }`
   */
  static analyzeTextComposition(text) {
    const composition = {
      cjk: 0,
      latin: 0,
      digits: 0,
      symbols: 0,
      whitespace: 0,
      total: 0
    };
    for (const char of text) {
      composition.total++;
      const type = this.getCharacterType(char);
      if (type === "digit") {
        composition.digits++;
      } else if (type === "symbol") {
        composition.symbols++;
      } else {
        // "cjk", "latin" and "whitespace" share their name with the counter.
        composition[type]++;
      }
    }
    return {
      ...composition,
      cjkRatio: composition.total > 0 ? composition.cjk / composition.total : 0
    };
  }
};
SimpleModelConfigProvider = class {
  /**
   * @param configs plain object mapping model names to their configs
   * @param defaultModel model name reported by getDefaultModel()
   */
  constructor(configs, defaultModel) {
    const table = new Map();
    for (const [modelName, modelConfig] of Object.entries(configs)) {
      table.set(modelName, modelConfig);
    }
    this.configs = table;
    this.defaultModel = defaultModel;
  }
  /** Config registered for `modelName`, or undefined when there is none. */
  getConfig(modelName) {
    const found = this.configs.get(modelName);
    return found;
  }
  /** Names of every registered model, in registration order. */
  getSupportedModels() {
    return [...this.configs.keys()];
  }
  /** The default model name passed to the constructor. */
  getDefaultModel() {
    const { defaultModel } = this;
    return defaultModel;
  }
};
// Config used by FuzzyTokenEstimator when the model provider has no entry for
// the requested model and no custom fallback was supplied.
DEFAULT_FALLBACK_CONFIG = {
// Latin run length is divided by this (rounded up) to get its token count.
charsPerToken: 4,
// Fixed number of tokens added to every estimate.
overhead: 10,
// CJK run length is multiplied by this.
cjkTokensPerChar: 1.5,
// Multiplier applied to the summed breakdown (overhead included).
mixedTextMultiplier: 1.05,
// NOTE: despite the name, the estimator uses this as a divisor
// (digit run length / value, rounded up), i.e. characters per token.
numberTokensPerChar: 3.5,
// NOTE: same as above - used as a divisor for symbol runs.
symbolTokensPerChar: 2.5,
// Only the value "count" makes whitespace contribute (0.3 per character);
// any other value, including "compress", contributes nothing.
whitespaceHandling: "compress"
};
FuzzyTokenEstimator = class {
  /**
   * @param modelProvider object exposing getConfig(model), getSupportedModels()
   *   and optionally getDefaultModel()
   * @param options optional `{ fallbackConfig, defaultModel }`
   */
  constructor(modelProvider, options) {
    const { fallbackConfig, defaultModel } = options ?? {};
    this.modelProvider = modelProvider;
    this.fallbackConfig = fallbackConfig || DEFAULT_FALLBACK_CONFIG;
    this.defaultModel = defaultModel || modelProvider.getDefaultModel?.();
  }
  /** Provider currently used to look up model configs. */
  getModelProvider() {
    const { modelProvider } = this;
    return modelProvider;
  }
  /** Swap the provider used to look up model configs. */
  setModelProvider(provider) {
    this.modelProvider = provider;
  }
  /**
   * Resolve the model name (argument, then default model, then "unknown") and
   * its config, using the fallback config when the provider has none.
   * @returns `{ config, model }`
   */
  getModelConfig(modelName) {
    let model = modelName;
    if (!model) model = this.defaultModel;
    if (!model) model = "unknown";
    const provided = this.modelProvider.getConfig(model);
    return { config: provided ? provided : this.fallbackConfig, model };
  }
  /**
   * Main entry point: estimate tokens and report how the number was built.
   * @param text text to estimate; a falsy value yields just the overhead
   * @param modelName optional model whose config should be used
   * @returns `{ tokens, breakdown, textAnalysis, confidence, modelUsed }`
   */
  estimateDetailed(text, modelName) {
    const { config, model } = this.getModelConfig(modelName);
    const breakdown = {
      cjk: 0,
      latin: 0,
      digits: 0,
      symbols: 0,
      overhead: config.overhead
    };
    if (!text) {
      return {
        tokens: config.overhead,
        breakdown,
        textAnalysis: { totalChars: 0, cjkRatio: 0, adjustmentFactor: 1 },
        confidence: "high",
        modelUsed: model
      };
    }
    const composition = CharacterClassifier.analyzeTextComposition(text);
    // Split the text into runs of consecutive same-type characters. Run length
    // is measured in UTF-16 units, like the length of the run's substring.
    const runs = [];
    for (const char of text) {
      const type = CharacterClassifier.getCharacterType(char);
      const last = runs[runs.length - 1];
      if (last && last.type === type) {
        last.length += char.length;
      } else {
        runs.push({ type, length: char.length });
      }
    }
    // Score each run in text order.
    for (const { type, length } of runs) {
      if (type === "cjk") {
        breakdown.cjk += length * config.cjkTokensPerChar;
      } else if (type === "latin") {
        breakdown.latin += Math.ceil(length / config.charsPerToken);
      } else if (type === "digit") {
        breakdown.digits += Math.ceil(length / (config.numberTokensPerChar || 3.5));
      } else if (type === "symbol") {
        breakdown.symbols += Math.ceil(length / (config.symbolTokensPerChar || 2.5));
      } else if (type === "whitespace" && config.whitespaceHandling === "count") {
        // Counted whitespace is booked under symbols.
        breakdown.symbols += length * 0.3;
      }
    }
    let baseTokens = 0;
    for (const part of Object.values(breakdown)) {
      baseTokens += part;
    }
    baseTokens *= config.mixedTextMultiplier;
    const adjustmentFactor = this.calculateAdjustmentFactor(composition.cjkRatio);
    const tokens = Math.ceil(baseTokens * adjustmentFactor);
    const confidence = this.calculateConfidence(composition);
    return {
      tokens,
      breakdown,
      textAnalysis: {
        totalChars: text.length,
        cjkRatio: composition.cjkRatio,
        adjustmentFactor
      },
      confidence,
      modelUsed: model
    };
  }
  /** Token count only; see estimateDetailed for the full report. */
  estimate(text, modelName) {
    const { tokens } = this.estimateDetailed(text, modelName);
    return tokens;
  }
  /**
   * Estimate for a `{ prompt, model, maxTokens }` payload: prompt tokens (or
   * just the overhead when prompt is not a string) plus maxTokens (500 when
   * missing or not positive), padded by 10% and rounded up.
   */
  estimatePayload(payload) {
    let promptTokens;
    if (typeof payload.prompt === "string") {
      promptTokens = this.estimate(payload.prompt, payload.model);
    } else {
      promptTokens = this.getModelConfig(payload.model).config.overhead;
    }
    const maxTokens = payload.maxTokens && payload.maxTokens > 0 ? payload.maxTokens : 500;
    return Math.ceil((promptTokens + maxTokens) * 1.1);
  }
  /**
   * Scale factor derived from the CJK share of the text (the parameter is
   * named `japaneseRatio` but receives the overall CJK ratio).
   */
  calculateAdjustmentFactor(japaneseRatio) {
    if (japaneseRatio > 0.8) return 0.6;
    if (japaneseRatio === 0) return 1;
    if (japaneseRatio < 0.2) return 0.7;
    // Linear ramp from 0.7 at a ratio of 0.2 down to 0.6 at 0.8.
    return 0.7 - (japaneseRatio - 0.2) / 0.6 * 0.1;
  }
  /** Rate the estimate as "low", "medium" or "high" from the composition. */
  calculateConfidence(composition) {
    const { total, cjkRatio, symbols } = composition;
    if (total < 10) return "low";
    if (cjkRatio > 0.9 || cjkRatio < 0.1) return "high";
    if (symbols / total > 0.3) return "low";
    return "medium";
  }
  /** Detailed estimate for each text of an array, in order. */
  estimateBatch(texts, modelName) {
    const detail = (text) => this.estimateDetailed(text, modelName);
    return texts.map(detail);
  }
  /**
   * Estimate a stream chunk by chunk, yielding `{ chunk, tokens, total }`
   * where `total` is the running sum of the per-chunk estimates.
   */
  async *estimateStream(textStream, modelName) {
    let running = 0;
    for await (const chunk of textStream) {
      const tokens = this.estimate(chunk, modelName);
      running += tokens;
      yield { chunk, tokens, total: running };
    }
  }
  /** Model names known to the current provider. */
  getSupportedModels() {
    const { modelProvider } = this;
    return modelProvider.getSupportedModels();
  }
};
TokenCostCalculator = class {
  /**
   * @param costProvider object exposing getCost(model), which returns
   *   `{ input, output }` prices per 1000 tokens or a falsy value
   */
  constructor(costProvider) {
    this.costProvider = costProvider;
  }
  /**
   * Price a request.
   * @returns `{ inputCost, outputCost, totalCost, formattedTotal, available }`;
   *   all costs are 0 and `available` is false when the model has no pricing
   */
  calculate(model, inputTokens, outputTokens) {
    const pricing = this.costProvider.getCost(model);
    if (pricing) {
      const inputCost = inputTokens / 1e3 * pricing.input;
      const outputCost = outputTokens / 1e3 * pricing.output;
      const totalCost = inputCost + outputCost;
      const formattedTotal = `$${totalCost.toFixed(4)}`;
      return { inputCost, outputCost, totalCost, formattedTotal, available: true };
    }
    return {
      inputCost: 0,
      outputCost: 0,
      totalCost: 0,
      formattedTotal: "N/A",
      available: false
    };
  }
};
TokenEstimationVisualizer = class {
  /**
   * Render an estimateDetailed() result as a plain-text report with one bar
   * per breakdown entry, scaled against the largest entry.
   *
   * @param text the text that was estimated (only the first 50 chars are shown)
   * @param result an estimateDetailed() result
   * @returns the multi-line report
   */
  static visualize(text, result) {
    const bar = (value, max, width = 20) => {
      // With nothing to scale against (every entry is 0) value / max is NaN,
      // which used to collapse the bar to zero width. Draw an empty bar
      // instead, and clamp so repeat() never gets an out-of-range count.
      const ratio = max > 0 ? value / max : 0;
      const filled = Math.min(width, Math.max(0, Math.round(ratio * width)));
      return "\u2588".repeat(filled) + "\u2591".repeat(width - filled);
    };
    const maxTokens = Math.max(...Object.values(result.breakdown));
    return `
=== Token Estimation Visualization ===
Model: ${result.modelUsed}
Text: "${text.slice(0, 50)}${text.length > 50 ? "..." : ""}"
Total Tokens: ${result.tokens}
Confidence: ${result.confidence}
Breakdown:
CJK [${bar(result.breakdown.cjk, maxTokens)}] ${result.breakdown.cjk.toFixed(1)}
Latin [${bar(result.breakdown.latin, maxTokens)}] ${result.breakdown.latin.toFixed(1)}
Digits [${bar(result.breakdown.digits, maxTokens)}] ${result.breakdown.digits.toFixed(1)}
Symbols [${bar(result.breakdown.symbols, maxTokens)}] ${result.breakdown.symbols.toFixed(1)}
Overhead [${bar(result.breakdown.overhead, maxTokens)}] ${result.breakdown.overhead}
Text Analysis:
- Total Characters: ${result.textAnalysis.totalChars}
- CJK Ratio: ${(result.textAnalysis.cjkRatio * 100).toFixed(1)}%
- Adjustment Factor: ${result.textAnalysis.adjustmentFactor.toFixed(2)}
`;
  }
};
}
});
// src/errors.ts
/**
 * Raised when a request cannot proceed because a rate limit is exhausted.
 * Carries `reason` (which limit was hit) and `availableIn` (the wait the
 * caller was told about) alongside the message.
 */
var RateLimitError = class extends Error {
  constructor(message, reason, availableIn) {
    super(message);
    Object.assign(this, { reason, availableIn, name: "RateLimitError" });
  }
};
/** Raised when a configuration object fails validation. */
var InvalidConfigError = class extends Error {
  constructor(message) {
    super(message);
    const label = "InvalidConfigError";
    this.name = label;
  }
};
// src/token-bucket.ts
/**
 * Continuously refilling token bucket.
 *
 * Tokens accrue at `refillRate` per second, measured with the injected clock
 * (which returns milliseconds), up to `capacity`. When both a storage object
 * and a `storageKey` are supplied, state changes are persisted best-effort.
 */
var TokenBucket = class {
  /**
   * @param config `{ capacity, refillRate, initialTokens?, clock?, storageKey? }`
   * @param storage optional object exposing loadTokenBucketState(key) and
   *   saveTokenBucketState(key, state), both returning promises
   * @throws InvalidConfigError when the config is out of range
   */
  constructor(config, storage) {
    this._initialized = false;
    this.validateConfig(config);
    this._capacity = config.capacity;
    this._available = config.initialTokens ?? config.capacity;
    this._refillRate = config.refillRate;
    this._clock = config.clock ?? (() => Date.now());
    this._lastRefill = this._clock();
    this._storage = storage;
    this._storageKey = config.storageKey;
  }
  /** Reject non-positive capacity/rate and out-of-range initial tokens. */
  validateConfig(config) {
    if (config.capacity <= 0) {
      throw new InvalidConfigError("Capacity must be greater than 0");
    }
    if (config.refillRate <= 0) {
      throw new InvalidConfigError("Refill rate must be greater than 0");
    }
    if (config.initialTokens !== void 0 && config.initialTokens < 0) {
      throw new InvalidConfigError("Initial tokens cannot be negative");
    }
    if (config.initialTokens !== void 0 && config.initialTokens > config.capacity) {
      throw new InvalidConfigError("Initial tokens cannot exceed capacity");
    }
  }
  /** Maximum number of tokens the bucket can hold. */
  get capacity() {
    return this._capacity;
  }
  /** Tokens available right now (refills first). */
  get available() {
    this.refill();
    return this._available;
  }
  /** Refill rate in tokens per second. */
  get refillRate() {
    return this._refillRate;
  }
  /**
   * Credit the tokens accrued since the last refill.
   *
   * If the clock reads earlier than the last refill (a wall clock stepped
   * back, or a timestamp restored from another clock epoch), the reference
   * point is re-anchored to "now". Without that the bucket would stay frozen
   * until the clock caught up with the stale timestamp.
   */
  refill() {
    const now = this._clock();
    const timePassed = (now - this._lastRefill) / 1e3;
    if (timePassed < 0) {
      this._lastRefill = now;
      return;
    }
    // Nothing accrued (zero elapsed time, or a clock that returned NaN).
    if (!(timePassed > 0)) return;
    const tokensToAdd = timePassed * this._refillRate;
    this._available = Math.min(
      this._capacity,
      this._available + tokensToAdd
    );
    this._lastRefill = now;
  }
  /** True when at least `count` tokens are available; false for negative counts. */
  hasTokens(count) {
    if (count < 0) return false;
    this.refill();
    return this._available >= count;
  }
  /**
   * Take `count` tokens if they are all available.
   * @returns true when consumed, false when there were not enough tokens
   * @throws Error for a negative count
   */
  consume(count) {
    if (count < 0) {
      throw new Error("Cannot consume negative tokens");
    }
    this.refill();
    if (this._available >= count) {
      this._available -= count;
      this.persistState();
      return true;
    }
    return false;
  }
  /**
   * Give `count` tokens back, capped at capacity.
   * @throws Error for a negative count
   */
  refund(count) {
    if (count < 0) {
      throw new Error("Cannot refund negative tokens");
    }
    this._available = Math.min(this._capacity, this._available + count);
    this.persistState();
  }
  /** Milliseconds until at least one token is available (0 if already so). */
  timeUntilNextToken() {
    this.refill();
    if (this._available >= 1) return 0;
    return Math.ceil((1 - this._available) / this._refillRate * 1e3);
  }
  /** Milliseconds until `count` tokens have accrued (0 if already available). */
  timeUntilTokens(count) {
    if (count <= 0) return 0;
    this.refill();
    if (this._available >= count) return 0;
    const needed = count - this._available;
    return Math.ceil(needed / this._refillRate * 1e3);
  }
  /** Fill the bucket to capacity and restart the refill timer. */
  reset() {
    this._available = this._capacity;
    this._lastRefill = this._clock();
    this.persistState();
  }
  /**
   * Get current internal state for snapshots
   * @returns `{ available, capacity, lastRefill }` after a refill
   */
  getState() {
    this.refill();
    return {
      available: this._available,
      capacity: this._capacity,
      lastRefill: this._lastRefill
    };
  }
  /**
   * Restore state from snapshot
   * @throws Error when the snapshot is out of range or for another capacity
   */
  restoreState(state) {
    if (state.available < 0 || state.available > state.capacity) {
      throw new Error("Invalid state: available tokens out of range");
    }
    if (state.capacity !== this._capacity) {
      throw new Error("Invalid state: capacity mismatch");
    }
    this._available = state.available;
    this._lastRefill = state.lastRefill;
  }
  /**
   * Validate internal consistency
   */
  validateConsistency() {
    this.refill();
    return this._available >= 0 && this._available <= this._capacity && this._capacity > 0 && this._refillRate > 0;
  }
  /**
   * Initialize from storage if available
   *
   * Runs at most once. A stored snapshot is adopted only when it matches this
   * bucket's capacity and its numbers are sane, so a corrupted record cannot
   * leave the bucket with a negative, oversized or NaN balance.
   */
  async initializeFromStorage() {
    if (!this._storage || !this._storageKey || this._initialized) {
      return;
    }
    try {
      const storedState = await this._storage.loadTokenBucketState(this._storageKey);
      const usable = Boolean(storedState) &&
        storedState.capacity === this._capacity &&
        Number.isFinite(storedState.available) &&
        storedState.available >= 0 &&
        storedState.available <= this._capacity &&
        Number.isFinite(storedState.lastRefill);
      if (usable) {
        this._available = storedState.available;
        this._lastRefill = storedState.lastRefill;
      }
    } catch (error) {
      // Deliberately best-effort: when storage cannot be read the bucket
      // keeps the state it was constructed with.
    } finally {
      this._initialized = true;
    }
  }
  /**
   * Persist current state to storage
   *
   * Fire-and-forget: a failed save is ignored so rate limiting keeps working
   * when storage is down.
   */
  persistState() {
    if (!this._storage || !this._storageKey) {
      return;
    }
    const state = this.getState();
    this._storage.saveTokenBucketState(this._storageKey, state).catch(() => {
    });
  }
};
// src/utils/async-lock.ts
/**
 * Minimal FIFO mutex for async code.
 *
 * `clear()` starts a new "generation": waiters queued before it are rejected,
 * and a withLock() call that was already running when clear() happened will
 * not release the lock afterwards. Without that, the old holder's release
 * either threw "Cannot release a lock that is not acquired" (masking the
 * callback's own result) or released a lock since taken by someone else.
 */
var AsyncLock = class {
  constructor() {
    this.locked = false;
    this.queue = [];
    // Bumped by clear(); lets withLock() detect that its lock was revoked.
    this.generation = 0;
  }
  /**
   * Acquire the lock
   * @returns a promise that resolves once the caller holds the lock, or
   *   rejects if clear() is called while the caller is still queued
   */
  async acquire() {
    return new Promise((resolve, reject) => {
      if (!this.locked) {
        this.locked = true;
        resolve();
      } else {
        this.queue.push({ resolve, reject });
      }
    });
  }
  /**
   * Release the lock
   * Hands the lock straight to the next waiter when there is one.
   * @throws Error when the lock is not currently held
   */
  release() {
    if (!this.locked) {
      throw new Error("Cannot release a lock that is not acquired");
    }
    if (this.queue.length > 0) {
      const next = this.queue.shift();
      next.resolve();
    } else {
      this.locked = false;
    }
  }
  /**
   * Execute a function with the lock acquired
   * @param fn sync or async callback to run while holding the lock
   * @returns whatever `fn` returns (or rejects with whatever it throws)
   */
  async withLock(fn) {
    // Captured before waiting: clear() empties the queue, so a caller can
    // only ever be granted the lock within the generation it queued in.
    const generation = this.generation;
    await this.acquire();
    try {
      return await fn();
    } finally {
      // Skip the release when clear() revoked this lock in the meantime.
      if (generation === this.generation) {
        this.release();
      }
    }
  }
  /**
   * Check if the lock is currently held
   */
  isLocked() {
    return this.locked;
  }
  /**
   * Get the number of pending operations waiting for the lock
   */
  getQueueLength() {
    return this.queue.length;
  }
  /**
   * Clear all pending operations (useful for cleanup)
   * Rejects every queued waiter with "AsyncLock cleared" and force-unlocks.
   */
  clear() {
    const error = new Error("AsyncLock cleared");
    this.queue.forEach((item) => item.reject(error));
    this.queue = [];
    this.locked = false;
    this.generation++;
  }
};
// src/utils/clock.ts
// Feature detection for high-resolution timers. Each probe is wrapped in
// try/catch so an environment that throws on access simply reports `false`.
var hasNodeHrtime = (() => {
  try {
    return typeof process !== "undefined" && typeof process.hrtime !== "undefined" && typeof process.hrtime.bigint === "function";
  } catch {
    return false;
  }
})();
var hasPerformanceNow = (() => {
  try {
    return typeof performance !== "undefined" && typeof performance.now === "function";
  } catch {
    return false;
  }
})();
/**
 * Create a clock returning the milliseconds elapsed since the clock was
 * created. Uses process.hrtime.bigint() when detected, then
 * performance.now(), and finally Date.now() deltas as a last resort.
 * @returns a zero-argument function returning elapsed milliseconds
 */
function createMonotonicClock() {
  if (hasNodeHrtime) {
    const origin = process.hrtime.bigint();
    // hrtime is in nanoseconds; convert the delta to milliseconds.
    return () => Number(process.hrtime.bigint() - origin) / 1e6;
  }
  if (hasPerformanceNow) {
    const origin = performance.now();
    return () => performance.now() - origin;
  }
  const origin = Date.now();
  return () => Date.now() - origin;
}
/**
 * Create a wall-clock source.
 * @returns a zero-argument function returning Date.now() at call time
 */
function createStandardClock() {
  const wallClock = () => Date.now();
  return wallClock;
}
/**
 * Pick the best clock available.
 * @param preferMonotonic when true (the default) and a high-resolution timer
 *   was detected, a monotonic clock is returned; otherwise a wall clock
 * @returns a zero-argument function returning milliseconds
 */
function createOptimalClock(preferMonotonic = true) {
  const monotonicSupported = hasNodeHrtime || hasPerformanceNow;
  return preferMonotonic && monotonicSupported ? createMonotonicClock() : createStandardClock();
}
/**
 * Report which timers were detected and which clock kind is recommended.
 * @returns `{ hasNodeHrtime, hasPerformanceNow, recommendedClock }` where
 *   recommendedClock is "monotonic" if either timer exists, else "standard"
 */
function getClockInfo() {
  let recommendedClock = "standard";
  if (hasNodeHrtime || hasPerformanceNow) {
    recommendedClock = "monotonic";
  }
  return { hasNodeHrtime, hasPerformanceNow, recommendedClock };
}
// src/utils/validation.ts
/**
 * Built-in configuration rules. Each rule is `{ name, validate, level }`;
 * `validate(config)` returns `true` when the rule passes and a message string
 * otherwise. "error" rules make a config invalid, "warn" rules only produce
 * warnings.
 */
var defaultValidationRules = (() => {
  const rule = (name, level, validate) => ({ name, validate, level });
  // Passes only when config[field] is greater than zero.
  const positive = (field, message) => (config) => config[field] > 0 || message;
  // Fails with `message` whenever `isViolated(config)` holds.
  const failsWhen = (isViolated, message) => (config) => isViolated(config) ? message : true;
  // Optional setting that, when present, must be strictly positive.
  const positiveIfSet = (field, message) => failsWhen((config) => config[field] !== void 0 && config[field] <= 0, message);
  // Optional burst setting that must not drop below its base limit.
  const burstAtLeastBase = (burst, base, message) => failsWhen((config) => config[burst] !== void 0 && config[burst] < config[base], message);
  // Optional burst setting that should stay within 10x its base limit.
  const burstWithinTenfold = (burst, base, message) => failsWhen((config) => config[burst] !== void 0 && config[burst] > config[base] * 10, message);
  return [
    rule("rpm_positive", "error", positive("rpm", "RPM must be greater than 0")),
    rule("tpm_positive", "error", positive("tpm", "TPM must be greater than 0")),
    rule("burst_rpm_valid", "error", burstAtLeastBase("burstRPM", "rpm", "Burst RPM cannot be less than RPM")),
    rule("burst_tpm_valid", "error", burstAtLeastBase("burstTPM", "tpm", "Burst TPM cannot be less than TPM")),
    rule("burst_rpm_limit", "warn", burstWithinTenfold("burstRPM", "rpm", "Burst RPM should not exceed 10x the base RPM for optimal performance")),
    rule("burst_tpm_limit", "warn", burstWithinTenfold("burstTPM", "tpm", "Burst TPM should not exceed 10x the base TPM for optimal performance")),
    rule("rpm_high_warning", "warn", failsWhen((config) => config.rpm > 1e4, "RPM above 10,000 may impact performance and API stability")),
    rule("tpm_high_warning", "warn", failsWhen((config) => config.tpm > 1e6, "TPM above 1,000,000 may impact performance and memory usage")),
    rule("history_retention_valid", "error", positiveIfSet("historyRetentionMs", "History retention must be positive")),
    rule("max_history_valid", "error", positiveIfSet("maxHistoryRecords", "Max history records must be positive")),
    rule("efficiency_window_valid", "error", positiveIfSet("efficiencyWindowSize", "Efficiency window size must be positive"))
  ];
})();
/**
 * Run the default rules followed by `customRules` against `config`.
 *
 * A rule passes only when it returns exactly `true`. A returned string is
 * used as the message; any other value gets a generic message. A rule that
 * throws is always recorded as an error, whatever its level.
 *
 * @param config the configuration to check
 * @param customRules extra rules appended after the defaults
 * @param logger optional; each warning is sent to `logger.warn`
 * @returns `{ valid, errors, warnings }`
 */
function validateConfig(config, customRules = [], logger) {
  const errors = [];
  const warnings = [];
  for (const rule of [...defaultValidationRules, ...customRules]) {
    try {
      const outcome = rule.validate(config);
      if (outcome === true) continue;
      const message = typeof outcome === "string" ? outcome : `Validation failed for rule: ${rule.name}`;
      const bucket = rule.level === "error" ? errors : warnings;
      bucket.push(message);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      errors.push(`Validation rule '${rule.name}' threw an error: ${detail}`);
    }
  }
  if (logger && warnings.length > 0) {
    for (const warning of warnings) {
      logger.warn(`Config validation warning: ${warning}`);
    }
  }
  return { valid: errors.length === 0, errors, warnings };
}
/**
 * Validate `config` and return a copy with defaults filled in.
 *
 * @param config the user configuration (must be an object)
 * @param customRules extra validation rules
 * @param logger optional logger handed to the validator
 * @returns a shallow copy of `config` with adjustmentFailureStrategy,
 *   maxHistoryRecords, historyRetentionMs, efficiencyWindowSize and logger
 *   defaulted when falsy
 * @throws Error when `config` is not an object or fails an error-level rule
 */
function validateAndNormalizeConfig(config, customRules = [], logger) {
  const isObject = Boolean(config) && typeof config === "object";
  if (!isObject) {
    throw new Error("Config must be an object");
  }
  const { valid, errors } = validateConfig(config, customRules, logger);
  if (!valid) {
    throw new Error(`Configuration validation failed: ${errors.join(", ")}`);
  }
  const normalized = { ...config };
  normalized.adjustmentFailureStrategy = config.adjustmentFailureStrategy || "warn";
  normalized.maxHistoryRecords = config.maxHistoryRecords || 1e4;
  normalized.historyRetentionMs = config.historyRetentionMs || 6e4;
  normalized.efficiencyWindowSize = config.efficiencyWindowSize || 50;
  normalized.logger = config.logger || console;
  return normalized;
}
// src/storage/in-memory.ts
/**
 * Storage backend that keeps everything in process memory. Every method is
 * async so it can be swapped for a real backend with the same interface.
 */
var InMemoryStorage = class {
  constructor() {
    Object.assign(this, {
      tokenBucketStates: /* @__PURE__ */ new Map(),
      consumptionHistory: [],
      compensationDebt: 0
    });
  }
  /** Store a shallow copy of `state` under `key`. */
  async saveTokenBucketState(key, state) {
    const snapshot = { ...state };
    this.tokenBucketStates.set(key, snapshot);
  }
  /** Shallow copy of the state stored under `key`, or null when absent. */
  async loadTokenBucketState(key) {
    const stored = this.tokenBucketStates.get(key);
    if (!stored) return null;
    return { ...stored };
  }
  /** Replace the whole history with a copy of `records`. */
  async saveConsumptionHistory(records) {
    this.consumptionHistory = Array.from(records);
  }
  /** The last `limit` records when limit is positive, otherwise all of them. */
  async loadConsumptionHistory(limit) {
    const { consumptionHistory } = this;
    return limit && limit > 0 ? consumptionHistory.slice(-limit) : [...consumptionHistory];
  }
  /** Append a shallow copy of `record`. */
  async addConsumptionRecord(record) {
    const copy = { ...record };
    this.consumptionHistory.push(copy);
  }
  /**
   * Drop records whose timestamp is not newer than `olderThan`.
   * @returns how many records were removed
   */
  async cleanupConsumptionHistory(olderThan) {
    const before = this.consumptionHistory.length;
    const kept = [];
    for (const record of this.consumptionHistory) {
      if (record.timestamp > olderThan) kept.push(record);
    }
    this.consumptionHistory = kept;
    return before - kept.length;
  }
  /** Remember the outstanding compensation debt. */
  async saveCompensationDebt(debt) {
    this.compensationDebt = debt;
  }
  /** The last saved compensation debt (0 initially). */
  async loadCompensationDebt() {
    const { compensationDebt } = this;
    return compensationDebt;
  }
  /** Forget all bucket states, history and debt. */
  async clear() {
    this.tokenBucketStates.clear();
    this.consumptionHistory = [];
    this.compensationDebt = 0;
  }
  /** Always true for this backend. */
  async isAvailable() {
    return true;
  }
};
// src/utils/fuzztok-integration.ts
// Cached fuzztok namespace (null until a load succeeds) and load outcome.
var fuzztokModule = null;
var fuzztokAvailable = false;
/**
 * Load the bundled fuzztok module once and remember the outcome. A failed
 * load leaves the cache empty, so the next call tries again.
 * @returns true when the module is available
 */
async function loadFuzztok() {
  if (fuzztokModule === null) {
    try {
      fuzztokModule = await Promise.resolve().then(() => (init_dist(), dist_exports));
      fuzztokAvailable = true;
    } catch (error) {
      fuzztokAvailable = false;
    }
  }
  return fuzztokAvailable;
}
/**
 * Estimate the token count of `text` with fuzztok, falling back to a rough
 * "4 characters per token" guess when fuzztok is unavailable or throws.
 *
 * The bundled fuzztok module exposes estimator classes and factories but no
 * `countTokens` function, so calling `countTokens` unconditionally always
 * threw and every estimate silently used the rough fallback. The estimate is
 * now taken from a fuzzy estimator built with an empty model table (which
 * makes it use its built-in fallback config). `countTokens` is still
 * preferred when a module happens to provide it.
 *
 * @param text the text to estimate
 * @returns the estimated number of tokens
 */
async function estimateTokens(text) {
  const roughEstimate = () => Math.ceil(text.length / 4);
  const loaded = await loadFuzztok();
  if (!loaded || !fuzztokModule) {
    return roughEstimate();
  }
  try {
    if (typeof fuzztokModule.countTokens === "function") {
      return fuzztokModule.countTokens(text);
    }
    return fuzztokModule.createSimpleFuzzyEstimator({}).estimate(text);
  } catch (error) {
    // Best-effort by design: any estimator failure degrades to the rough guess.
    return roughEstimate();
  }
}
/**
 * Report whether fuzztok can be used, loading it if necessary.
 * @returns true when the module loaded
 */
async function isFuzztokAvailable() {
  const available = await loadFuzztok();
  return available;
}
/**
 * Dependency-free token estimate: the average of a word-based guess
 * (1.3 tokens per word) and a character-based guess (4 characters per token).
 *
 * Words are counted as runs of non-whitespace. Splitting on whitespace, as
 * before, also counted the empty strings produced by leading or trailing
 * whitespace, inflating the estimate for padded or whitespace-only text.
 *
 * @param text the text to estimate
 * @returns 0 for empty input, otherwise an estimate of at least 1
 */
function simpleFallbackEstimate(text) {
  if (!text) return 0;
  const words = text.match(/\S+/g)?.length ?? 0;
  const chars = text.length;
  const wordBasedEstimate = Math.ceil(words * 1.3);
  const charBasedEstimate = Math.ceil(chars / 4);
  return Math.max(1, Math.round((wordBasedEstimate + charBasedEstimate) / 2));
}
/**
 * Estimate tokens without ever rejecting: empty input is 0, and a failure of
 * the primary estimator falls back to the simple heuristic.
 * @param text the text to estimate
 * @returns the estimated number of tokens
 */
async function robustEstimateTokens(text) {
  if (!text) return 0;
  let tokens;
  try {
    tokens = await estimateTokens(text);
  } catch (error) {
    tokens = simpleFallbackEstimate(text);
  }
  return tokens;
}
// src/index.ts
var LLMThrottle = class {
/**
* Create a new LLMThrottle instance
* @param config Configuration including optional storage implementation
*/
constructor(config) {
this.consumptionHistory = [];
// private _config: LLMThrottleConfig; // Kept for future use
this.lock = new AsyncLock();
this.compensationDebt = 0;
this.initialized = false;
const legacyConfig = {
...config,
storage: config.storage ? {
enabled: true,
implementation: config.storage
} : void 0
};
const tempLogger = config.logger || console;
const validatedConfig = validateAndNormalizeConfig(legacyConfig, config.validationRules, tempLogger);
this.logger = validatedConfig.logger || console;
this.historyRetentionMs = validatedConfig.historyRetentionMs || 6e4;
this.maxHistoryRecords = validatedConfig.maxHistoryRecords || 1e4;
this.efficiencyWindowSize = validatedConfig.efficiencyWindowSize || 50;
this.adjustmentFailureStrategy = validatedConfig.adjustmentFailureStrategy || "warn";
this.storageEnabled = !!config.storage;
this.storage = config.storage || new InMemoryStorage();
if (validatedConfig.clock) {
this.clock = validatedConfig.clock;
} else {
this.clock = createOptimalClock(validatedConfig.monotonicClock !== false);
}
this.rpmBucket = new TokenBucket({
capacity: validatedConfig.burstRPM || validatedConfig.rpm,
refillRate: validatedConfig.rpm / 60,
// per second
initialTokens: validatedConfig.burstRPM || validatedConfig.rpm,
clock: this.clock,
storageKey: "rpm"
}, this.storageEnabled ? this.storage : void 0);
this.tpmBucket = new TokenBucket({
capacity: validatedConfig.burstTPM || validatedConfig.tpm,
refillRate: validatedConfig.tpm / 60,
initialTokens: validatedConfig.burstTPM || validatedConfig.tpm,
clock: this.clock,
storageKey: "tpm"
}, this.storageEnabled ? this.storage : void 0);
}
/**
* Initialize from storage if available
* Call this after creating the instance to restore persisted state
*/
async initialize() {
if (this.initialized || !this.storageEnabled) {
return;
}
try {
await Promise.all([
this.rpmBucket.initializeFromStorage(),
this.tpmBucket.initializeFromStorage()
]);
const storedDebt = await this.storage.loadCompensationDebt();
if (storedDebt >= 0) {
this.compensationDebt = storedDebt;
}
const history = await this.storage.loadConsumptionHistory(this.maxHistoryRecords);
if (history.length > 0) {
this.consumptionHistory = history;
this.cleanupHistory();
}
this.logger.info("Throttle state initialized from storage");
} catch (error) {
this.logger.warn(`Failed to initialize from storage: ${error instanceof Error ? error.message : String(error)}`);
} finally {
this.initialized = true;
}
}
canProcess(estimatedTokens) {
if (estimatedTokens < 0) {
throw new Error("Estimated tokens cannot be negative");
}
if (!this.rpmBucket.hasTokens(1)) {
return {
allowed: false,
reason: "rpm_limit",
availableIn: this.rpmBucket.timeUntilNextToken(),
availableTokens: {
rpm: this.rpmBucket.available,
tpm: this.tpmBucket.available
}
};
}
if (!this.tpmBucket.hasTokens(estimatedTokens)) {
return {
allowed: false,
reason: "tpm_limit",
availableIn: this.tpmBucket.timeUntilTokens(estimatedTokens),
availableTokens: {
rpm: this.rpmBucket.available,
tpm: this.tpmBucket.available
}
};
}
return {
allowed: true,
availableTokens: {
rpm: this.rpmBucket.available,
tpm: this.tpmBucket.available
}
};
}
// Synchronous version (backward compatibility)
consume(requestId, estimatedTokens, metadata) {
if (!requestId || requestId.trim() === "") {
throw new Error("Request ID cannot be empty");
}
const totalTokensNeeded = estimatedTokens + this.compensationDebt;
const check = this.canProcess(totalTokensNeeded);
if (!check.allowed) {
return false;
}
this.rpmBucket.consume(1);
this.tpmBucket.consume(totalTokensNeeded);
const appliedCompensation = this.compensationDebt;
this.compensationDebt = 0;
const record = {
timestamp: this.clock(),
tokens: estimatedTokens,
requestId,
metadata,
estimatedTokens,
compensationDebt: appliedCompensation
};
this.consumptionHistory.push(record);
if (this.storageEnabled) {
this.storage.addConsumptionRecord(record).catch(() => {
});
}
this.cleanupHistory();
return true;
}
// Async version for concurrent scenarios
async consumeAsync(requestId, estimatedTokens, metadata) {
if (!requestId || requestId.trim() === "") {
throw new Error("Request ID cannot be empty");
}
return await this.lock.withLock(async () => {
const totalTokensNeeded = estimatedTokens + this.compensationDebt;
const check = this.canProcess(totalTokensNeeded);
if (!check.allowed) {
return false;
}
this.rpmBucket.consume(1);
this.tpmBucket.consume(totalTokensNeeded);
const appliedCompensation = this.compensationDebt;
this.compensationDebt = 0;
const record = {
timestamp: this.clock(),
tokens: estimatedTokens,
requestId,
metadata,
estimatedTokens,
compensationDebt: appliedCompensation
};
this.consumptionHistory.push(record);
if (this.storageEnabled) {
this.storage.addConsumptionRecord(record).catch(() => {
});
}
this.cleanupHistory();
return true;
});
}
// Synchronous version (backward compatibility)
consumeOrThrow(requestId, estimatedTokens, metadata) {
const consumed = this.consume(requestId, estimatedTokens, metadata);
if (!consumed) {
const check = this.canProcess(estimatedTokens + this.compensationDebt);
throw new RateLimitError(
`Rate limit exceeded: ${check.reason}`,
check.reason,
check.availableIn
);
}
}
async consumeOrThrowAsync(requestId, estimatedTokens, metadata) {
const consumed = await this.consumeAsync(requestId, estimatedTokens, metadata);
if (!consumed) {
const check = this.canProcess(estimatedTokens + this.compensationDebt);
throw new RateLimitError(
`Rate limit exceeded: ${check.reason}`,
check.reason,
check.availableIn
);
}
}
// Synchronous version (backward compatibility)
adjustConsumption(requestId, actualTokens) {
if (actualTokens < 0) {
throw new Error("Actual tokens cannot be negative");
}
const record = this.consumptionHistory.find(
(item) => item.requestId === requestId
);
if (!record) {
throw new Error(`Request ID '${requestId}' not found in consumption history`);
}
const difference = actualTokens - record.tokens;
if (difference > 0) {
const consumed = this.tpmBucket.consume(difference);
if (!consumed) {
this.handleAdjustmentFailureSync(requestId, difference);
}
} else if (difference < 0) {
this.tpmBucket.refund(-difference);
}
record.tokens = actualTokens;
record.actualTokens = actualTokens;
}
async adjustConsumptionAsync(requestId, actualTokens) {
if (actualTokens < 0) {
throw new Error("Actual tokens cannot be negative");
}
return await this.lock.withLock(async () => {
const record = this.consumptionHistory.find(
(item) => item.requestId === requestId
);
if (!record) {
throw new Error(`Request ID '${requestId}' not found in consumption history`);
}
const difference = actualTokens - record.tokens;
if (difference > 0) {
const consumed = this.tpmBucket.consume(difference);
if (!consumed) {
await this.handleAdjustmentFailure(requestId, difference);
}
} else if (difference < 0) {
this.tpmBucket.refund(-difference);
}
record.tokens = actualTokens;
record.actualTokens = actualTokens;
});
}
handleAdjustmentFailureSync(requestId, additionalTokens) {
const message = `Failed to consume additional ${additionalTokens} tokens for request ${requestId}`;
switch (this.adjustmentFailureStrategy) {
case "strict":
throw new RateLimitError(
message,
"tpm_limit",
this.tpmBucket.timeUntilTokens(additionalTokens)
);
case "warn":
this.logger.warn(message);
break;
case "compensate":
this.compensationDebt += additionalTokens;
this.logger.info(`Adding ${additionalTokens} tokens to compensation debt. Total debt: ${this.compensationDebt}`);
this.persistCompensationDebt();
break;
}
}
async handleAdjustmentFailure(requestId, additionalTokens) {
const message = `Failed to consume additional ${additionalTokens} tokens for request ${requestId}`;
switch (this.adjustmentFailureStrategy) {
case "strict":
throw new RateLimitError(
message,
"tpm_limit",
this.tpmBucket.timeUntilTokens(additionalTokens)
);
case "warn":
this.logger.warn(message);
break;
case "compensate":
this.compensationDebt += additionalTokens;
this.logger.info(`Adding ${additionalTokens} tokens to compensation debt. Total debt: ${this.compensationDebt}`);
this.persistCompensationDebt();
break;
}
}
getMetrics() {
this.cleanupHistory();
const rpmUsed = this.rpmBucket.capacity - this.rpmBucket.available;
const tpmUsed = this.tpmBucket.capacity - this.tpmBucket.available;
const historyStats = this.getHistoryStatistics();
const memoryStats = this.getMemoryMetrics();
return {
rpm: {
used: rpmUsed,
available: this.rpmBucket.available,
limit: this.rpmBucket.capacity,
percentage: rpmUsed / this.rpmBucket.capacity * 100
},
tpm: {
used: tpmUsed,
available: this.tpmBucket.available,
limit: this.tpmBucket.capacity,
percentage: tpmUsed / this.tpmBucket.capacity * 100
},
efficiency: this.calculateEfficiency(),
consumptionHistory: historyStats,
memory: memoryStats,
compensation: {
totalDebt: this.compensationDebt,
pendingCompensation: this.compensationDebt
}
};
}
getConsumptionHistory() {
this.cleanupHistory();
return [...this.consumptionHistory];
}
// Synchronous version (backward compatibility)
reset() {
this.rpmBucket.reset();
this.tpmBucket.reset();
this.consumptionHistory = [];
this.compensationDebt = 0;
if (this.storageEnabled) {
this.storage.clear().catch(() => {
});
}
}
async resetAsync() {
return await this.lock.withLock(async () => {
this.rpmBucket.reset();
this.tpmBucket.reset();
this.consumptionHistory = [];
this.compensationDebt = 0;
if (this.storageEnabled) {
await this.storage.clear();
}
});
}
setHistoryRetention(ms) {
if (ms <= 0) {
throw new Error("History retention must be positive");
}
this.historyRetentionMs = ms;
}
setMaxHistoryRecords(count) {
if (count <= 0) {
throw new Error("Max history records must be positive");
}
this.maxHistoryRecords = count;
this.cleanupHistory();
}
cleanupHistory() {
const cutoff = this.clock() - this.historyRetentionMs;
this.consumptionHistory = this.consumptionHistory.filter(
(item) => item.timestamp > cutoff
);
if (this.storageEnabled) {
this.storage.cleanupConsumptionHistory(cutoff).catch(() => {
});
}
if (this.consumptionHistory.length > this.maxHistoryRecords) {
const excess = this.consumptionHistory.length - this.maxHistoryRecords;
this.consumptionHistory.splice(0, excess);
if (excess > 10) {
this.logger.warn(`Removed ${excess} old consumption records to stay within memory limit`);
}
}
}
getHistoryStatistics() {
if (this.consumptionHistory.length === 0) {
return {
count: 0,
averageTokensPerRequest: 0,
totalTokens: 0,
estimationAccuracy: 1
};
}
const totalTokens = this.consumptionHistory.reduce(
(sum, record) => sum + record.tokens,
0
);
const recordsWithActual = this.consumptionHistory.filter(
(record) => record.estimatedTokens !== void 0 && record.actualTokens !== void 0
);
let estimationAccuracy = 1;
if (recordsWithActual.length > 0) {
const accuracySum = recordsWithActual.reduce((sum, record) => {
const estimated = record.estimatedTokens;
const actual = record.actualTokens;
if (estimated === 0) return sum + 1;
return sum + Math.min(estimated, actual) / Math.max(estimated, actual);
}, 0);
estimationAccuracy = accuracySum / recordsWithActual.length;
}
return {
count: this.consumptionHistory.length,
averageTokensPerRequest: totalTokens / this.consumptionHistory.length,
totalTokens,
estimationAccuracy
};
}
getMemoryMetrics() {
const recordSize = 200;
return {
historyRecords: this.consumptionHistory.length,
estimatedMemoryUsage: this.consumptionHistory.length * recordSize,
maxHistoryRecords: this.maxHistoryRecords
};
}
calculateEfficiency() {
const recentHistory = this.consumptionHistory.slice(-this.efficiencyWindowSize);
if (recentHistory.length === 0) return 1;
const recordsWithActual = recentHistory.filter(
(record) => record.estimatedTokens !== void 0 && record.actualTokens !== void 0
);
if (recordsWithActual.length === 0) {
return 0.85;
}
let totalAccuracy = 0;
for (const record of recordsWithActual) {
const estimated = record.estimatedTokens;
const actual = record.actualTokens;
if (estimated === 0 && actual === 0) {
totalAccuracy += 1;
} else if (estimated === 0 || actual === 0) {
totalAccuracy += 0;
} else {
totalAccuracy += Math.min(estimated, actual) / Math.max(estimated, actual);
}
}
return totalAccuracy / recordsWithActual.length;
}
/**
* Validates internal state consistency
*/
validateState() {
try {
if (!this.rpmBucket.validateConsistency() || !this.tpmBucket.validateConsistency()) {
return false;
}
if (this.compensationDebt < 0) {
return false;
}
if (this.consumptionHistory.length > this.maxHistoryRecords * 1.1) {
return false;
}
const now = this.clock();
const oldestAllowed = now - this.historyRetentionMs * 2;
for (const record of this.consumptionHistory) {
if (record.timestamp < oldestAllowed || record.timestamp > now + 1e3) {
return false;
}
if (record.tokens < 0) {
return false;
}
}
return true;
} catch {
return false;
}
}
/**
* Creates a state snapshot for backup/restore
*/
createSnapshot() {
return {
timestamp: this.clock(),
rpmBucketState: this.rpmBucket.getState(),
tpmBucketState: this.tpmBucket.getState(),
historyCount: this.consumptionHistory.length,
compensationDebt: this.compensationDebt
};
}
/**
* Restores state from a snapshot
*/
async restoreFromSnapshot(snapshot) {
return await this.lock.withLock(async () => {
try {
this.rpmBucket.restoreState(snapshot.rpmBucketState);
this.tpmBucket.restoreState(snapshot.tpmBucketState);
this.compensationDebt = snapshot.compensationDebt;
this.consumptionHistory = [];
this.logger.info(`State restored from snapshot (timestamp: ${snapshot.timestamp})`);
} catch (error) {
this.logger.error(`Failed to restore from snapshot: ${error instanceof Error ? error.message : String(error)}`);
throw error;
}
});
}
/**
* Attempts to repair inconsistent state
*/
async repairState() {
return await this.lock.withLock(async () => {
let repaired = false;
if (this.compensationDebt < 0) {
this.logger.warn(`Repairing negative compensation debt: ${this.compensationDebt}`);
this.compensationDebt = 0;
this.persistCompensationDebt();
repaired = true;
}
const originalLength = this.consumptionHistory.length;
const now = this.clock();
const oldestAllowed = now - this.historyRetentionMs * 2;
this.consumptionHistory = this.consumptionHistory.filter((record) => {
return record.timestamp >= oldestAllowed && record.timestamp <= now + 1e3 && record.tokens >= 0;
});
if (this.consumptionHistory.length !== originalLength) {
this.logger.warn(`Removed ${originalLength - this.consumptionHistory.length} invalid history records`);
repaired = true;
}
if (this.consumptionHistory.length > this.maxHistoryRecords) {
const excess = this.consumptionHistory.length - this.maxHistoryRecords;
this.consumptionHistory.splice(0, excess);
this.logger.warn(`Removed ${excess} excess history records`);
repaired = true;
}
return repaired;
});
}
/**
* Persist compensation debt to storage
*/
persistCompensationDebt() {
if (!this.storageEnabled) {
return;
}
this.storage.saveCompensationDebt(this.compensationDebt).catch(() => {
});
}
};
/**
 * Factory wrapper around the `LLMThrottle` constructor.
 *
 * @param {object} config - Passed to the constructor unchanged.
 * @returns {LLMThrottle} The newly constructed throttle.
 */
function createLLMThrottle(config) {
  const throttle = new LLMThrottle(config);
  return throttle;
}
/**
 * Factory that constructs an `LLMThrottle` with the given storage.
 *
 * The caller's `config` object is not modified. Its own enumerable
 * properties are copied into a new object and `storage` is set on the copy,
 * replacing any `storage` entry already present in `config`.
 *
 * @param {object} config - Base throttle configuration.
 * @param {*} storage - Value to pass as the `storage` option.
 * @returns {LLMThrottle} The newly constructed throttle.
 */
function createLLMThrottleWithStorage(config, storage) {
  const options = { ...config, storage };
  return new LLMThrottle(options);
}
export {
AsyncLock,
InMemoryStorage,
InvalidConfigError,
LLMThrottle,
RateLimitError,
TokenBucket,
createLLMThrottle,
createLLMThrottleWithStorage,
createMonotonicClock,
createOptimalClock,
createStandardClock,
defaultValidationRules,
estimateTokens,
getClockInfo,
isFuzztokAvailable,
robustEstimateTokens,
validateAndNormalizeConfig,
validateConfig
};
//# sourceMappingURL=index.mjs.map