UNPKG

@aid-on/llm-throttle

Version:

高精度なLLMレート制限ライブラリ - Precise dual rate limiting for LLM APIs (RPM + TPM)

457 lines (445 loc) 14.7 kB
/**
 * Logging sink accepted by the throttle. The configuration comments state
 * that `console` is used when no logger is supplied.
 */
interface Logger {
  warn(message: string, ...args: unknown[]): void;
  error(message: string, ...args: unknown[]): void;
  info(message: string, ...args: unknown[]): void;
  debug(message: string, ...args: unknown[]): void;
}

/**
 * Names of the policies selectable through `adjustmentFailureStrategy`,
 * i.e. what to do when `adjustConsumption` cannot consume additional tokens.
 */
type AdjustmentFailureStrategy = 'strict' | 'warn' | 'compensate';

/**
 * A single named validation check.
 *
 * `validate` yields either a boolean or a string.
 * NOTE(review): the string form is presumably a failure message — confirm
 * against the implementation.
 */
interface ValidationRule<T = unknown> {
  name: string;
  validate: (value: T) => boolean | string;
  level: 'error' | 'warn';
}

/** Configuration for a dual (requests + tokens per minute) rate limiter. */
interface DualRateLimitConfig {
  /** Limit on requests per minute. */
  rpm: number;
  /** Limit on tokens per minute. */
  tpm: number;
  /** Burst capacity for requests; falls back to `rpm` when omitted. */
  burstRPM?: number;
  /** Burst capacity for tokens; falls back to `tpm` when omitted. */
  burstTPM?: number;
  /** Replacement clock function, intended for tests. */
  clock?: () => number;
  /** Replacement logger; `console` is the default. */
  logger?: Logger;
  /** Policy applied when `adjustConsumption` fails to consume extra tokens. */
  adjustmentFailureStrategy?: AdjustmentFailureStrategy;
  /** Upper bound on the number of consumption records held in memory. */
  maxHistoryRecords?: number;
  /** How long history is retained, in milliseconds (default 60000). */
  historyRetentionMs?: number;
  /** Turns the monotonic clock on or off; auto-detected when omitted. */
  monotonicClock?: boolean;
  /** Additional user-supplied validation rules. */
  validationRules?: ValidationRule[];
  /** How many records feed the efficiency calculation (default 50). */
  efficiencyWindowSize?: number;
  /** Persistence settings. */
  storage?: {
    enabled?: boolean;
    implementation?: unknown;
  };
}

/** One entry of the consumption history. */
interface ConsumptionRecord<TMetadata = Record<string, unknown>> {
  timestamp: number;
  tokens: number;
  requestId: string;
  /** Caller-supplied metadata describing the request. */
  metadata?: TMetadata;
  /** Token estimate recorded at initial consumption. */
  estimatedTokens?: number;
  /** Real token count, as set through `adjustConsumption`. */
  actualTokens?: number;
  /** Debt to be compensated by future requests. */
  compensationDebt?: number;
}

/** Outcome of a rate-limit check (returned by `LLMThrottle.canProcess`). */
interface RateLimitCheckResult {
  allowed: boolean;
  reason?: 'rpm_limit' | 'tpm_limit';
  availableIn?: number;
  /** Tokens currently available in each dimension. */
  availableTokens?: {
    rpm: number;
    tpm: number;
  };
}

/** Memory-related figures reported inside `RateLimitMetrics`. */
interface MemoryMetrics {
  historyRecords: number;
  estimatedMemoryUsage: number;
  maxHistoryRecords?: number;
}

/**
 * Point-in-time capture of throttle state, produced by
 * `LLMThrottle.createSnapshot` and consumed by `restoreFromSnapshot`.
 */
interface StateSnapshot {
  timestamp: number;
  rpmBucketState: TokenBucketState;
  tpmBucketState: TokenBucketState;
  historyCount: number;
  compensationDebt: number;
}

/** Aggregate metrics returned by `LLMThrottle.getMetrics`. */
interface RateLimitMetrics {
  rpm: {
    used: number;
    available: number;
    limit: number;
    percentage: number;
  };
  tpm: {
    used: number;
    available: number;
    limit: number;
    percentage: number;
  };
  efficiency: number;
  consumptionHistory: {
    count: number;
    averageTokensPerRequest: number;
    totalTokens: number;
    estimationAccuracy?: number;
  };
  memory: MemoryMetrics;
  compensation: {
    totalDebt: number;
    pendingCompensation: number;
  };
}

/** Serializable state of a `TokenBucket` (see `getState` / `restoreState`). */
interface TokenBucketState {
  available: number;
  capacity: number;
  lastRefill: number;
}

/** Constructor options for `TokenBucket`. */
interface TokenBucketConfig {
  capacity: number;
  refillRate: number;
  initialTokens?: number;
  clock?: () => number;
  storageKey?: string;
}

/**
 * Contract for a backend that persists throttle state.
 */
interface ThrottleStorage<TMetadata = Record<string, unknown>> {
  /**
   * Store the state of the token bucket identified by `key`.
   */
  saveTokenBucketState(key: string, state: TokenBucketState): Promise<void>;
  /**
   * Fetch the state of the token bucket identified by `key`; may resolve to `null`.
   */
  loadTokenBucketState(key: string): Promise<TokenBucketState | null>;
  /**
   * Store the consumption history.
   */
  saveConsumptionHistory(records: Array<ConsumptionRecord<TMetadata>>): Promise<void>;
  /**
   * Fetch the consumption history.
   */
  loadConsumptionHistory(limit?: number): Promise<Array<ConsumptionRecord<TMetadata>>>;
  /**
   * Append one consumption record.
   */
  addConsumptionRecord(record: ConsumptionRecord<TMetadata>): Promise<void>;
  /**
   * Delete old consumption records.
   * NOTE(review): the resolved number is presumably the count removed — confirm.
   */
  cleanupConsumptionHistory(olderThan: number): Promise<number>;
  /**
   * Store the compensation debt.
   */
  saveCompensationDebt(debt: number): Promise<void>;
  /**
   * Fetch the compensation debt.
   */
  loadCompensationDebt(): Promise<number>;
  /**
   * Wipe everything that has been stored.
   */
  clear(): Promise<void>;
  /**
   * Report whether the storage is available and working.
   */
  isAvailable(): Promise<boolean>;
}

/**
 * Settings describing how persistence is configured.
 */
interface StorageConfig {
  /**
   * Switches persistence on or off.
   */
  enabled?: boolean;
  /**
   * User-provided storage implementation.
   */
  storage?: ThrottleStorage;
}

/**
 * Storage implementation that keeps data in memory (the default).
 */
declare class InMemoryStorage<TMetadata = Record<string, unknown>>
  implements ThrottleStorage<TMetadata>
{
  private tokenBucketStates;
  private consumptionHistory;
  private compensationDebt;
  saveTokenBucketState(key: string, state: TokenBucketState): Promise<void>;
  loadTokenBucketState(key: string): Promise<TokenBucketState | null>;
  saveConsumptionHistory(records: Array<ConsumptionRecord<TMetadata>>): Promise<void>;
  loadConsumptionHistory(limit?: number): Promise<Array<ConsumptionRecord<TMetadata>>>;
  addConsumptionRecord(record: ConsumptionRecord<TMetadata>): Promise<void>;
  cleanupConsumptionHistory(olderThan: number): Promise<number>;
  saveCompensationDebt(debt: number): Promise<void>;
  loadCompensationDebt(): Promise<number>;
  clear(): Promise<void>;
  isAvailable(): Promise<boolean>;
}

/**
 * Token bucket configured by `TokenBucketConfig`, optionally paired with a
 * `ThrottleStorage` passed to the constructor.
 */
declare class TokenBucket {
  private _capacity;
  private _available;
  private _refillRate;
  private _lastRefill;
  private _clock;
  private _storage?;
  private _storageKey?;
  private _initialized;
  constructor(config: TokenBucketConfig, storage?: ThrottleStorage<unknown>);
  private validateConfig;
  get capacity(): number;
  get available(): number;
  get refillRate(): number;
  private refill;
  hasTokens(count: number): boolean;
  consume(count: number): boolean;
  refund(count: number): void;
  timeUntilNextToken(): number;
  timeUntilTokens(count: number): number;
  reset(): void;
  /**
   * Expose the current internal state, for use in snapshots.
   */
  getState(): TokenBucketState;
  /**
   * Overwrite the internal state with one taken from a snapshot.
   */
  restoreState(state: TokenBucketState): void;
  /**
   * Check that the internal state is consistent.
   */
  validateConsistency(): boolean;
  /**
   * Load state from storage when storage is available.
   */
  initializeFromStorage(): Promise<void>;
  /**
   * Write the current state to storage.
   */
  private persistState;
}

/**
 * Error carrying which limit was hit (`reason`) and an `availableIn` figure.
 * NOTE(review): `availableIn` is presumably a wait time; units are not
 * visible in the declarations — confirm.
 */
declare class RateLimitError extends Error {
  readonly reason: 'rpm_limit' | 'tpm_limit';
  readonly availableIn: number;
  constructor(message: string, reason: 'rpm_limit' | 'tpm_limit', availableIn: number);
}

/** Error type for an invalid configuration. */
declare class InvalidConfigError extends Error {
  constructor(message: string);
}

/**
 * Basic asynchronous lock used to guard critical sections.
 */
declare class AsyncLock {
  private locked;
  private queue;
  /**
   * Take the lock.
   */
  acquire(): Promise<void>;
  /**
   * Give the lock back.
   */
  release(): void;
  /**
   * Run `fn` while holding the lock.
   */
  withLock<T>(fn: () => Promise<T>): Promise<T>;
  /**
   * Tell whether the lock is held right now.
   */
  isLocked(): boolean;
  /**
   * Count the pending operations waiting on the lock.
   */
  getQueueLength(): number;
  /**
   * Drop all pending operations (handy during cleanup).
   */
  clear(): void;
}

/**
 * Clock helpers for high-precision timing.
 */

/**
 * Build a monotonic clock function suited to the current environment.
 */
declare function createMonotonicClock(): () => number;

/**
 * Build a standard clock function (`Date.now`).
 */
declare function createStandardClock(): () => number;

/**
 * Detect and build the best clock that is available.
 */
declare function createOptimalClock(preferMonotonic?: boolean): () => number;

/**
 * Describe the clock capabilities of the environment, for debugging.
 */
declare function getClockInfo(): {
  hasNodeHrtime: boolean;
  hasPerformanceNow: boolean;
  recommendedClock: 'monotonic' | 'standard';
};

/**
 * Helpers for validating configuration.
 */

/** Result of running validation rules: overall verdict plus messages. */
interface ValidationResult {
  valid: boolean;
  errors: string[];
  warnings: string[];
}

/**
 * Built-in validation rules applied to `DualRateLimitConfig`.
 */
declare const defaultValidationRules: ValidationRule<DualRateLimitConfig>[];

/**
 * Check a configuration against the rules.
 */
declare function validateConfig(
  config: DualRateLimitConfig,
  customRules?: ValidationRule<DualRateLimitConfig>[],
  logger?: Logger,
): ValidationResult;

/**
 * Check and normalize a configuration; throws when errors are found.
 */
declare function validateAndNormalizeConfig(
  config: DualRateLimitConfig,
  customRules?: ValidationRule<DualRateLimitConfig>[],
  logger?: Logger,
): DualRateLimitConfig;

/**
 * Integration helpers for @aid-on/fuzztok token estimation.
 */

/**
 * Estimate the token count of `text` via fuzztok.
 */
declare function estimateTokens(text: string): Promise<number>;

/**
 * Report whether fuzztok is available.
 */
declare function isFuzztokAvailable(): Promise<boolean>;

/**
 * Estimate tokens, falling back automatically.
 */
declare function robustEstimateTokens(text: string): Promise<number>;

/**
 * Extended configuration for `LLMThrottle` with explicit storage options.
 */
interface LLMThrottleConfig<TMetadata = Record<string, unknown>> {
  /** Limit on requests per minute. */
  rpm: number;
  /** Limit on tokens per minute. */
  tpm: number;
  /** Burst capacity for requests; falls back to `rpm` when omitted. */
  burstRPM?: number;
  /** Burst capacity for tokens; falls back to `tpm` when omitted. */
  burstTPM?: number;
  /** Replacement clock function, intended for tests. */
  clock?: () => number;
  /** Replacement logger; `console` is the default. */
  logger?: Logger;
  /** Policy applied when `adjustConsumption` fails to consume extra tokens. */
  adjustmentFailureStrategy?: AdjustmentFailureStrategy;
  /** Upper bound on the number of consumption records held in memory. */
  maxHistoryRecords?: number;
  /** How long history is retained, in milliseconds (default 60000). */
  historyRetentionMs?: number;
  /** Turns the monotonic clock on or off; auto-detected when omitted. */
  monotonicClock?: boolean;
  /** Additional user-supplied validation rules. */
  validationRules?: ValidationRule[];
  /** How many records feed the efficiency calculation (default 50). */
  efficiencyWindowSize?: number;
  /** Storage implementation used for persistence. */
  storage?: ThrottleStorage<TMetadata>;
}

/**
 * LLM Throttle: a rate limiter with two constraints (RPM + TPM) and optional
 * persistence.
 */
declare class LLMThrottle<TMetadata = Record<string, unknown>> {
  private rpmBucket;
  private tpmBucket;
  private consumptionHistory;
  private clock;
  private logger;
  private lock;
  private compensationDebt;
  private historyRetentionMs;
  private maxHistoryRecords;
  private efficiencyWindowSize;
  private adjustmentFailureStrategy;
  private storage;
  private storageEnabled;
  private initialized;
  /**
   * Construct an LLMThrottle.
   * @param config Configuration, optionally including a storage implementation
   */
  constructor(config: LLMThrottleConfig<TMetadata>);
  /**
   * Load state from storage when available.
   * Invoke after construction to bring back persisted state.
   */
  initialize(): Promise<void>;
  canProcess(estimatedTokens: number): RateLimitCheckResult;
  consume(requestId: string, estimatedTokens: number, metadata?: TMetadata): boolean;
  consumeAsync(
    requestId: string,
    estimatedTokens: number,
    metadata?: TMetadata,
  ): Promise<boolean>;
  consumeOrThrow(requestId: string, estimatedTokens: number, metadata?: TMetadata): void;
  consumeOrThrowAsync(
    requestId: string,
    estimatedTokens: number,
    metadata?: TMetadata,
  ): Promise<void>;
  adjustConsumption(requestId: string, actualTokens: number): void;
  adjustConsumptionAsync(requestId: string, actualTokens: number): Promise<void>;
  private handleAdjustmentFailureSync;
  private handleAdjustmentFailure;
  getMetrics(): RateLimitMetrics;
  getConsumptionHistory(): Array<ConsumptionRecord<TMetadata>>;
  reset(): void;
  resetAsync(): Promise<void>;
  setHistoryRetention(ms: number): void;
  setMaxHistoryRecords(count: number): void;
  private cleanupHistory;
  private getHistoryStatistics;
  private getMemoryMetrics;
  private calculateEfficiency;
  /**
   * Check that the internal state is consistent.
   */
  validateState(): boolean;
  /**
   * Produce a state snapshot for backup/restore.
   */
  createSnapshot(): StateSnapshot;
  /**
   * Bring state back from a snapshot.
   */
  restoreFromSnapshot(snapshot: StateSnapshot): Promise<void>;
  /**
   * Try to repair an inconsistent state.
   */
  repairState(): Promise<boolean>;
  /**
   * Write the compensation debt to storage.
   */
  private persistCompensationDebt;
}

/** Factory returning an `LLMThrottle` for the given configuration. */
declare function createLLMThrottle<TMetadata = Record<string, unknown>>(
  config: LLMThrottleConfig<TMetadata>,
): LLMThrottle<TMetadata>;

/**
 * Factory taking the storage implementation as a separate argument instead
 * of as part of the configuration object.
 */
declare function createLLMThrottleWithStorage<TMetadata = Record<string, unknown>>(
  config: Omit<LLMThrottleConfig<TMetadata>, 'storage'>,
  storage: ThrottleStorage<TMetadata>,
): LLMThrottle<TMetadata>;

export {
  type AdjustmentFailureStrategy,
  AsyncLock,
  type ConsumptionRecord,
  type DualRateLimitConfig,
  InMemoryStorage,
  InvalidConfigError,
  LLMThrottle,
  type LLMThrottleConfig,
  type Logger,
  type MemoryMetrics,
  type RateLimitCheckResult,
  RateLimitError,
  type RateLimitMetrics,
  type StateSnapshot,
  type StorageConfig,
  type ThrottleStorage,
  TokenBucket,
  type TokenBucketConfig,
  type TokenBucketState,
  type ValidationRule,
  createLLMThrottle,
  createLLMThrottleWithStorage,
  createMonotonicClock,
  createOptimalClock,
  createStandardClock,
  defaultValidationRules,
  estimateTokens,
  getClockInfo,
  isFuzztokAvailable,
  robustEstimateTokens,
  validateAndNormalizeConfig,
  validateConfig,
};