@limitrate/core
Core rate limiting and cost control engine for LimitRate
import Redis from 'ioredis';
import { Redis as Redis$1 } from '@upstash/redis';
/**
* Core types for LimitRate rate limiting and cost control
*/
type PlanName = 'free' | 'pro' | 'enterprise' | string;
type EnforcementAction = 'allow' | 'block' | 'slowdown' | 'allow-and-log';
interface RateRule {
/** Maximum requests per second */
maxPerSecond?: number;
/** Maximum requests per minute */
maxPerMinute?: number;
/** Maximum requests per hour */
maxPerHour?: number;
/** Maximum requests per day */
maxPerDay?: number;
/** Burst allowance (extra tokens beyond steady rate) */
burst?: number;
/** Maximum tokens per minute (AI feature - v1.4.0) */
maxTokensPerMinute?: number;
/** Maximum tokens per hour (AI feature - v1.4.0) */
maxTokensPerHour?: number;
/** Maximum tokens per day (AI feature - v1.4.0) */
maxTokensPerDay?: number;
/** Action to take when limit exceeded */
actionOnExceed: EnforcementAction;
/** Delay in milliseconds if action is 'slowdown' */
slowdownMs?: number;
}
interface CostRule {
/** Function to estimate cost of a request */
estimateCost: (context: any) => number;
/** Maximum cost per hour */
hourlyCap?: number;
/** Maximum cost per day */
dailyCap?: number;
/** Action to take when cost cap exceeded */
actionOnExceed: EnforcementAction;
}
/**
* Concurrency configuration (v2.0.0 - D1)
* Controls how many requests can run simultaneously
*/
interface ConcurrencyConfig$1 {
/** Maximum concurrent requests */
max: number;
/** Max wait time in queue (ms), default 30000 */
queueTimeout?: number;
/** What to do when limit exceeded: 'queue' (wait) or 'block' (reject immediately) */
actionOnExceed?: 'queue' | 'block';
}
/**
* Penalty/Reward configuration (v2.0.0 - D4)
*/
interface PenaltyConfig {
/** Enable penalty/reward system */
enabled?: boolean;
/** Penalty on rate limit violation */
onViolation?: {
/** Duration in seconds */
duration: number;
/** Multiplier for limit (0.5 = reduce to 50%) */
multiplier: number;
};
/** Reward for low usage */
rewards?: {
/** Duration in seconds */
duration: number;
/** Multiplier for limit (1.5 = increase to 150%) */
multiplier: number;
/** Trigger condition */
trigger: 'below_50_percent' | 'below_25_percent' | 'below_10_percent';
};
}
interface EndpointPolicy {
/** Rate limiting rule */
rate?: RateRule;
/** Cost limiting rule */
cost?: CostRule;
/** Concurrency limiting rule (v2.0.0 - D1) */
concurrency?: ConcurrencyConfig$1;
/** Penalty/Reward configuration (v2.0.0 - D4) */
penalty?: PenaltyConfig;
}
type PolicyConfig = Record<PlanName, {
/** Endpoint-specific policies (key = "METHOD|/path") */
endpoints: Record<string, EndpointPolicy>;
/** Default policy for endpoints not explicitly configured */
defaults?: EndpointPolicy;
}>;
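/**
 * Illustrative PolicyConfig sketch (not part of the declarations; the plan
 * names, endpoints, limits, and cost formula below are assumptions):
 * ```typescript
 * import type { PolicyConfig } from '@limitrate/core';
 *
 * const policies: PolicyConfig = {
 *   free: {
 *     endpoints: {
 *       'POST|/api/ai/chat': {
 *         rate: { maxPerMinute: 10, burst: 5, actionOnExceed: 'block' },
 *         cost: {
 *           // context shape is up to you; it is typed `any`
 *           estimateCost: (ctx) => (ctx.maxTokens ?? 500) * 0.00002,
 *           dailyCap: 1,
 *           actionOnExceed: 'block',
 *         },
 *       },
 *     },
 *     defaults: {
 *       rate: { maxPerMinute: 60, actionOnExceed: 'slowdown', slowdownMs: 1000 },
 *     },
 *   },
 *   pro: {
 *     endpoints: {
 *       'POST|/api/ai/chat': {
 *         rate: { maxPerMinute: 100, maxTokensPerDay: 500000, actionOnExceed: 'block' },
 *       },
 *     },
 *   },
 * };
 * ```
 */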
/**
* User override configuration (v1.6.0 - B4)
* Allows specific users to have custom rate limits regardless of their plan
*/
interface UserOverride {
/** Maximum requests per second (overrides plan limit) */
maxPerSecond?: number;
/** Maximum requests per minute (overrides plan limit) */
maxPerMinute?: number;
/** Maximum requests per hour (overrides plan limit) */
maxPerHour?: number;
/** Maximum requests per day (overrides plan limit) */
maxPerDay?: number;
/** Reason for override (audit trail) */
reason?: string;
/** Endpoint-specific overrides (optional) */
endpoints?: Record<string, Omit<UserOverride, 'reason' | 'endpoints'>>;
}
/**
* Static user overrides configuration
*/
type UserOverridesConfig = Record<string, UserOverride>;
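/**
 * Illustrative override map (user IDs, limits, and the reason string are
 * assumptions):
 * ```typescript
 * import type { UserOverridesConfig } from '@limitrate/core';
 *
 * const overrides: UserOverridesConfig = {
 *   user_42: {
 *     maxPerMinute: 500,
 *     reason: 'Enterprise trial, approved by support',
 *     // Tighter limit on one endpoint, independent of the global override
 *     endpoints: {
 *       'POST|/api/export': { maxPerHour: 20 },
 *     },
 *   },
 * };
 * ```
 */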
interface StoreConfig {
/** Store type */
type: 'memory' | 'redis' | 'upstash';
/** Redis connection URL (for redis/upstash) */
url?: string;
/** Upstash REST token (for upstash) */
token?: string;
/** Redis client options */
options?: any;
}
interface RateCheckResult {
/** Whether request is allowed */
allowed: boolean;
/** Current usage count */
current: number;
/** Remaining requests in window */
remaining: number;
/** Seconds until window resets */
resetInSeconds: number;
/** Limit that was checked against */
limit: number;
/** Current burst tokens available (if burst enabled) */
burstTokens?: number;
}
interface CostCheckResult {
/** Whether request is allowed */
allowed: boolean;
/** Current cost usage */
current: number;
/** Remaining cost in window */
remaining: number;
/** Seconds until window resets */
resetInSeconds: number;
/** Cap that was checked against */
cap: number;
}
interface TokenCheckResult {
/** Whether request is allowed based on token limit */
allowed: boolean;
/** Current token usage */
current: number;
/** Remaining tokens in window */
remaining: number;
/** Seconds until window resets */
resetInSeconds: number;
/** Token limit that was checked against */
limit: number;
}
interface LimitRateEvent {
/** Event timestamp (Unix milliseconds) */
timestamp: number;
/** User identifier */
user: string;
/** User's plan */
plan: PlanName;
/** Endpoint (METHOD|/path) */
endpoint: string;
/** Event type */
type: 'rate_exceeded' | 'cost_exceeded' | 'token_limit_exceeded' | 'token_usage_tracked' | 'slowdown_applied' | 'allowed' | 'blocked';
/** Time window (e.g., "1m", "1h", "1d") */
window?: string;
/** Current value (count, cost, or tokens) */
value?: number;
/** Threshold that was checked */
threshold?: number;
/** Token count (for token events - v1.4.0) */
tokens?: number;
}
interface Store {
/**
* Check rate limit for a key (increments counter)
* @param key - Unique identifier (e.g., "user_123:POST|/api")
* @param limit - Maximum requests allowed
* @param windowSeconds - Time window in seconds
* @param burst - Optional burst allowance (extra tokens beyond limit)
* @returns Rate check result
*/
checkRate(key: string, limit: number, windowSeconds: number, burst?: number): Promise<RateCheckResult>;
/**
* Peek at rate limit status without incrementing (v1.7.0 - B5)
* Used by status endpoints to show quota without consuming it
* @param key - Unique identifier (e.g., "user_123:POST|/api")
* @param limit - Maximum requests allowed
* @param windowSeconds - Time window in seconds
* @returns Rate check result (without incrementing)
*/
peekRate(key: string, limit: number, windowSeconds: number): Promise<RateCheckResult>;
/**
* Increment cost for a key
* @param key - Unique identifier (e.g., "user_123:POST|/api:cost")
* @param cost - Cost to add
* @param windowSeconds - Time window in seconds
* @param cap - Maximum cost allowed in window
* @returns Cost check result
*/
incrementCost(key: string, cost: number, windowSeconds: number, cap: number): Promise<CostCheckResult>;
/**
* Increment token usage for a key (v1.4.0 - AI feature)
* @param key - Unique identifier (e.g., "user_123:POST|/api:tokens")
* @param tokens - Number of tokens to add
* @param windowSeconds - Time window in seconds
* @param limit - Maximum tokens allowed in window
* @returns Token check result
*/
incrementTokens(key: string, tokens: number, windowSeconds: number, limit: number): Promise<TokenCheckResult>;
/**
* Health check
* @returns Whether store is healthy
*/
ping(): Promise<boolean>;
/**
* Close connections
*/
close(): Promise<void>;
/**
* Generic get method for arbitrary data (v2.0.0 - D4)
* Used by penalty/reward system and other features
*/
get<T = any>(key: string): Promise<T | null>;
/**
* Generic set method for arbitrary data (v2.0.0 - D4)
* @param ttl - Time to live in seconds (optional)
*/
set<T = any>(key: string, value: T, ttl?: number): Promise<void>;
/**
* Generic delete method (v2.0.0 - D4)
*/
delete(key: string): Promise<void>;
}
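/**
 * Minimal usage sketch for a Store (the key follows the "user:METHOD|/path"
 * convention documented above; limits are illustrative):
 * ```typescript
 * import { createStore } from '@limitrate/core';
 *
 * const store = createStore({ type: 'memory' });
 *
 * // Counts this request against a 100-per-minute limit
 * const result = await store.checkRate('user_123:POST|/api', 100, 60);
 * if (!result.allowed) {
 *   console.log(`Blocked; retry in ${result.resetInSeconds}s`);
 * }
 *
 * // Read-only view for a status endpoint (does not increment)
 * const status = await store.peekRate('user_123:POST|/api', 100, 60);
 * ```
 */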
/**
* In-memory store for rate limiting (single instance only)
* Uses LRU cache with auto-expiry
*/
declare class MemoryStore implements Store {
private cache;
private readonly maxKeys;
private cleanupInterval;
constructor(options?: {
maxKeys?: number;
cleanupIntervalMs?: number;
});
checkRate(key: string, limit: number, windowSeconds: number, burst?: number): Promise<RateCheckResult>;
peekRate(key: string, limit: number, windowSeconds: number): Promise<RateCheckResult>;
incrementCost(key: string, cost: number, windowSeconds: number, cap: number): Promise<CostCheckResult>;
incrementTokens(key: string, tokens: number, windowSeconds: number, limit: number): Promise<TokenCheckResult>;
ping(): Promise<boolean>;
close(): Promise<void>;
/**
* Remove expired entries
*/
private cleanup;
/**
* Evict oldest entry if cache is full (simple LRU)
*/
private evictIfNeeded;
/**
* Generic get method for arbitrary data (v2.0.0 - D4)
*/
get<T = any>(key: string): Promise<T | null>;
/**
* Generic set method for arbitrary data (v2.0.0 - D4)
*/
set<T = any>(key: string, value: T, ttl?: number): Promise<void>;
/**
* Generic delete method (v2.0.0 - D4)
*/
delete(key: string): Promise<void>;
/**
* Get current cache size (for testing/debugging)
*/
getCacheSize(): number;
}
/**
* Redis store for distributed rate limiting
* Uses atomic Lua scripts for correctness
*/
interface RedisStoreOptions {
/** Redis connection URL or ioredis instance */
client?: Redis | string;
/** Key prefix for all LimitRate keys */
keyPrefix?: string;
/** Redis client options (if URL provided) */
redisOptions?: any;
}
declare class RedisStore implements Store {
private client;
private ownClient;
private keyPrefix;
constructor(options?: RedisStoreOptions);
checkRate(key: string, limit: number, windowSeconds: number, burst?: number): Promise<RateCheckResult>;
peekRate(key: string, limit: number, windowSeconds: number): Promise<RateCheckResult>;
incrementCost(key: string, cost: number, windowSeconds: number, cap: number): Promise<CostCheckResult>;
incrementTokens(key: string, tokens: number, windowSeconds: number, limit: number): Promise<TokenCheckResult>;
ping(): Promise<boolean>;
close(): Promise<void>;
/**
* Generic get method for arbitrary data (v2.0.0 - D4)
*/
get<T = any>(key: string): Promise<T | null>;
/**
* Generic set method for arbitrary data (v2.0.0 - D4)
*/
set<T = any>(key: string, value: T, ttl?: number): Promise<void>;
/**
* Generic delete method (v2.0.0 - D4)
*/
delete(key: string): Promise<void>;
/**
* Get underlying Redis client (for advanced use cases)
*/
getClient(): Redis;
}
/**
* Upstash store for serverless-friendly rate limiting
* Uses HTTP REST API (no persistent connections)
*/
interface UpstashStoreOptions {
/** Upstash Redis REST URL */
url: string;
/** Upstash Redis REST token */
token: string;
/** Key prefix for all LimitRate keys */
keyPrefix?: string;
}
declare class UpstashStore implements Store {
private client;
private keyPrefix;
private url;
private token;
constructor(options: UpstashStoreOptions);
checkRate(key: string, limit: number, windowSeconds: number, burst?: number): Promise<RateCheckResult>;
peekRate(key: string, limit: number, windowSeconds: number): Promise<RateCheckResult>;
incrementCost(key: string, cost: number, windowSeconds: number, cap: number): Promise<CostCheckResult>;
incrementTokens(key: string, tokens: number, windowSeconds: number, limit: number): Promise<TokenCheckResult>;
ping(): Promise<boolean>;
close(): Promise<void>;
/**
* Generic get method for arbitrary data (v2.0.0 - D4)
*/
get<T = any>(key: string): Promise<T | null>;
/**
* Generic set method for arbitrary data (v2.0.0 - D4)
*/
set<T = any>(key: string, value: T, ttl?: number): Promise<void>;
/**
* Generic delete method (v2.0.0 - D4)
*/
delete(key: string): Promise<void>;
/**
* Get underlying Upstash client (for advanced use cases)
*/
getClient(): Redis$1;
}
/**
* Store factory and exports
*/
/**
* Create a store from config (internal, for middleware)
*/
declare function createStore(config: StoreConfig): Store;
/**
* Create a SHARED memory store that can be reused across multiple limitrate instances.
*
* **Benefits:**
* - Reduces memory usage when using multiple limiters
* - Single cleanup interval instead of N intervals
* - Consistent cache across all limiters
*
* **Example:**
* ```typescript
* import { createSharedMemoryStore } from '@limitrate/core';
* import { limitrate } from '@limitrate/express';
*
* // Create ONCE
* const store = createSharedMemoryStore();
*
* // Reuse everywhere
* app.use(limitrate({ store, policies: { free: {...} } }));
* app.use('/api', limitrate({ store, policies: { api: {...} } }));
* app.use('/admin', limitrate({ store, policies: { admin: {...} } }));
* ```
*
* @param options - Optional memory store configuration
* @returns Shared MemoryStore instance
*/
declare function createSharedMemoryStore(options?: {
maxKeys?: number;
cleanupIntervalMs?: number;
}): MemoryStore;
/**
* Create a SHARED Redis store that can be reused across multiple limitrate instances.
*
* **Benefits:**
* - **Reduces Redis connections from N → 1** (critical for production)
* - Reduces memory usage by 75%+ when using multiple limiters
* - Prevents connection pool exhaustion
* - Single connection management instead of N connections
*
* **Example:**
* ```typescript
* import { createSharedRedisStore } from '@limitrate/core';
* import { limitrate } from '@limitrate/express';
*
* // Create ONCE
* const store = createSharedRedisStore({ url: process.env.REDIS_URL });
*
* // Reuse everywhere (1 connection, not 4!)
* app.use(limitrate({ store, policies: { free: {...} } }));
* app.use('/api', limitrate({ store, policies: { api: {...} } }));
* app.use('/admin', limitrate({ store, policies: { admin: {...} } }));
* app.use('/webhooks', limitrate({ store, policies: { webhooks: {...} } }));
*
* // Result: 1 Redis connection, 75% less memory
* ```
*
* **Advanced - Pass existing Redis client:**
* ```typescript
* import Redis from 'ioredis';
* import { createSharedRedisStore } from '@limitrate/core';
*
* const redisClient = new Redis(process.env.REDIS_URL);
* const store = createSharedRedisStore({ client: redisClient });
*
* // LimitRate will NOT close this client when done
* // You manage the lifecycle
* ```
*
* @param options - Redis URL or existing client + options
* @returns Shared RedisStore instance
*/
declare function createSharedRedisStore(options: {
url: string;
keyPrefix?: string;
redisOptions?: any;
} | {
client: any;
keyPrefix?: string;
}): RedisStore;
/**
* Create a SHARED Upstash store that can be reused across multiple limitrate instances.
*
* **Benefits:**
* - Serverless-friendly (HTTP-based, no persistent connections)
* - Reduces HTTP client overhead when using multiple limiters
* - Single token management
* - Consistent configuration across all limiters
*
* **Example:**
* ```typescript
* import { createSharedUpstashStore } from '@limitrate/core';
* import { limitrate } from '@limitrate/express';
*
* // Create ONCE
* const store = createSharedUpstashStore({
* url: process.env.UPSTASH_REDIS_REST_URL,
* token: process.env.UPSTASH_REDIS_REST_TOKEN
* });
*
* // Reuse everywhere
* app.use(limitrate({ store, policies: { free: {...} } }));
* app.use('/api', limitrate({ store, policies: { api: {...} } }));
* app.use('/admin', limitrate({ store, policies: { admin: {...} } }));
* ```
*
* **Perfect for:**
* - Vercel Edge Functions
* - Cloudflare Workers
* - AWS Lambda
* - Any serverless environment
*
* @param options - Upstash REST API URL and token
* @returns Shared UpstashStore instance
*/
declare function createSharedUpstashStore(options: {
url: string;
token: string;
keyPrefix?: string;
}): UpstashStore;
/**
* Event emitter for LimitRate events
*/
type EventHandler = (event: LimitRateEvent) => void | Promise<void>;
declare class EventEmitter {
private handlers;
constructor();
/**
* Register an event handler
*/
on(handler: EventHandler): void;
/**
* Unregister an event handler
*/
off(handler: EventHandler): void;
/**
* Emit an event to all handlers
*/
emit(event: LimitRateEvent): Promise<void>;
/**
* Get number of registered handlers
*/
getHandlerCount(): number;
/**
* Clear all handlers
*/
clear(): void;
}
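/**
 * Handler sketch (the alerting logic is an assumption; the fields are those
 * of LimitRateEvent above):
 * ```typescript
 * import { EventEmitter, type LimitRateEvent } from '@limitrate/core';
 *
 * const events = new EventEmitter();
 * events.on((event: LimitRateEvent) => {
 *   if (event.type === 'rate_exceeded' || event.type === 'cost_exceeded') {
 *     console.warn(`${event.user} (${event.plan}) hit ${event.type} on ${event.endpoint}`, {
 *       window: event.window,
 *       value: event.value,
 *       threshold: event.threshold,
 *     });
 *   }
 * });
 * ```
 */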
/**
* Policy evaluation engine
* Evaluates rate and cost rules, determines enforcement actions
*/
interface CheckContext {
/** User identifier */
user: string;
/** User's plan */
plan: PlanName;
/** Endpoint key (METHOD|/path) */
endpoint: string;
/** Optional cost estimation context */
costContext?: any;
/** Optional route-specific policy override */
policyOverride?: EndpointPolicy;
/** Optional user override (v1.6.0 - B4) */
userOverride?: UserOverride | null;
/** Optional token count for this request (v1.4.0 - AI feature) */
tokens?: number;
}
interface CheckResult {
/** Whether request should be allowed */
allowed: boolean;
/** Enforcement action to take */
action: EnforcementAction;
/** Reason for decision */
reason?: 'rate_exceeded' | 'cost_exceeded' | 'token_limit_exceeded';
/** Seconds to retry after (if blocked) */
retryAfterSeconds?: number;
/** Delay in milliseconds (if slowdown) */
slowdownMs?: number;
/** Details for response */
details: {
used: number;
limit: number;
remaining: number;
resetInSeconds: number;
burstTokens?: number;
};
}
declare class PolicyEngine {
private store;
private policies;
private events;
private penaltyManager;
constructor(store: Store, policies: PolicyConfig);
/**
* Register event handler
*/
onEvent(handler: (event: LimitRateEvent) => void | Promise<void>): void;
/**
* Check if request should be allowed
*/
check(context: CheckContext): Promise<CheckResult>;
/**
* Check rate limit
*/
private checkRate;
/**
* Check cost limit
*/
private checkCost;
/**
* Check token limits (v1.4.0 - AI feature)
*/
private checkTokens;
/**
* Resolve policy for plan and endpoint
*/
private resolvePolicy;
/**
* Emit event
*/
private emitEvent;
/**
* Get event emitter (for external handlers)
*/
getEventEmitter(): EventEmitter;
}
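/**
 * End-to-end sketch (assumes the `policies` object from the PolicyConfig
 * example above; the slowdown handling shown is one plausible way to honor
 * the returned action):
 * ```typescript
 * import { createStore, PolicyEngine } from '@limitrate/core';
 *
 * const engine = new PolicyEngine(createStore({ type: 'memory' }), policies);
 * engine.onEvent((e) => console.log('[limitrate]', e.type, e.user, e.endpoint));
 *
 * const result = await engine.check({
 *   user: 'user_123',
 *   plan: 'free',
 *   endpoint: 'POST|/api/ai/chat',
 *   tokens: 1200, // optional; only relevant for token limits
 * });
 *
 * if (!result.allowed) {
 *   // e.g., respond 429 with Retry-After: result.retryAfterSeconds
 * } else if (result.action === 'slowdown' && result.slowdownMs) {
 *   await new Promise((r) => setTimeout(r, result.slowdownMs));
 * }
 * ```
 */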
/**
* Configuration validation - fail fast at startup
*/
declare class ValidationError extends Error {
constructor(message: string);
}
declare function validatePolicyConfig(config: PolicyConfig): void;
declare function validateStoreConfig(config: StoreConfig): void;
declare function validateIPList(ips: string[], listName: string): void;
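/**
 * Startup sketch (assumes `policies` from earlier; validates config and
 * fails fast before the server starts accepting traffic):
 * ```typescript
 * import { ValidationError, validatePolicyConfig, validateStoreConfig } from '@limitrate/core';
 *
 * try {
 *   validatePolicyConfig(policies);
 *   validateStoreConfig({ type: 'redis', url: process.env.REDIS_URL });
 * } catch (err) {
 *   if (err instanceof ValidationError) {
 *     console.error('Invalid LimitRate config:', err.message);
 *     process.exit(1);
 *   }
 *   throw err;
 * }
 * ```
 */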
/**
* Route normalization utilities
* Converts dynamic routes to templates (e.g., /users/123 → /users/:id)
*/
/**
* Normalize a route path by replacing dynamic segments with :id
* @param path - The request path
* @param routePath - The route template (if available from framework)
* @returns Normalized path (e.g., "/users/:id")
*/
declare function normalizeRoutePath(path: string, routePath?: string): string;
/**
* Create endpoint key from method and path
* @param method - HTTP method
* @param path - Route path
* @param routePath - Route template (if available)
* @returns Endpoint key (e.g., "POST|/api/users/:id")
*/
declare function createEndpointKey(method: string, path: string, routePath?: string): string;
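/**
 * Behavior sketch (expected outputs follow the "/users/123 → /users/:id"
 * convention documented above):
 * ```typescript
 * import { createEndpointKey, normalizeRoutePath } from '@limitrate/core';
 *
 * // When the framework provides a route template, it is used directly
 * createEndpointKey('POST', '/users/123', '/users/:id'); // 'POST|/users/:id'
 *
 * // Otherwise dynamic segments are normalized
 * normalizeRoutePath('/users/123'); // '/users/:id'
 * ```
 */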
/**
* Extract IP address from request, respecting proxy headers
* @param ip - Direct IP from socket
* @param forwardedFor - X-Forwarded-For header value
* @param trustProxy - Whether to trust proxy headers
* @returns Client IP address
*/
declare function extractIP(ip: string, forwardedFor?: string, trustProxy?: boolean): string;
/**
* Check if IP is in allowlist/blocklist
* @param ip - IP to check
* @param list - List of IPs (supports CIDR notation)
* @returns Whether IP matches
*/
declare function isIPInList(ip: string, list: string[]): boolean;
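/**
 * IP-gating sketch (`req` stands in for an Express-style request and the
 * list entries are placeholders; CIDR support is per the docs above):
 * ```typescript
 * import { extractIP, isIPInList } from '@limitrate/core';
 *
 * const blocklist = ['10.0.0.0/8', '203.0.113.7'];
 *
 * const ip = extractIP(
 *   req.socket.remoteAddress ?? '',
 *   req.headers['x-forwarded-for'] as string | undefined,
 *   true // trustProxy: only enable behind a proxy you control
 * );
 *
 * if (isIPInList(ip, blocklist)) {
 *   // reject before the rate limiter runs
 * }
 * ```
 */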
/**
* Endpoint Auto-Discovery System
*
* Tracks all endpoints that receive requests, showing which are protected
* by rate limiting and which are not. Helps catch forgotten endpoints.
*/
interface EndpointStats {
/** HTTP method (GET, POST, etc.) */
method: string;
/** Endpoint path (e.g., /api/users/:id) */
path: string;
/** Whether this endpoint has rate limiting */
hasRateLimit: boolean;
/** Total request count */
requestCount: number;
/** Number of rate limited requests */
rateLimitedCount: number;
/** First seen timestamp */
firstSeen: Date;
/** Last seen timestamp */
lastSeen: Date;
/** Rate limit policy applied (if any) */
policy?: string;
/** Rate limit config (if any) */
limit?: number;
}
interface EndpointTrackerOptions {
/** Max endpoints to track (default: 1000) */
maxEndpoints?: number;
/** How long to keep endpoint stats (default: 24h) */
retentionMs?: number;
}
/**
* Tracks discovered endpoints at runtime
*/
declare class EndpointTracker {
private endpoints;
private options;
constructor(options?: EndpointTrackerOptions);
/**
* Track a request to an endpoint
*/
trackRequest(method: string, path: string, options: {
hasRateLimit: boolean;
wasRateLimited?: boolean;
policy?: string;
limit?: number;
}): void;
/**
* Get all tracked endpoints
*/
getEndpoints(): EndpointStats[];
/**
* Get unprotected endpoints (those without rate limits)
*/
getUnprotectedEndpoints(): EndpointStats[];
/**
* Get protected endpoints (those with rate limits)
*/
getProtectedEndpoints(): EndpointStats[];
/**
* Get stats summary
*/
getStats(): {
totalEndpoints: number;
protectedEndpoints: number;
unprotectedEndpoints: number;
totalRequests: number;
totalRateLimited: number;
};
/**
* Clear all tracked endpoints
*/
clear(): void;
/**
* Clean up old endpoints based on retention policy
*/
private cleanup;
/**
* Find the oldest endpoint (by lastSeen)
*/
private findOldestEndpoint;
}
/**
* Get or create the global endpoint tracker
*/
declare function getGlobalEndpointTracker(): EndpointTracker;
/**
* Set a custom global endpoint tracker
*/
declare function setGlobalEndpointTracker(tracker: EndpointTracker): void;
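/**
 * Audit sketch (the endpoints and policy names are illustrative):
 * ```typescript
 * import { getGlobalEndpointTracker } from '@limitrate/core';
 *
 * const tracker = getGlobalEndpointTracker();
 * tracker.trackRequest('GET', '/api/users/:id', { hasRateLimit: true, policy: 'free', limit: 60 });
 * tracker.trackRequest('POST', '/internal/debug', { hasRateLimit: false });
 *
 * // Surface forgotten endpoints
 * for (const ep of tracker.getUnprotectedEndpoints()) {
 *   console.warn(`Unprotected: ${ep.method} ${ep.path} (${ep.requestCount} requests)`);
 * }
 * ```
 */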
/**
* Tokenizer utilities for accurate token counting
* Supports OpenAI (tiktoken) and Anthropic tokenizers as optional dependencies
*/
interface Tokenizer {
/**
* Count tokens in text or array of texts
*/
count(text: string | string[]): Promise<number>;
/**
* Model name
*/
model: string;
/**
* Whether this is a fallback tokenizer (length/4 approximation)
*/
isFallback: boolean;
}
type TokenizerFunction = (text: string | string[]) => Promise<number> | number;
/**
* Create a tokenizer for the specified model
*
* Supports:
* - OpenAI models: gpt-3.5-turbo, gpt-4, gpt-4-turbo, gpt-4o, gpt-4o-mini (requires tiktoken)
* - Anthropic models: claude-3-opus, claude-3-sonnet, claude-3-haiku (requires @anthropic-ai/sdk)
* - Custom function
* - Fallback (length/4 approximation) if dependencies not installed
*
* @param modelOrFunction - Model name string or custom tokenizer function
* @param options - Options for tokenizer creation
* @returns Tokenizer instance
*
* @example
* ```typescript
* // Using OpenAI model (requires: npm install tiktoken)
* const tokenizer = await createTokenizer('gpt-4');
* const count = await tokenizer.count('Hello world');
*
* // Using Anthropic model (requires: npm install @anthropic-ai/sdk)
* const tokenizer = await createTokenizer('claude-3-opus');
*
* // Using custom function (handle both string and string[] inputs)
* const tokenizer = await createTokenizer((text) => {
*   const s = Array.isArray(text) ? text.join('') : text;
*   return s.length / 3.5;
* });
* ```
*/
declare function createTokenizer(modelOrFunction: string | TokenizerFunction, options?: {
warnOnFallback?: boolean;
}): Promise<Tokenizer>;
/**
* Clear the tokenizer cache
* Useful for testing or if you need to reinitialize tokenizers
*/
declare function clearTokenizerCache(): void;
/**
* Model limits database for AI models
* Contains context window and output limits for popular models
*/
interface ModelLimits {
/**
* Maximum input context window in tokens
*/
maxInputTokens: number;
/**
* Maximum output tokens
*/
maxOutputTokens: number;
/**
* Model provider (for grouping and suggestions)
*/
provider: 'openai' | 'anthropic' | 'google' | 'mistral' | 'other';
/**
* Model display name
*/
displayName: string;
}
/**
* Built-in model limits database
* Updated as of November 2025
*/
declare const MODEL_LIMITS: Record<string, ModelLimits>;
/**
* Get model limits for a specific model
* Returns undefined if model is not in the database
*/
declare function getModelLimits(model: string): ModelLimits | undefined;
/**
* Get suggested alternative models with larger context windows
* Used for helpful error messages
*/
declare function getSuggestedAlternatives(currentModel: string, requiredTokens: number): string[];
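/**
 * Pre-flight sketch (`promptTokens` is assumed to come from a tokenizer, as
 * in the createTokenizer example above):
 * ```typescript
 * import { getModelLimits, getSuggestedAlternatives } from '@limitrate/core';
 *
 * const limits = getModelLimits('gpt-4'); // undefined if not in MODEL_LIMITS
 * if (limits && promptTokens > limits.maxInputTokens) {
 *   const alternatives = getSuggestedAlternatives('gpt-4', promptTokens);
 *   throw new Error(
 *     `Prompt exceeds ${limits.displayName} context; consider: ${alternatives.join(', ')}`
 *   );
 * }
 * ```
 */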
/**
* Pre-flight validation for AI prompts
* Validates token counts against model limits BEFORE consuming rate limits
*/
/**
* Validation result
*/
interface ValidationResult {
/**
* Whether the prompt is valid (within limits)
*/
valid: boolean;
/**
* Reason for failure (if invalid)
*/
reason?: string;
/**
* Detected input token count
*/
inputTokens: number;
/**
* Maximum allowed input tokens for this model
*/
maxInputTokens?: number;
/**
* Requested output tokens (if provided)
*/
outputTokens?: number;
/**
* Maximum allowed output tokens for this model
*/
maxOutputTokens?: number;
/**
* Total tokens (input + output)
*/
totalTokens: number;
/**
* Suggested alternative models (if validation failed)
*/
suggestedModels?: string[];
/**
* Model display name
*/
modelDisplayName?: string;
}
/**
* Validation options
*/
interface ValidationOptions {
/**
* Model name (e.g., 'gpt-4', 'claude-3-opus')
*/
model: string;
/**
* Tokenizer to use for counting tokens
*/
tokenizer: Tokenizer;
/**
* Prompt text or array of messages
*/
prompt: string | string[];
/**
* Requested output tokens (optional)
* If provided, total tokens will be validated
*/
maxOutputTokens?: number;
/**
* Custom model limits (overrides built-in database)
* Useful for new models or fine-tuned models
*/
customLimits?: Partial<ModelLimits>;
}
/**
* Validate a prompt against model limits
*
* @example
* ```typescript
* const tokenizer = await createTokenizer('gpt-4');
* const result = await validatePrompt({
* model: 'gpt-4',
* tokenizer,
* prompt: 'Hello world',
* maxOutputTokens: 1000
* });
*
* if (!result.valid) {
* throw new Error(result.reason);
* }
* ```
*/
declare function validatePrompt(options: ValidationOptions): Promise<ValidationResult>;
/**
* Format a validation error message for user display
*
* @example
* ```typescript
* const result = await validatePrompt(options);
* if (!result.valid) {
* console.error(formatValidationError(result));
* }
* ```
*/
declare function formatValidationError(result: ValidationResult): string;
/**
* Streaming response token tracker (v1.7.0 - C4)
* Manual tracking API for streaming responses
*/
/**
* Streaming tracker for manual token tracking
* Simple accumulator for tracking tokens during streaming
*/
declare class StreamingTracker {
private accumulatedTokens;
/**
* Track tokens from a streaming chunk
* Call this for each chunk received from the streaming API
*
* @param tokens Number of tokens in this chunk
* @returns Current accumulated token count
*
* @example
* ```typescript
* const tracker = new StreamingTracker();
*
* for await (const chunk of stream) {
* const tokens = countTokensInChunk(chunk);
* tracker.trackChunk(tokens);
* }
*
* const total = tracker.getTotalTokens();
* ```
*/
trackChunk(tokens: number): number;
/**
* Get total accumulated tokens so far
*/
getTotalTokens(): number;
/**
* Reset the accumulated token count
* Useful if you want to reuse the tracker
*/
reset(): void;
}
/**
* Parse OpenAI streaming chunk for token usage
* OpenAI streams usage info in the final chunk
*
* @example
* ```typescript
* for await (const chunk of stream) {
* const tokens = parseOpenAIChunk(chunk);
* if (tokens) {
* tracker.trackChunk(tokens);
* }
* }
* ```
*/
declare function parseOpenAIChunk(chunk: string): number | null;
/**
* Parse Anthropic streaming chunk for token usage
* Anthropic sends usage info in specific message types
*
* @example
* ```typescript
* for await (const chunk of stream) {
* const tokens = parseAnthropicChunk(chunk);
* if (tokens) {
* tracker.trackChunk(tokens);
* }
* }
* ```
*/
declare function parseAnthropicChunk(chunk: string): number | null;
/**
* Helper to count tokens in any text (fallback)
* Uses simple length/4 approximation
*
* @example
* ```typescript
* const tokens = estimateTokens(chunkText);
* tracker.trackChunk(tokens);
* ```
*/
declare function estimateTokens(text: string): number;
/**
* Concurrency Limiter (v2.0.0 - D1)
* Limit how many requests can run simultaneously
*/
/**
* Concurrency configuration
*/
interface ConcurrencyConfig {
max: number;
queueTimeout?: number;
actionOnExceed?: 'queue' | 'block';
}
/**
* Concurrency limiter
* Controls how many operations can run simultaneously
*/
declare class ConcurrencyLimiter {
private running;
private queue;
private readonly max;
private readonly queueTimeout;
private readonly actionOnExceed;
constructor(config: ConcurrencyConfig);
/**
* Acquire a slot for execution
* Returns a promise that resolves when a slot is available
*
* @param priority - Lower number = higher priority (default: 5)
*/
acquire(priority?: number): Promise<() => void>;
/**
* Release a slot
* Process next item in queue if available
*/
private release;
/**
* Get current stats
*/
getStats(): {
running: number;
queued: number;
available: number;
};
/**
* Clear queue (useful for cleanup)
*/
clearQueue(): void;
}
/**
* Get or create a concurrency limiter for an endpoint
*
* IMPORTANT: The limiter is cached per endpoint AND config.
* Different configs for the same endpoint will create separate limiters.
* This allows tests to use the same endpoint with different concurrency configs.
*/
declare function getConcurrencyLimiter(endpoint: string, config: ConcurrencyConfig): ConcurrencyLimiter;
/**
* Clear all limiters (useful for testing)
*/
declare function clearAllLimiters(): void;
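/**
 * Acquire/release sketch (`runExpensiveJob` is a placeholder for your own
 * work; the config values are illustrative):
 * ```typescript
 * import { getConcurrencyLimiter } from '@limitrate/core';
 *
 * const limiter = getConcurrencyLimiter('POST|/api/jobs', {
 *   max: 2,
 *   queueTimeout: 10000,
 *   actionOnExceed: 'queue',
 * });
 *
 * const release = await limiter.acquire(); // waits (or rejects) per actionOnExceed
 * try {
 *   await runExpensiveJob();
 * } finally {
 *   release(); // always release, even on failure
 * }
 * ```
 */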
/**
* Penalty/Reward Manager (v2.0.0 - D4)
* Dynamically adjust rate limits based on user behavior
*/
interface PenaltyState {
multiplier: number;
expiresAt: number;
reason: 'violation' | 'reward';
}
/**
* Penalty/Reward Manager
*/
declare class PenaltyManager {
private store;
constructor(store: Store);
/**
* Get penalty state key for a user/endpoint
*/
private getPenaltyKey;
/**
* Get current penalty/reward multiplier for a user/endpoint
* Returns 1.0 if no penalty/reward is active
*/
getMultiplier(user: string, endpoint: string): Promise<number>;
/**
* Apply a penalty to a user/endpoint
*/
applyPenalty(user: string, endpoint: string, config: NonNullable<PenaltyConfig['onViolation']>): Promise<void>;
/**
* Apply a reward to a user/endpoint
*/
applyReward(user: string, endpoint: string, config: NonNullable<PenaltyConfig['rewards']>): Promise<void>;
/**
* Check if a reward should be granted based on usage
*/
shouldGrantReward(currentUsage: number, limit: number, rewardConfig: NonNullable<PenaltyConfig['rewards']>): boolean;
/**
* Clear penalty/reward for a user/endpoint
*/
clear(user: string, endpoint: string): Promise<void>;
}
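/**
 * Penalty sketch (the base limit, duration, and multiplier are illustrative):
 * ```typescript
 * import { createStore, PenaltyManager } from '@limitrate/core';
 *
 * const penalties = new PenaltyManager(createStore({ type: 'memory' }));
 *
 * // On violation: halve this user's limit on this endpoint for 5 minutes
 * await penalties.applyPenalty('user_123', 'POST|/api', { duration: 300, multiplier: 0.5 });
 *
 * const baseLimit = 100;
 * const multiplier = await penalties.getMultiplier('user_123', 'POST|/api'); // 0.5 while active, else 1.0
 * const effectiveLimit = Math.floor(baseLimit * multiplier);
 * ```
 */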
export { type CheckContext, type CheckResult, type ConcurrencyConfig$1 as ConcurrencyConfig, ConcurrencyLimiter, type CostCheckResult, type CostRule, type EndpointPolicy, type EndpointStats, EndpointTracker, type EndpointTrackerOptions, type EnforcementAction, EventEmitter, type EventHandler, type LimitRateEvent, MODEL_LIMITS, MemoryStore, type ModelLimits, type PenaltyConfig, PenaltyManager, type PenaltyState, type PlanName, type PolicyConfig, PolicyEngine, type RateCheckResult, type RateRule, RedisStore, type RedisStoreOptions, type Store, type StoreConfig, StreamingTracker, type Tokenizer, type TokenizerFunction, UpstashStore, type UpstashStoreOptions, type UserOverride, type UserOverridesConfig, ValidationError, type ValidationOptions, type ValidationResult, clearAllLimiters, clearTokenizerCache, createEndpointKey, createSharedMemoryStore, createSharedRedisStore, createSharedUpstashStore, createStore, createTokenizer, estimateTokens, extractIP, formatValidationError, getConcurrencyLimiter, getGlobalEndpointTracker, getModelLimits, getSuggestedAlternatives, isIPInList, normalizeRoutePath, parseAnthropicChunk, parseOpenAIChunk, setGlobalEndpointTracker, validateIPList, validatePolicyConfig, validatePrompt, validateStoreConfig };