jay-code

Streamlined AI CLI orchestration engine with mathematical rigor and enterprise-grade reliability

/**
 * Multi-LLM Provider Types and Interfaces
 * Unified type system for all LLM providers
 */
import { EventEmitter } from 'events';

// ===== PROVIDER TYPES =====
export type LLMProvider =
  | 'openai'
  | 'anthropic'
  | 'google'
  | 'cohere'
  | 'ollama'
  | 'llama-cpp'
  | 'custom';

export type LLMModel =
  // OpenAI Models
  | 'gpt-4-turbo-preview'
  | 'gpt-4'
  | 'gpt-4-32k'
  | 'gpt-3.5-turbo'
  | 'gpt-3.5-turbo-16k'
  // Anthropic Models
  | 'claude-3-opus-20240229'
  | 'claude-3-sonnet-20240229'
  | 'claude-3-haiku-20240307'
  | 'claude-2.1'
  | 'claude-2.0'
  | 'claude-instant-1.2'
  // Google Models
  | 'gemini-pro'
  | 'gemini-pro-vision'
  | 'palm-2'
  | 'bison'
  // Cohere Models
  | 'command'
  | 'command-light'
  | 'command-nightly'
  | 'generate-xlarge'
  | 'generate-medium'
  // Local Models
  | 'llama-2-7b'
  | 'llama-2-13b'
  | 'llama-2-70b'
  | 'mistral-7b'
  | 'mixtral-8x7b'
  | 'custom-model';

// ===== BASE INTERFACES =====
export interface LLMProviderConfig {
  provider: LLMProvider;
  apiKey?: string;
  apiUrl?: string;
  model: LLMModel;

  // Common parameters
  temperature?: number;
  maxTokens?: number;
  topP?: number;
  topK?: number;
  frequencyPenalty?: number;
  presencePenalty?: number;
  stopSequences?: string[];

  // Provider-specific settings
  providerOptions?: Record<string, any>;

  // Performance settings
  timeout?: number;
  retryAttempts?: number;
  retryDelay?: number;

  // Advanced features
  enableStreaming?: boolean;
  enableCaching?: boolean;
  cacheTimeout?: number;

  // Cost optimization
  enableCostOptimization?: boolean;
  maxCostPerRequest?: number;
  fallbackModels?: LLMModel[];
}

export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'function';
  content: string;
  name?: string; // For function messages
  functionCall?: {
    name: string;
    arguments: string;
  };
}

export interface LLMRequest {
  messages: LLMMessage[];
  model?: LLMModel;
  temperature?: number;
  maxTokens?: number;
  topP?: number;
  topK?: number;
  frequencyPenalty?: number;
  presencePenalty?: number;
  stopSequences?: string[];
  stream?: boolean;

  // Function calling
  functions?: LLMFunction[];
  functionCall?: 'auto' | 'none' | { name: string };

  // Provider-specific options
  providerOptions?: Record<string, any>;

  // Cost optimization
  costConstraints?: {
    maxCost?: number;
    preferredModels?: LLMModel[];
  };
}

export interface LLMFunction {
  name: string;
  description: string;
  parameters: {
    type: 'object';
    properties: Record<string, any>;
    required?: string[];
  };
}

export interface LLMResponse {
  id: string;
  model: LLMModel;
  provider: LLMProvider;

  // Content
  content: string;
  functionCall?: {
    name: string;
    arguments: string;
  };

  // Metadata
  usage: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };

  // Cost tracking
  cost?: {
    promptCost: number;
    completionCost: number;
    totalCost: number;
    currency: string;
  };

  // Performance metrics
  latency?: number;

  // Additional info
  finishReason?: 'stop' | 'length' | 'function_call' | 'content_filter';
  metadata?: Record<string, any>;
}

export interface LLMStreamEvent {
  type: 'content' | 'function_call' | 'error' | 'done';
  delta?: {
    content?: string;
    functionCall?: {
      name?: string;
      arguments?: string;
    };
  };
  error?: Error;
  usage?: LLMResponse['usage'];
  cost?: LLMResponse['cost'];
}

// ===== PROVIDER CAPABILITIES =====
export interface ProviderCapabilities {
  // Model features
  supportedModels: LLMModel[];
  maxContextLength: Record<LLMModel, number>;
  maxOutputTokens: Record<LLMModel, number>;

  // Feature support
  supportsStreaming: boolean;
  supportsFunctionCalling: boolean;
  supportsSystemMessages: boolean;
  supportsVision: boolean;
  supportsAudio: boolean;
  supportsTools: boolean;

  // Advanced features
  supportsFineTuning: boolean;
  supportsEmbeddings: boolean;
  supportsLogprobs: boolean;
  supportsBatching: boolean;

  // Constraints
  rateLimit?: {
    requestsPerMinute: number;
    tokensPerMinute: number;
    concurrentRequests: number;
  };

  // Cost information
  pricing?: {
    [model: string]: {
      promptCostPer1k: number;
      completionCostPer1k: number;
      currency: string;
    };
  };
}
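// ===== USAGE SKETCH (illustrative, not exported by this module) =====
// A minimal sketch of how the base interfaces above compose into a request.
// The model choice and the `get_weather` function are hypothetical examples
// for this sketch, not defaults shipped by jay-code.
const exampleRequest: LLMRequest = {
  model: 'gpt-4-turbo-preview',
  temperature: 0.2,
  maxTokens: 512,
  messages: [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: 'What is the weather in Paris?' },
  ],
  // Expose one callable function and let the model decide whether to use it
  functions: [
    {
      name: 'get_weather',
      description: 'Look up the current weather for a city',
      parameters: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city'],
      },
    },
  ],
  functionCall: 'auto',
};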
// ===== ERROR HANDLING =====
export class LLMProviderError extends Error {
  constructor(
    message: string,
    public code: string,
    public provider: LLMProvider,
    public statusCode?: number,
    public retryable: boolean = true,
    public details?: any
  ) {
    super(message);
    this.name = 'LLMProviderError';
  }
}

export class RateLimitError extends LLMProviderError {
  constructor(
    message: string,
    provider: LLMProvider,
    public retryAfter?: number,
    details?: any
  ) {
    super(message, 'RATE_LIMIT', provider, 429, true, details);
    this.name = 'RateLimitError';
  }
}

export class AuthenticationError extends LLMProviderError {
  constructor(message: string, provider: LLMProvider, details?: any) {
    super(message, 'AUTHENTICATION', provider, 401, false, details);
    this.name = 'AuthenticationError';
  }
}

export class ModelNotFoundError extends LLMProviderError {
  constructor(model: string, provider: LLMProvider, details?: any) {
    super(`Model ${model} not found`, 'MODEL_NOT_FOUND', provider, 404, false, details);
    this.name = 'ModelNotFoundError';
  }
}

export class ProviderUnavailableError extends LLMProviderError {
  constructor(provider: LLMProvider, details?: any) {
    super(`Provider ${provider} is unavailable`, 'PROVIDER_UNAVAILABLE', provider, 503, true, details);
    this.name = 'ProviderUnavailableError';
  }
}

// ===== ABSTRACT PROVIDER INTERFACE =====
export interface ILLMProvider extends EventEmitter {
  // Properties
  readonly name: LLMProvider;
  readonly capabilities: ProviderCapabilities;
  config: LLMProviderConfig;

  // Core methods
  initialize(): Promise<void>;
  complete(request: LLMRequest): Promise<LLMResponse>;
  streamComplete(request: LLMRequest): AsyncIterable<LLMStreamEvent>;

  // Model management
  listModels(): Promise<LLMModel[]>;
  getModelInfo(model: LLMModel): Promise<ModelInfo>;
  validateModel(model: LLMModel): boolean;

  // Health and status
  healthCheck(): Promise<HealthCheckResult>;
  getStatus(): ProviderStatus;

  // Cost management
  estimateCost(request: LLMRequest): Promise<CostEstimate>;
  getUsage(period?: UsagePeriod): Promise<UsageStats>;

  // Cleanup
  destroy(): void;
}

export interface ModelInfo {
  model: LLMModel;
  name: string;
  description: string;
  contextLength: number;
  maxOutputTokens: number;
  supportedFeatures: string[];
  pricing?: {
    promptCostPer1k: number;
    completionCostPer1k: number;
    currency: string;
  };
  deprecated?: boolean;
  deprecationDate?: Date;
  recommendedReplacement?: LLMModel;
}

export interface HealthCheckResult {
  healthy: boolean;
  latency?: number;
  error?: string;
  timestamp: Date;
  details?: Record<string, any>;
}

export interface ProviderStatus {
  available: boolean;
  currentLoad: number;
  queueLength: number;
  activeRequests: number;
  rateLimitRemaining?: number;
  rateLimitReset?: Date;
}

export interface CostEstimate {
  estimatedPromptTokens: number;
  estimatedCompletionTokens: number;
  estimatedTotalTokens: number;
  estimatedCost: {
    prompt: number;
    completion: number;
    total: number;
    currency: string;
  };
  confidence: number; // 0-1
}

export interface UsageStats {
  period: {
    start: Date;
    end: Date;
  };
  requests: number;
  tokens: {
    prompt: number;
    completion: number;
    total: number;
  };
  cost: {
    prompt: number;
    completion: number;
    total: number;
    currency: string;
  };
  errors: number;
  averageLatency: number;
  modelBreakdown: Record<LLMModel, {
    requests: number;
    tokens: number;
    cost: number;
  }>;
}
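// ===== USAGE SKETCH (illustrative, not exported by this module) =====
// A sketch of consuming streamComplete() and reacting to the typed errors
// above. `runStreaming` is a hypothetical helper for this sketch; it relies
// on the type guards defined at the bottom of this file (hoisted function
// declarations, so forward references are safe).
async function runStreaming(provider: ILLMProvider, request: LLMRequest): Promise<string> {
  let output = '';
  try {
    for await (const event of provider.streamComplete(request)) {
      if (event.type === 'content' && event.delta?.content) {
        output += event.delta.content; // accumulate streamed text
      } else if (event.type === 'error' && event.error) {
        throw event.error;
      }
    }
  } catch (error) {
    if (isRateLimitError(error)) {
      // RateLimitError carries an optional retry-after hint
      console.warn(`Rate limited by ${error.provider}; retry hint: ${error.retryAfter ?? 'n/a'}`);
    } else if (isLLMProviderError(error) && error.retryable) {
      console.warn(`Retryable ${error.code} error from ${error.provider}`);
    } else {
      throw error;
    }
  }
  return output;
}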
export type UsagePeriod = 'hour' | 'day' | 'week' | 'month' | 'all';

// ===== FALLBACK AND RETRY STRATEGIES =====
export interface FallbackStrategy {
  name: string;
  enabled: boolean;
  rules: FallbackRule[];
  maxAttempts: number;
}

export interface FallbackRule {
  condition: 'error' | 'rate_limit' | 'timeout' | 'cost' | 'unavailable';
  errorCodes?: string[];
  fallbackProviders: LLMProvider[];
  fallbackModels?: LLMModel[];
  retryOriginal: boolean;
  retryDelay?: number;
}

export interface RetryStrategy {
  maxAttempts: number;
  initialDelay: number;
  maxDelay: number;
  backoffMultiplier: number;
  jitter: boolean;
  retryableErrors: string[];
}

// ===== CACHING INTERFACES =====
export interface CacheConfig {
  enabled: boolean;
  ttl: number; // Time to live in seconds
  maxSize: number; // Max cache size in MB
  strategy: 'lru' | 'lfu' | 'ttl';
  keyGenerator?: (request: LLMRequest) => string;
}

export interface CacheEntry {
  key: string;
  request: LLMRequest;
  response: LLMResponse;
  timestamp: Date;
  hits: number;
  size: number;
}

// ===== RATE LIMITING =====
export interface RateLimiter {
  checkLimit(provider: LLMProvider, model?: LLMModel): Promise<boolean>;
  consumeToken(provider: LLMProvider, tokens: number): Promise<void>;
  getRemainingTokens(provider: LLMProvider): Promise<number>;
  getResetTime(provider: LLMProvider): Promise<Date | null>;
  waitForCapacity(provider: LLMProvider, tokens: number): Promise<void>;
}

// ===== LOAD BALANCING =====
export interface LoadBalancer {
  selectProvider(request: LLMRequest, availableProviders: ILLMProvider[]): Promise<ILLMProvider>;
  updateProviderMetrics(provider: LLMProvider, metrics: ProviderMetrics): void;
  rebalance(): Promise<void>;
}

export interface ProviderMetrics {
  provider: LLMProvider;
  timestamp: Date;
  latency: number;
  errorRate: number;
  successRate: number;
  load: number;
  cost: number;
  availability: number;
}

// ===== MONITORING AND ANALYTICS =====
export interface ProviderMonitor {
  trackRequest(provider: LLMProvider, request: LLMRequest, response: LLMResponse | Error): void;
  getMetrics(provider?: LLMProvider, period?: UsagePeriod): Promise<ProviderMetrics[]>;
  getAlerts(): Alert[];
  setAlertThreshold(metric: string, threshold: number): void;
}

export interface Alert {
  id: string;
  timestamp: Date;
  provider: LLMProvider;
  type: 'error_rate' | 'latency' | 'cost' | 'rate_limit' | 'availability';
  severity: 'info' | 'warning' | 'error' | 'critical';
  message: string;
  value: number;
  threshold: number;
}

// ===== COST OPTIMIZATION =====
export interface CostOptimizer {
  selectOptimalModel(request: LLMRequest, constraints: CostConstraints): Promise<OptimizationResult>;
  analyzeCostTrends(period: UsagePeriod): Promise<CostAnalysis>;
  suggestOptimizations(): Promise<OptimizationSuggestion[]>;
}

export interface CostConstraints {
  maxCostPerRequest?: number;
  maxCostPerToken?: number;
  preferredProviders?: LLMProvider[];
  requiredFeatures?: string[];
  minQuality?: number; // 0-1
}

export interface OptimizationResult {
  provider: LLMProvider;
  model: LLMModel;
  estimatedCost: number;
  estimatedQuality: number; // 0-1
  reasoning: string;
}

export interface CostAnalysis {
  period: UsagePeriod;
  totalCost: number;
  costByProvider: Record<LLMProvider, number>;
  costByModel: Record<LLMModel, number>;
  trends: {
    dailyAverage: number;
    weeklyGrowth: number;
    projection30Days: number;
  };
}

export interface OptimizationSuggestion {
  type: 'model_switch' | 'provider_switch' | 'parameter_tuning' | 'caching' | 'batching';
  description: string;
  estimatedSavings: number;
  implementation: string;
  impact: 'low' | 'medium' | 'high';
}
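// ===== USAGE SKETCH (illustrative, not exported by this module) =====
// A minimal sketch of exponential backoff driven by a RetryStrategy value.
// The concrete numbers are assumptions for the example, not package defaults.
const exampleRetry: RetryStrategy = {
  maxAttempts: 3,
  initialDelay: 500, // ms
  maxDelay: 8000,    // ms
  backoffMultiplier: 2,
  jitter: true,
  retryableErrors: ['RATE_LIMIT', 'PROVIDER_UNAVAILABLE'],
};

function backoffDelay(strategy: RetryStrategy, attempt: number): number {
  // attempt is 0-based: 500ms, 1000ms, 2000ms, ... capped at maxDelay
  const base = Math.min(
    strategy.initialDelay * strategy.backoffMultiplier ** attempt,
    strategy.maxDelay
  );
  // Full jitter spreads retries out to avoid synchronized retry storms
  return strategy.jitter ? Math.random() * base : base;
}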
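// A sketch of a deterministic CacheConfig.keyGenerator. Serializing the
// request fields that affect the completion is the simplest approach; a
// production implementation would likely hash the string rather than use
// it raw as a key.
const exampleKeyGenerator: CacheConfig['keyGenerator'] = (request) =>
  JSON.stringify([
    request.model,
    request.temperature,
    request.maxTokens,
    request.stopSequences,
    request.messages,
  ]);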
// ===== TYPE GUARDS =====
export function isLLMResponse(obj: any): obj is LLMResponse {
  return obj && typeof obj.id === 'string' && typeof obj.content === 'string';
}

export function isLLMStreamEvent(obj: any): obj is LLMStreamEvent {
  return obj && typeof obj.type === 'string';
}

export function isLLMProviderError(error: any): error is LLMProviderError {
  return error instanceof LLMProviderError;
}

export function isRateLimitError(error: any): error is RateLimitError {
  return error instanceof RateLimitError;
}
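// ===== USAGE SKETCH (illustrative, not exported by this module) =====
// The guards above narrow unknown values, e.g. when rehydrating cached
// entries or handling cross-process messages. `describePayload` is a
// hypothetical helper for this sketch.
function describePayload(value: unknown): string {
  if (isLLMResponse(value)) {
    return `response ${value.id} from ${value.provider} (${value.usage.totalTokens} tokens)`;
  }
  if (isLLMStreamEvent(value)) {
    return `stream event: ${value.type}`;
  }
  return 'unknown payload';
}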