@catalystlabs/tryai

Dead simple AI library. One line setup. Zero config. Just works.
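
The tagline is not backed by any usage documentation on this page — only the bundled source map below — so the snippet that follows is a hedged sketch rather than the package's documented API. The map's sourcesContent does show that core/model-selector.ts exports selectModelForTask(task: TaskRequirements), which filters a built-in model catalog by capability and sorts by cost; whether that helper is re-exported from the package root is an assumption made here for illustration.

  // Hypothetical TypeScript sketch — the import from the package root is an
  // assumption; only the selectModelForTask signature and the TaskRequirements
  // fields are visible in the source map below.
  import { selectModelForTask } from "@catalystlabs/tryai";

  // TaskRequirements fields taken from the bundled model-selector source:
  // provider, needsImage, needsVideo, needsAudio, needsFunctionCalling,
  // needsJsonMode, minContextWindow, needsLongContext, preferCheap, preferFast.
  const model = selectModelForTask({
    provider: "any",            // or "openai" | "anthropic" | "google" | "llama"
    needsFunctionCalling: true, // keep only models that support tool calling
    minContextWindow: 128000,   // require at least a 128K-token context window
    preferCheap: true,          // sort the remaining candidates by blended cost
  });

  console.log(model); // id of the cheapest catalog model matching the constraints

With preferCheap set, the bundled selector sorts candidates by the average of their input and output cost per million tokens, so the nano/lite/flash tiers in the catalog tend to be chosen.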

1 line, 330 kB
{"version":3,"sources":["../core/conversation-manager.ts","../core/model-selector.ts","../core/metadata-config.ts","../core/cost-estimator.ts","../core/rate-limiter.ts","../core/settings-loader.ts","../utils/template-helpers.ts","../templates/prompt-template-loader.ts","../templates/response-template-loader.ts","../templates/template-system.ts","../transformers/response-transformer.ts","../utils/function-helpers.ts","../tools/tool-system.ts","../pipeline/pipeline-system.ts","../utils/database-recorder.ts","../unified-client.ts","../utils/image-helpers.ts","../apis/image-api.ts","../unified-client-enhanced.ts","../system/config-loader.ts","../apis/embeddings.ts","../core/ai.ts","../types/provider.ts","../utils/audio-helpers.ts","../utils/token-estimator.ts"],"sourcesContent":["/**\n * Conversation Manager - Manages conversation history for each client\n */\n\nimport { v4 as uuidv4 } from 'uuid';\nimport type { \n Message, \n MessageMetadata, \n ConversationConfig, \n ConversationMetadata,\n FileReference \n} from '../types';\nimport { ALL_MODELS } from './model-selector';\nimport type { DatabaseRecorder } from '../utils/database-recorder';\n\nexport class ConversationManager {\n private messages: Message[] = [];\n private config: ConversationConfig;\n private conversationId: string;\n private currentModel?: string;\n private currentProvider?: string;\n private databaseRecorder?: DatabaseRecorder | null;\n private files: { uploaded: FileReference[]; generated: FileReference[] } = {\n uploaded: [],\n generated: []\n };\n \n constructor(config?: ConversationConfig, databaseRecorder?: DatabaseRecorder | null) {\n this.config = config || { maxHistory: 50 };\n this.conversationId = uuidv4();\n this.databaseRecorder = databaseRecorder;\n }\n \n /**\n * Record message to database (fire-and-forget, never blocks)\n */\n private recordMessageAsync(message: Message): void {\n if (!this.databaseRecorder) return;\n\n this.databaseRecorder.recordAsync(async () => {\n await this.databaseRecorder!.recordMessage({\n conversation_id: this.conversationId,\n role: message.role,\n content: message.content,\n message_index: this.messages.length - 1,\n in_context: true,\n tokens_estimate: message.metadata?.tokens?.total,\n });\n });\n }\n \n /**\n * Add a message to the conversation with proper metadata\n */\n addMessage(message: Omit<Message, 'id' | 'timestamp' | 'inContext'>, metadata?: MessageMetadata): Message {\n const fullMessage: Message = {\n id: uuidv4(),\n timestamp: new Date(),\n ...message\n };\n \n // Attach metadata if provided (e.g., for assistant responses)\n if (metadata) {\n fullMessage.metadata = metadata;\n // Update model info if this is an assistant message\n if (message.role === 'assistant' && metadata.provider && metadata.model) {\n this.setModel(metadata.provider, metadata.model);\n }\n }\n \n this.messages.push(fullMessage);\n \n // Record message to database (fire-and-forget)\n this.recordMessageAsync(fullMessage);\n \n // Trim history if needed\n if (this.config.maxHistory && this.messages.length > this.config.maxHistory) {\n // Keep system messages and recent messages\n const systemMessages = this.messages.filter(m => m.role === 'system');\n const recentMessages = this.messages\n .filter(m => m.role !== 'system')\n .slice(-(this.config.maxHistory - systemMessages.length));\n \n this.messages = [...systemMessages, ...recentMessages];\n }\n \n // Update context window tracking\n this.updateContextWindow();\n \n return fullMessage;\n }\n \n /**\n * Get conversation history\n */\n 
getHistory(limit?: number): Message[] {\n if (limit) {\n return this.messages.slice(-limit);\n }\n return [...this.messages];\n }\n \n /**\n * Get messages formatted for API calls\n */\n getFormattedHistory(limit?: number): Array<{ role: string; content: string }> {\n const messages = limit ? this.messages.slice(-limit) : this.messages;\n return messages.map(m => ({\n role: m.role,\n content: m.content\n }));\n }\n \n /**\n * Clear conversation history\n */\n clear(): number {\n const count = this.messages.length;\n this.messages = [];\n this.conversationId = uuidv4();\n return count;\n }\n \n /**\n * Set current model for context window tracking\n */\n setModel(provider: string, model: string) {\n this.currentProvider = provider;\n this.currentModel = model;\n this.updateContextWindow();\n }\n \n /**\n * Update which messages are in the context window\n */\n private updateContextWindow() {\n if (!this.currentModel || !this.currentProvider) return;\n \n // Get model's context window size\n const modelKey = `${this.currentModel}`;\n const modelInfo = ALL_MODELS[modelKey] || ALL_MODELS[this.currentModel];\n if (!modelInfo) return;\n \n const contextWindowSize = modelInfo.maxTokens;\n let tokensUsed = 0;\n let messagesInContext = [];\n \n // Iterate backwards through messages to find what fits in context\n for (let i = this.messages.length - 1; i >= 0; i--) {\n const msg = this.messages[i];\n const msgTokens = msg.metadata?.tokens?.total || Math.ceil(msg.content.length / 4);\n \n if (tokensUsed + msgTokens <= contextWindowSize) {\n tokensUsed += msgTokens;\n messagesInContext.unshift(i);\n this.messages[i].inContext = true;\n } else {\n this.messages[i].inContext = false;\n }\n }\n }\n \n /**\n * Add a file reference\n */\n addFile(file: FileReference, type: 'uploaded' | 'generated') {\n this.files[type].push(file);\n }\n \n /**\n * Get conversation metadata with context window info\n */\n getConversationMetadata(): ConversationMetadata {\n const stats = this.getStats();\n \n // Calculate total tokens and costs from messages with metadata\n let totalTokens = { input: 0, output: 0, total: 0 };\n let totalCost = { input: 0, output: 0, total: 0, currency: 'USD' as const };\n let totalLatency = 0;\n let messagesWithMetadata = 0;\n \n // Track context window usage\n let contextTokensUsed = 0;\n let messagesInContextIndices: number[] = [];\n \n this.messages.forEach((msg, idx) => {\n if (msg.metadata?.tokens) {\n totalTokens.input += msg.metadata.tokens.input;\n totalTokens.output += msg.metadata.tokens.output;\n totalTokens.total += msg.metadata.tokens.total;\n \n if (msg.inContext) {\n contextTokensUsed += msg.metadata.tokens.total;\n messagesInContextIndices.push(idx);\n }\n }\n if (msg.metadata?.cost) {\n totalCost.input += msg.metadata.cost.input;\n totalCost.output += msg.metadata.cost.output;\n totalCost.total += msg.metadata.cost.total;\n }\n if (msg.metadata?.latency) {\n totalLatency += msg.metadata.latency;\n messagesWithMetadata++;\n }\n });\n \n // Get model context window size\n const modelKey = this.currentModel || '';\n const modelInfo = ALL_MODELS[modelKey] || ALL_MODELS[this.currentModel || ''];\n const contextWindowSize = modelInfo?.maxTokens || 0;\n \n return {\n id: this.conversationId,\n messageCount: this.messages.length,\n startedAt: this.messages[0]?.timestamp,\n lastMessageAt: this.messages[this.messages.length - 1]?.timestamp,\n contextWindow: {\n size: contextWindowSize,\n used: contextTokensUsed,\n messagesInContext: {\n start: messagesInContextIndices[0] || 0,\n end: 
messagesInContextIndices[messagesInContextIndices.length - 1] || 0,\n count: messagesInContextIndices.length\n }\n },\n totalTokens,\n totalCost,\n files: this.files,\n stats,\n averageLatency: messagesWithMetadata > 0 ? totalLatency / messagesWithMetadata : 0\n };\n }\n \n /**\n * Export conversation as JSON\n */\n export(): string {\n return JSON.stringify({\n conversationId: this.conversationId,\n exportedAt: new Date().toISOString(),\n messages: this.messages,\n config: this.config,\n metadata: this.getConversationMetadata()\n }, null, 2);\n }\n \n /**\n * Import conversation from JSON\n */\n import(json: string): void {\n try {\n const data = JSON.parse(json);\n this.conversationId = data.conversationId || uuidv4();\n this.messages = data.messages || [];\n this.config = data.config || this.config;\n \n // Convert timestamps back to Date objects\n this.messages = this.messages.map(m => ({\n ...m,\n timestamp: new Date(m.timestamp),\n metadata: m.metadata ? {\n ...m.metadata,\n timestamp: new Date(m.metadata.timestamp)\n } : undefined\n }));\n } catch (error) {\n throw new Error(`Failed to import conversation: ${error}`);\n }\n }\n \n /**\n * Get message by ID\n */\n getMessage(id: string): Message | undefined {\n return this.messages.find(m => m.id === id);\n }\n \n /**\n * Update a message\n */\n updateMessage(id: string, updates: Partial<Message>): boolean {\n const index = this.messages.findIndex(m => m.id === id);\n if (index === -1) return false;\n \n this.messages[index] = {\n ...this.messages[index],\n ...updates,\n id: this.messages[index].id, // Prevent ID change\n timestamp: this.messages[index].timestamp // Prevent timestamp change\n };\n \n return true;\n }\n \n /**\n * Remove a message\n */\n removeMessage(id: string): boolean {\n const index = this.messages.findIndex(m => m.id === id);\n if (index === -1) return false;\n \n this.messages.splice(index, 1);\n return true;\n }\n \n /**\n * Get conversation statistics\n */\n getStats() {\n const userMessages = this.messages.filter(m => m.role === 'user').length;\n const assistantMessages = this.messages.filter(m => m.role === 'assistant').length;\n const systemMessages = this.messages.filter(m => m.role === 'system').length;\n const toolMessages = this.messages.filter(m => m.role === 'tool').length;\n \n return {\n total: this.messages.length,\n userMessages,\n assistantMessages,\n systemMessages,\n toolMessages,\n averageMessageLength: this.messages.reduce((acc, m) => acc + m.content.length, 0) / this.messages.length || 0\n };\n }\n \n /**\n * Get the last assistant message with metadata\n */\n getLastAssistantMessage(): Message | undefined {\n return [...this.messages].reverse().find(m => m.role === 'assistant');\n }\n \n /**\n * Calculate total conversation cost\n */\n getTotalCost(): number {\n return this.messages.reduce((total, msg) => {\n return total + (msg.metadata?.cost?.total || 0);\n }, 0);\n }\n}","/**\n * Model selector for choosing the right model based on use case\n */\n\nimport { ModelCapabilities, TaskRequirements } from \"../types/model\";\n\n// OpenAI models\nexport const OPENAI_MODELS: Record<string, ModelCapabilities> = {\n\t// GPT-4.1 Series\n\t\"gpt-4.1-2025-04-14\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 32000,\n\t\tcostPer1MTokens: { input: 2.0, output: 8.0 },\n\t\trateLimit: { rpm: 500, tpm: 300000 },\n\t},\n\t\"gpt-4.1-mini-2025-04-14\": {\n\t\ttext: 
true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 32000,\n\t\tcostPer1MTokens: { input: 0.4, output: 1.6 },\n\t\trateLimit: { rpm: 500, tpm: 300000 },\n\t},\n\t\"gpt-4.1-nano-2025-04-14\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 32000,\n\t\tcostPer1MTokens: { input: 0.1, output: 0.4 },\n\t\trateLimit: { rpm: 500, tpm: 300000 },\n\t},\n\n\t// GPT-4o Series\n\t\"gpt-4o-2024-08-06\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: true,\n\t\taudio: true,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 128000, // 128K context\n\t\tmaxOutput: 16384,\n\t\tcostPer1MTokens: { input: 2.5, output: 10.0 },\n\t\trateLimit: { rpm: 500, tpm: 300000 },\n\t},\n\t\"gpt-4o-mini-2024-07-18\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: true,\n\t\taudio: true,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 128000, // 128K context\n\t\tmaxOutput: 16384,\n\t\tcostPer1MTokens: { input: 0.15, output: 0.6 },\n\t\trateLimit: { rpm: 500, tpm: 300000 },\n\t},\n\n\t// O-Series (Reasoning)\n\t\"o3-2025-04-16\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 32768,\n\t\tcostPer1MTokens: { input: 2.0, output: 8.0 },\n\t\trateLimit: { rpm: 100, tpm: 100000 },\n\t},\n\t\"o4-mini-2025-04-16\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 32768,\n\t\tcostPer1MTokens: { input: 1.1, output: 4.4 },\n\t\trateLimit: { rpm: 100, tpm: 100000 },\n\t},\n\n\t// Legacy\n\t\"gpt-3.5-turbo-0125\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 16385,\n\t\tmaxOutput: 4096,\n\t\tcostPer1MTokens: { input: 0.5, output: 1.5 },\n\t\trateLimit: { rpm: 3500, tpm: 350000 },\n\t},\n};\n\n// Anthropic models\nexport const ANTHROPIC_MODELS: Record<string, ModelCapabilities> = {\n\t\"claude-opus-4-20250514\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 32000,\n\t\tcostPer1MTokens: { input: 15.0, output: 75.0 },\n\t},\n\t\"claude-sonnet-4-20250514\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 64000,\n\t\tcostPer1MTokens: { input: 3.0, output: 15.0 },\n\t},\n\t\"claude-3-7-sonnet-20250219\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 128000, // with beta header\n\t\tcostPer1MTokens: { input: 3.0, output: 15.0 },\n\t},\n\t\"claude-3-5-haiku-20241022\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 200000, // 200K context\n\t\tmaxOutput: 8192,\n\t\tcostPer1MTokens: { input: 0.8, output: 4.0 },\n\t},\n};\n\n// Google Gemini models\nexport const GEMINI_MODELS: Record<string, ModelCapabilities> = 
{\n\t\"gemini-2.5-pro\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: true,\n\t\taudio: true,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 32000,\n\t\tcostPer1MTokens: {\n\t\t\tinput: 1.25, // <= 200K tokens\n\t\t\toutput: 10.0, // <= 200K tokens\n\t\t},\n\t},\n\t\"gemini-2.5-flash\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: true,\n\t\taudio: true,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 64000,\n\t\tcostPer1MTokens: {\n\t\t\tinput: 0.3, // text/image/video\n\t\t\toutput: 2.5,\n\t\t},\n\t},\n\t\"gemini-2.5-flash-lite\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: true,\n\t\taudio: true,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 1000000, // 1M context\n\t\tmaxOutput: 64000,\n\t\tcostPer1MTokens: {\n\t\t\tinput: 0.1, // text/image/video\n\t\t\toutput: 0.4,\n\t\t},\n\t},\n};\n\n// Llama models with their specific capabilities\nexport const LLAMA_MODELS: Record<string, ModelCapabilities> = {\n\t\"Llama-4-Maverick-17B-128E-Instruct-FP8\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 128000, // 128K context\n\t\tmaxOutput: 8192,\n\t\tcostPer1MTokens: { input: 0.1, output: 0.1 },\n\t},\n\t\"Llama-4-Scout-17B-16E-Instruct-FP8\": {\n\t\ttext: true,\n\t\timage: true,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 16000, // 16K context\n\t\tmaxOutput: 8192,\n\t\tcostPer1MTokens: { input: 0.08, output: 0.08 },\n\t},\n\t\"Llama-3.3-70B-Instruct\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 8192,\n\t\tmaxOutput: 8192,\n\t\tcostPer1MTokens: { input: 0.35, output: 0.4 },\n\t},\n\t\"Llama-3.3-8B-Instruct\": {\n\t\ttext: true,\n\t\timage: false,\n\t\tvideo: false,\n\t\taudio: false,\n\t\tfunctionCalling: true,\n\t\tjsonMode: true,\n\t\tmaxTokens: 8192,\n\t\tmaxOutput: 8192,\n\t\tcostPer1MTokens: { input: 0.05, output: 0.05 },\n\t},\n};\n\n// All models combined\nexport const ALL_MODELS: Record<string, ModelCapabilities> = {\n\t...OPENAI_MODELS,\n\t...ANTHROPIC_MODELS,\n\t...GEMINI_MODELS,\n\t...LLAMA_MODELS,\n};\n\nexport function selectModelForTask(task: TaskRequirements): string {\n\tlet candidates = Object.entries(ALL_MODELS);\n\n\t// Filter by provider if specified\n\tif (task.provider && task.provider !== \"any\") {\n\t\tconst providerModels = {\n\t\t\topenai: OPENAI_MODELS,\n\t\t\tanthropic: ANTHROPIC_MODELS,\n\t\t\tgoogle: GEMINI_MODELS,\n\t\t\tllama: LLAMA_MODELS,\n\t\t}[task.provider];\n\n\t\tcandidates = Object.entries(providerModels);\n\t}\n\n\t// Filter by capabilities\n\tif (task.needsImage) {\n\t\tcandidates = candidates.filter(([_, m]) => m.image);\n\t}\n\tif (task.needsVideo) {\n\t\tcandidates = candidates.filter(([_, m]) => m.video);\n\t}\n\tif (task.needsAudio) {\n\t\tcandidates = candidates.filter(([_, m]) => m.audio);\n\t}\n\tif (task.needsFunctionCalling) {\n\t\tcandidates = candidates.filter(([_, m]) => m.functionCalling);\n\t}\n\tif (task.needsJsonMode) {\n\t\tcandidates = candidates.filter(([_, m]) => m.jsonMode);\n\t}\n\n\t// Filter by context window\n\tif (task.minContextWindow) {\n\t\tcandidates = candidates.filter(\n\t\t\t([_, m]) => m.maxTokens >= task.minContextWindow!\n\t\t);\n\t}\n\tif (task.needsLongContext) {\n\t\tcandidates = candidates.filter(([_, m]) => m.maxTokens >= 
128000);\n\t}\n\n\t// No candidates found\n\tif (candidates.length === 0) {\n\t\tthrow new Error(\"No models match the specified requirements\");\n\t}\n\n\t// Sort by preference\n\tif (task.preferCheap) {\n\t\tcandidates.sort(([_, a], [__, b]) => {\n\t\t\tconst costA = (a.costPer1MTokens.input + a.costPer1MTokens.output) / 2;\n\t\t\tconst costB = (b.costPer1MTokens.input + b.costPer1MTokens.output) / 2;\n\t\t\treturn costA - costB;\n\t\t});\n\t} else if (task.preferFast) {\n\t\t// Prefer models with higher rate limits and smaller size\n\t\tcandidates.sort(([nameA, a], [nameB, b]) => {\n\t\t\t// Rough heuristic: mini/lite models are usually faster\n\t\t\tconst scoreA =\n\t\t\t\tnameA.includes(\"mini\") ||\n\t\t\t\tnameA.includes(\"lite\") ||\n\t\t\t\tnameA.includes(\"flash\")\n\t\t\t\t\t? 0\n\t\t\t\t\t: 1;\n\t\t\tconst scoreB =\n\t\t\t\tnameB.includes(\"mini\") ||\n\t\t\t\tnameB.includes(\"lite\") ||\n\t\t\t\tnameB.includes(\"flash\")\n\t\t\t\t\t? 0\n\t\t\t\t\t: 1;\n\t\t\treturn scoreA - scoreB;\n\t\t});\n\t} else {\n\t\t// Default: balance cost and capability\n\t\tcandidates.sort(([_, a], [__, b]) => {\n\t\t\tconst costA = (a.costPer1MTokens.input + a.costPer1MTokens.output) / 2;\n\t\t\tconst costB = (b.costPer1MTokens.input + b.costPer1MTokens.output) / 2;\n\t\t\t// Normalize by context window (bigger context = more valuable)\n\t\t\tconst scoreA = costA / Math.log(a.maxTokens);\n\t\t\tconst scoreB = costB / Math.log(b.maxTokens);\n\t\t\treturn scoreA - scoreB;\n\t\t});\n\t}\n\n\treturn candidates[0][0];\n}\n","/**\n * Metadata Configuration Manager\n *\n * Handles metadata configuration with proper priority:\n * 1. Client config (highest priority)\n * 2. Global config\n * 3. Default config (lowest priority)\n */\n\nimport type {\n\tBaseConfig,\n\tGlobalConfig,\n\tMessageMetadata,\n\tProviderType,\n} from \"../types\";\nimport { v4 as uuidv4 } from \"uuid\";\nimport { CostEstimator } from \"./cost-estimator\";\n\nexport interface MetadataConfig {\n\tenabled?: boolean;\n\ttrackCosts?: boolean;\n\ttrackLatency?: boolean;\n\ttrackTokens?: boolean;\n\tincludeTimestamp?: boolean;\n}\n\n// Default metadata configuration\nconst DEFAULT_METADATA_CONFIG: Required<MetadataConfig> = {\n\tenabled: true,\n\ttrackCosts: true,\n\ttrackLatency: true,\n\ttrackTokens: true,\n\tincludeTimestamp: true,\n};\n\nexport class MetadataConfigManager {\n\tprivate static costEstimator = new CostEstimator();\n\n\t/**\n\t * Get merged metadata configuration with proper priority\n\t */\n\tstatic getMetadataConfig(\n\t\tclientConfig?: BaseConfig,\n\t\tglobalConfig?: GlobalConfig\n\t): Required<MetadataConfig> {\n\t\t// Start with defaults\n\t\tlet config = { ...DEFAULT_METADATA_CONFIG };\n\n\t\t// Apply global config if exists\n\t\tif (globalConfig?.metadata) {\n\t\t\tconfig = {\n\t\t\t\t...config,\n\t\t\t\t...globalConfig.metadata,\n\t\t\t};\n\t\t}\n\n\t\t// Apply client config if exists (highest priority)\n\t\tif (clientConfig?.metadata) {\n\t\t\tconfig = {\n\t\t\t\t...config,\n\t\t\t\t...clientConfig.metadata,\n\t\t\t};\n\t\t}\n\n\t\treturn config;\n\t}\n\n\t/**\n\t * Check if metadata collection is enabled\n\t */\n\tstatic isEnabled(\n\t\tclientConfig?: BaseConfig,\n\t\tglobalConfig?: GlobalConfig\n\t): boolean {\n\t\tconst config = this.getMetadataConfig(clientConfig, globalConfig);\n\t\treturn config.enabled;\n\t}\n\n\t/**\n\t * Create metadata for a message\n\t */\n\tstatic async createMessageMetadata(\n\t\toptions: {\n\t\t\tprovider: ProviderType;\n\t\t\tmodel: string;\n\t\t\tstartTime: number;\n\t\t\tinputText: 
string;\n\t\t\toutputText: string;\n\t\t\tusage?: {\n\t\t\t\tpromptTokens: number;\n\t\t\t\tcompletionTokens: number;\n\t\t\t\ttotalTokens: number;\n\t\t\t};\n\t\t},\n\t\tclientConfig?: BaseConfig,\n\t\tglobalConfig?: GlobalConfig\n\t): Promise<MessageMetadata | undefined> {\n\t\tconst config = this.getMetadataConfig(clientConfig, globalConfig);\n\n\t\t// If metadata is disabled, return undefined\n\t\tif (!config.enabled) {\n\t\t\treturn undefined;\n\t\t}\n\n\t\tconst metadata: MessageMetadata = {\n\t\t\tid: uuidv4(),\n\t\t\ttimestamp: new Date(options.startTime),\n\t\t\tprovider: options.provider,\n\t\t\tmodel: options.model,\n\t\t\tlatency: 0, // Will be calculated below\n\t\t};\n\n\t\t// Track latency if enabled\n\t\tif (config.trackLatency) {\n\t\t\tmetadata.latency = Date.now() - options.startTime;\n\t\t}\n\n\t\t// Track tokens if enabled\n\t\tif (config.trackTokens) {\n\t\t\tlet inputTokens: number;\n\t\t\tlet outputTokens: number;\n\t\t\tlet estimated = false;\n\n\t\t\tif (options.usage) {\n\t\t\t\t// Use actual token counts from API\n\t\t\t\tinputTokens = options.usage.promptTokens;\n\t\t\t\toutputTokens = options.usage.completionTokens;\n\t\t\t} else {\n\t\t\t\t// Estimate tokens (rough approximation: 1 token ≈ 4 characters)\n\t\t\t\tinputTokens = Math.ceil(options.inputText.length / 4);\n\t\t\t\toutputTokens = Math.ceil(options.outputText.length / 4);\n\t\t\t\testimated = true;\n\t\t\t}\n\n\t\t\tmetadata.tokens = {\n\t\t\t\tinput: inputTokens,\n\t\t\t\toutput: outputTokens,\n\t\t\t\ttotal: inputTokens + outputTokens,\n\t\t\t\testimated,\n\t\t\t};\n\n\t\t\t// Track costs if enabled and tokens are available\n\t\t\tif (config.trackCosts && metadata.tokens) {\n\t\t\t\tconst costEstimate = await this.costEstimator.estimate(\n\t\t\t\t\toptions.provider,\n\t\t\t\t\toptions.model,\n\t\t\t\t\tinputTokens,\n\t\t\t\t\toutputTokens\n\t\t\t\t);\n\n\t\t\t\tmetadata.cost = {\n\t\t\t\t\tinput: costEstimate.input,\n\t\t\t\t\toutput: costEstimate.output,\n\t\t\t\t\ttotal: costEstimate.total,\n\t\t\t\t\tcurrency: costEstimate.currency,\n\t\t\t\t\testimated: costEstimate.estimated || estimated,\n\t\t\t\t};\n\t\t\t}\n\t\t}\n\n\t\treturn metadata;\n\t}\n\n\t/**\n\t * Filter metadata based on configuration\n\t */\n\tstatic filterMetadata(\n\t\tmetadata: MessageMetadata,\n\t\tclientConfig?: BaseConfig,\n\t\tglobalConfig?: GlobalConfig\n\t): MessageMetadata {\n\t\tconst config = this.getMetadataConfig(clientConfig, globalConfig);\n\n\t\t// Create filtered metadata\n\t\tconst filtered: MessageMetadata = {\n\t\t\tid: metadata.id,\n\t\t\ttimestamp: metadata.timestamp,\n\t\t\tprovider: metadata.provider,\n\t\t\tmodel: metadata.model,\n\t\t\tlatency: config.trackLatency ? metadata.latency : 0,\n\t\t};\n\n\t\t// Only include tokens if tracking is enabled\n\t\tif (config.trackTokens && metadata.tokens) {\n\t\t\tfiltered.tokens = metadata.tokens;\n\t\t}\n\n\t\t// Only include cost if tracking is enabled\n\t\tif (config.trackCosts && metadata.cost) {\n\t\t\tfiltered.cost = metadata.cost;\n\t\t}\n\n\t\treturn filtered;\n\t}\n\n\t/**\n\t * Merge metadata configurations with proper priority\n\t */\n\tstatic mergeConfigs(\n\t\tclientConfig?: BaseConfig,\n\t\tglobalConfig?: GlobalConfig,\n\t\tdefaultConfig?: BaseConfig\n\t): BaseConfig {\n\t\t// Start with default or empty config\n\t\tlet merged: BaseConfig = defaultConfig ? 
{ ...defaultConfig } : {};\n\n\t\t// Apply global config\n\t\tif (globalConfig) {\n\t\t\tmerged = this.deepMerge(merged, globalConfig);\n\t\t}\n\n\t\t// Apply client config (highest priority)\n\t\tif (clientConfig) {\n\t\t\tmerged = this.deepMerge(merged, clientConfig);\n\t\t}\n\n\t\treturn merged;\n\t}\n\n\t/**\n\t * Deep merge configuration objects\n\t */\n\tprivate static deepMerge(target: any, source: any): any {\n\t\tconst result = { ...target };\n\n\t\tfor (const key in source) {\n\t\t\tif (\n\t\t\t\tsource[key] &&\n\t\t\t\ttypeof source[key] === \"object\" &&\n\t\t\t\t!Array.isArray(source[key])\n\t\t\t) {\n\t\t\t\tresult[key] = this.deepMerge(result[key] || {}, source[key]);\n\t\t\t} else {\n\t\t\t\tresult[key] = source[key];\n\t\t\t}\n\t\t}\n\n\t\treturn result;\n\t}\n}\n","/**\n * Cost Estimator - Accurate pricing for all providers\n *\n * Last updated: January 2025\n * Prices are in USD per 1000 tokens unless otherwise noted\n */\n\nimport { CostEstimate } from \"../types\";\n\ninterface ModelPricing {\n\tinputPer1k: number;\n\toutputPer1k: number;\n\tcached?: boolean;\n\tnotes?: string;\n}\n\nexport class CostEstimator {\n\t// Accurate pricing from official documentation (January 2025)\n\tprivate pricing: Record<string, ModelPricing> = {\n\t\t// OpenAI - GPT-4.1 Series\n\t\t\"openai:gpt-4.1-2025-04-14\": { inputPer1k: 0.002, outputPer1k: 0.008 },\n\t\t\"openai:gpt-4.1-mini-2025-04-14\": {\n\t\t\tinputPer1k: 0.0004,\n\t\t\toutputPer1k: 0.0016,\n\t\t},\n\t\t\"openai:gpt-4.1-nano-2025-04-14\": {\n\t\t\tinputPer1k: 0.0001,\n\t\t\toutputPer1k: 0.0004,\n\t\t},\n\t\t\"openai:gpt-4.5-preview-2025-02-27\": {\n\t\t\tinputPer1k: 0.075,\n\t\t\toutputPer1k: 0.15,\n\t\t},\n\n\t\t// OpenAI - GPT-4o Series\n\t\t\"openai:gpt-4o-2024-08-06\": { inputPer1k: 0.0025, outputPer1k: 0.01 },\n\t\t\"openai:gpt-4o-mini-2024-07-18\": {\n\t\t\tinputPer1k: 0.00015,\n\t\t\toutputPer1k: 0.0006,\n\t\t},\n\t\t\"openai:gpt-4o-audio-preview-2025-06-03\": {\n\t\t\tinputPer1k: 0.0025, // text\n\t\t\toutputPer1k: 0.01, // text\n\t\t\tnotes: \"Audio: $40/1M input, $80/1M output\",\n\t\t},\n\n\t\t// OpenAI - O-Series (Reasoning)\n\t\t\"openai:o1-2024-12-17\": { inputPer1k: 0.015, outputPer1k: 0.06 },\n\t\t\"openai:o1-pro-2025-03-19\": { inputPer1k: 0.15, outputPer1k: 0.6 },\n\t\t\"openai:o3-2025-04-16\": { inputPer1k: 0.002, outputPer1k: 0.008 },\n\t\t\"openai:o3-pro-2025-06-10\": { inputPer1k: 0.02, outputPer1k: 0.08 },\n\t\t\"openai:o3-mini-2025-01-31\": { inputPer1k: 0.0011, outputPer1k: 0.0044 },\n\t\t\"openai:o4-mini-2025-04-16\": { inputPer1k: 0.0011, outputPer1k: 0.0044 },\n\t\t\"openai:o1-mini-2024-09-12\": { inputPer1k: 0.0011, outputPer1k: 0.0044 },\n\n\t\t// OpenAI - Legacy\n\t\t\"openai:gpt-4-turbo-2024-04-09\": { inputPer1k: 0.01, outputPer1k: 0.03 },\n\t\t\"openai:gpt-4\": { inputPer1k: 0.03, outputPer1k: 0.06 },\n\t\t\"openai:gpt-4-32k\": { inputPer1k: 0.06, outputPer1k: 0.12 },\n\t\t\"openai:gpt-3.5-turbo-0125\": { inputPer1k: 0.0005, outputPer1k: 0.0015 },\n\t\t\"openai:gpt-3.5-turbo\": { inputPer1k: 0.0005, outputPer1k: 0.0015 },\n\n\t\t// Anthropic - Claude 4 Series\n\t\t\"anthropic:claude-opus-4-20250514\": {\n\t\t\tinputPer1k: 0.015,\n\t\t\toutputPer1k: 0.075,\n\t\t},\n\t\t\"anthropic:claude-sonnet-4-20250514\": {\n\t\t\tinputPer1k: 0.003,\n\t\t\toutputPer1k: 0.015,\n\t\t},\n\t\t\"anthropic:claude-3-7-sonnet-20250219\": {\n\t\t\tinputPer1k: 0.003,\n\t\t\toutputPer1k: 0.015,\n\t\t},\n\t\t\"anthropic:claude-3-5-haiku-20241022\": {\n\t\t\tinputPer1k: 0.0008,\n\t\t\toutputPer1k: 
0.004,\n\t\t},\n\n\t\t// Anthropic - Legacy\n\t\t\"anthropic:claude-3-5-sonnet-20241022\": {\n\t\t\tinputPer1k: 0.003,\n\t\t\toutputPer1k: 0.015,\n\t\t},\n\t\t\"anthropic:claude-3-5-sonnet-20240620\": {\n\t\t\tinputPer1k: 0.003,\n\t\t\toutputPer1k: 0.015,\n\t\t},\n\t\t\"anthropic:claude-3-haiku-20240307\": {\n\t\t\tinputPer1k: 0.00025,\n\t\t\toutputPer1k: 0.00125,\n\t\t},\n\t\t\"anthropic:claude-3-opus-20240229\": {\n\t\t\tinputPer1k: 0.015,\n\t\t\toutputPer1k: 0.075,\n\t\t},\n\t\t\"anthropic:claude-3-sonnet-20240229\": {\n\t\t\tinputPer1k: 0.003,\n\t\t\toutputPer1k: 0.015,\n\t\t},\n\n\t\t// Google Gemini\n\t\t\"google:gemini-2.5-pro\": {\n\t\t\tinputPer1k: 0.00125, // <= 200K tokens\n\t\t\toutputPer1k: 0.01, // <= 200K tokens\n\t\t\tnotes: \">200K: $2.50 input, $15 output per 1M\",\n\t\t},\n\t\t\"google:gemini-2.5-flash\": {\n\t\t\tinputPer1k: 0.0003, // text/image/video\n\t\t\toutputPer1k: 0.0025,\n\t\t\tnotes: \"Audio: $1 per 1M input\",\n\t\t},\n\t\t\"google:gemini-2.5-flash-lite\": {\n\t\t\tinputPer1k: 0.0001, // text/image/video\n\t\t\toutputPer1k: 0.0004,\n\t\t\tnotes: \"Audio: $0.30 per 1M input\",\n\t\t},\n\t\t\"google:gemini-2.5-flash-native-audio\": {\n\t\t\tinputPer1k: 0.0005, // text\n\t\t\toutputPer1k: 0.002, // text\n\t\t\tnotes: \"Audio: $3/1M input, $12/1M output\",\n\t\t},\n\t\t\"google:gemini-2.5-flash-preview-tts\": {\n\t\t\tinputPer1k: 0.0005, // text\n\t\t\toutputPer1k: 0.01, // audio\n\t\t},\n\t\t\"google:gemini-2.5-pro-preview-tts\": {\n\t\t\tinputPer1k: 0.001, // text\n\t\t\toutputPer1k: 0.02, // audio\n\t\t},\n\n\t\t// Google Legacy\n\t\t\"google:gemini-pro\": { inputPer1k: 0.00025, outputPer1k: 0.00125 },\n\t\t\"google:gemini-pro-vision\": { inputPer1k: 0.00025, outputPer1k: 0.00125 },\n\t\t\"google:gemini-1.5-pro\": { inputPer1k: 0.00125, outputPer1k: 0.005 },\n\t\t\"google:gemini-1.5-flash\": { inputPer1k: 0.00025, outputPer1k: 0.00075 },\n\n\t\t// Llama Models\n\t\t\"llama:Llama-4-Maverick-17B-128E-Instruct-FP8\": {\n\t\t\tinputPer1k: 0.0001,\n\t\t\toutputPer1k: 0.0001,\n\t\t},\n\t\t\"llama:Llama-4-Scout-17B-16E-Instruct-FP8\": {\n\t\t\tinputPer1k: 0.00008,\n\t\t\toutputPer1k: 0.00008,\n\t\t},\n\t\t\"llama:Llama-3.3-70B-Instruct\": {\n\t\t\tinputPer1k: 0.00035,\n\t\t\toutputPer1k: 0.0004,\n\t\t},\n\t\t\"llama:Llama-3.3-8B-Instruct\": {\n\t\t\tinputPer1k: 0.00005,\n\t\t\toutputPer1k: 0.00005,\n\t\t},\n\n\t\t// Llama Legacy\n\t\t\"llama:llama-3-70b\": { inputPer1k: 0.0008, outputPer1k: 0.0008 },\n\t\t\"llama:llama-3-8b\": { inputPer1k: 0.0002, outputPer1k: 0.0002 },\n\t\t\"llama:code-llama-70b\": { inputPer1k: 0.0008, outputPer1k: 0.0008 },\n\n\t\t// LM Studio (local = free)\n\t\t\"lmstudio:any\": { inputPer1k: 0, outputPer1k: 0 },\n\t};\n\n\t// Provider defaults for unknown models\n\tprivate providerDefaults: Record<string, ModelPricing> = {\n\t\topenai: { inputPer1k: 0.002, outputPer1k: 0.008 },\n\t\tanthropic: { inputPer1k: 0.003, outputPer1k: 0.015 },\n\t\tgoogle: { inputPer1k: 0.0005, outputPer1k: 0.002 },\n\t\tllama: { inputPer1k: 0.0001, outputPer1k: 0.0001 },\n\t\tlmstudio: { inputPer1k: 0, outputPer1k: 0 },\n\t};\n\n\t// Cache write multipliers (based on provider)\n\tprivate cacheMultipliers: Record<string, number> = {\n\t\topenai: 0.25, // 25% of input cost for cache writes\n\t\tanthropic: 1.25, // 125% of input cost for cache writes\n\t\tgoogle: 0.25, // 25% of input cost for cache writes\n\t\tllama: 0, // No caching support\n\t\tlmstudio: 0, // No caching support\n\t};\n\n\t/**\n\t * Estimate cost for token usage\n\t */\n\tasync 
estimate(\n\t\tprovider: string,\n\t\tmodel: string,\n\t\tinputTokens: number,\n\t\toutputTokens: number,\n\t\toptions?: {\n\t\t\tcached?: boolean;\n\t\t\taudioTokens?: { input?: number; output?: number };\n\t\t}\n\t): Promise<CostEstimate> {\n\t\t// Look up specific model pricing\n\t\tconst key = `${provider}:${model}`;\n\t\tlet pricing = this.pricing[key];\n\n\t\t// Fall back to provider defaults\n\t\tif (!pricing) {\n\t\t\tpricing = this.providerDefaults[provider] || {\n\t\t\t\tinputPer1k: 0.001,\n\t\t\t\toutputPer1k: 0.001,\n\t\t\t};\n\t\t}\n\n\t\t// Calculate base costs\n\t\tlet inputCost = (inputTokens / 1000) * pricing.inputPer1k;\n\t\tconst outputCost = (outputTokens / 1000) * pricing.outputPer1k;\n\n\t\t// Apply cache discount if applicable\n\t\tif (options?.cached && this.cacheMultipliers[provider]) {\n\t\t\tinputCost *= this.cacheMultipliers[provider];\n\t\t}\n\n\t\t// Add audio costs if applicable\n\t\tif (options?.audioTokens) {\n\t\t\t// Add special audio pricing for multimodal models\n\t\t\tif (model.includes(\"audio\") || model.includes(\"native-audio\")) {\n\t\t\t\t// Model-specific audio rates would go here\n\t\t\t}\n\t\t}\n\n\t\treturn {\n\t\t\tinput: this.roundCost(inputCost),\n\t\t\toutput: this.roundCost(outputCost),\n\t\t\ttotal: this.roundCost(inputCost + outputCost),\n\t\t\tcurrency: \"USD\",\n\t\t\testimated: !this.pricing[key], // Mark as estimated if using defaults\n\t\t};\n\t}\n\n\t/**\n\t * Estimate cost for image generation\n\t */\n\tasync estimateImageGeneration(\n\t\tprovider: string,\n\t\tmodel: string,\n\t\tcount: number = 1,\n\t\tsize: string = \"1024x1024\",\n\t\tquality: string = \"standard\"\n\t): Promise<CostEstimate> {\n\t\tconst imagePricing: Record<string, number> = {\n\t\t\t// OpenAI DALL-E\n\t\t\t\"openai:dall-e-3:standard:1024x1024\": 0.04,\n\t\t\t\"openai:dall-e-3:standard:1024x1792\": 0.08,\n\t\t\t\"openai:dall-e-3:standard:1792x1024\": 0.08,\n\t\t\t\"openai:dall-e-3:hd:1024x1024\": 0.08,\n\t\t\t\"openai:dall-e-3:hd:1024x1792\": 0.12,\n\t\t\t\"openai:dall-e-3:hd:1792x1024\": 0.12,\n\t\t\t\"openai:dall-e-2:standard:256x256\": 0.016,\n\t\t\t\"openai:dall-e-2:standard:512x512\": 0.018,\n\t\t\t\"openai:dall-e-2:standard:1024x1024\": 0.02,\n\n\t\t\t// Google Imagen\n\t\t\t\"google:imagen-3\": 0.03,\n\n\t\t\t// Google Veo (per second of video)\n\t\t\t\"google:veo-3\": 0.75,\n\t\t\t\"google:veo-2\": 0.5,\n\t\t};\n\n\t\tconst key = `${provider}:${model}:${quality}:${size}`;\n\t\tconst pricePerImage =\n\t\t\timagePricing[key] || imagePricing[`${provider}:${model}`] || 0.05;\n\t\tconst total = pricePerImage * count;\n\n\t\treturn {\n\t\t\tinput: 0,\n\t\t\toutput: this.roundCost(total),\n\t\t\ttotal: this.roundCost(total),\n\t\t\tcurrency: \"USD\",\n\t\t\testimated: !imagePricing[key],\n\t\t};\n\t}\n\n\t/**\n\t * Round cost to reasonable precision\n\t */\n\tprivate roundCost(cost: number): number {\n\t\t// Round to 6 decimal places (tenth of a cent precision)\n\t\treturn Math.round(cost * 1000000) / 1000000;\n\t}\n\n\t/**\n\t * Get pricing info for a specific model\n\t */\n\tgetPricing(provider: string, model: string): ModelPricing | null {\n\t\tconst key = `${provider}:${model}`;\n\t\treturn this.pricing[key] || this.providerDefaults[provider] || null;\n\t}\n\n\t/**\n\t * Check if a model is free (local models)\n\t */\n\tisFree(provider: string): boolean {\n\t\treturn provider === \"lmstudio\";\n\t}\n\n\t/**\n\t * Get all available models for a provider\n\t */\n\tgetProviderModels(provider: string): string[] {\n\t\treturn 
Object.keys(this.pricing)\n\t\t\t.filter((key) => key.startsWith(`${provider}:`))\n\t\t\t.map((key) => key.split(\":\")[1]);\n\t}\n}\n","/**\n * Unified Rate Limiter for all AI providers\n *\n * Features:\n * - Provider-specific rate limits\n * - Token-based and request-based limiting\n * - Automatic queuing and retry\n * - Invisible to the user (never fails due to rate limits)\n */\n\nimport { EventEmitter } from \"events\";\nimport type { ProviderType } from \"../types\";\nimport type { RateLimitConfig, QueuedRequest } from \"../types/rate-limit\";\n\n// Internal config with required properties\ntype RequiredRateLimitConfig = Required<\n\tPick<RateLimitConfig, \"enabled\" | \"requestsPerMinute\" | \"tokensPerMinute\">\n> &\n\tPick<RateLimitConfig, \"requestsPerHour\" | \"tokensPerHour\">;\n\n// Provider-specific default rate limits (based on documentation)\nconst PROVIDER_LIMITS: Record<ProviderType, Partial<RateLimitConfig>> = {\n\topenai: {\n\t\trequestsPerMinute: 500, // GPT-4 tier\n\t\ttokensPerMinute: 300000,\n\t\trequestsPerHour: 10000,\n\t\ttokensPerHour: 18000000,\n\t},\n\tanthropic: {\n\t\trequestsPerMinute: 50, // Claude tier\n\t\ttokensPerMinute: 100000,\n\t\trequestsPerHour: 1000,\n\t\ttokensPerHour: 5000000,\n\t},\n\tgoogle: {\n\t\trequestsPerMinute: 60, // Gemini tier\n\t\ttokensPerMinute: 1000000, // 1M context window\n\t\trequestsPerHour: 1500, // Based on RPD limits\n\t\ttokensPerHour: 60000000,\n\t},\n\tllama: {\n\t\trequestsPerMinute: 100, // Typical hosted limits\n\t\ttokensPerMinute: 500000,\n\t\trequestsPerHour: 5000,\n\t\ttokensPerHour: 30000000,\n\t},\n\tlmstudio: {\n\t\trequestsPerMinute: 1000, // Local, no real limits\n\t\ttokensPerMinute: 10000000,\n\t\trequestsPerHour: 60000,\n\t\ttokensPerHour: 600000000,\n\t},\n\trouter: {\n\t\trequestsPerMinute: 200, // Router delegates to providers\n\t\ttokensPerMinute: 1000000,\n\t\trequestsPerHour: 10000,\n\t\ttokensPerHour: 60000000,\n\t},\n};\n\n// Model-specific adjustments\nconst MODEL_MULTIPLIERS: Record<string, number> = {\n\t// OpenAI\n\t\"gpt-3.5-turbo\": 7, // 7x higher limits than GPT-4\n\t\"gpt-4.1-nano\": 5, // 5x higher for nano\n\t\"gpt-4.1-mini\": 3, // 3x higher for mini\n\n\t// Anthropic\n\t\"claude-3-5-haiku\": 2, // 2x higher for Haiku\n\n\t// Google\n\t\"gemini-2.5-flash-lite\": 3, // 3x higher for lite\n\t\"gemini-2.5-flash\": 2, // 2x higher for flash\n\n\t// Llama\n\t\"Llama-3.3-8B-Instruct\": 2, // 2x higher for smaller models\n};\n\nexport class RateLimiter extends EventEmitter {\n\tprivate queues: Map<ProviderType, QueuedRequest[]> = new Map();\n\tprivate usage: Map<\n\t\tstring,\n\t\t{ requests: number; tokens: number; resetAt: number }\n\t> = new Map();\n\tprivate processing: Set<string> = new Set();\n\tprivate config: RequiredRateLimitConfig;\n\n\tconstructor(config?: Partial<RateLimitConfig>) {\n\t\tsuper();\n\t\tthis.config = {\n\t\t\tenabled: true,\n\t\t\trequestsPerMinute: 60,\n\t\t\ttokensPerMinute: 150000,\n\t\t\t...config,\n\t\t};\n\n\t\t// Initialize queues for each provider\n\t\tconst providers: ProviderType[] = [\n\t\t\t\"openai\",\n\t\t\t\"anthropic\",\n\t\t\t\"google\",\n\t\t\t\"llama\",\n\t\t\t\"lmstudio\",\n\t\t];\n\t\tproviders.forEach((provider) => {\n\t\t\tthis.queues.set(provider, []);\n\t\t});\n\n\t\t// Start processing loop\n\t\tif (this.config.enabled) {\n\t\t\tthis.startProcessingLoop();\n\t\t}\n\t}\n\n\t/**\n\t * Execute a request with rate limiting\n\t */\n\tasync execute<T>(\n\t\tprovider: ProviderType,\n\t\tmodel: string,\n\t\tfn: () => 
Promise<T>,\n\t\testimatedTokens?: number\n\t): Promise<T> {\n\t\t// If rate limiting is disabled, execute immediately\n\t\tif (!this.config.enabled) {\n\t\t\treturn fn();\n\t\t}\n\n\t\t// Check if we can execute immediately\n\t\tif (this.canExecute(provider, model, estimatedTokens)) {\n\t\t\tthis.recordUsage(provider, model, estimatedTokens);\n\t\t\treturn fn();\n\t\t}\n\n\t\t// Queue the request\n\t\treturn new Promise((resolve, reject) => {\n\t\t\tconst request: QueuedRequest = {\n\t\t\t\tid: Math.random().toString(36).substr(2, 9),\n\t\t\t\tprovider,\n\t\t\t\tmodel,\n\t\t\t\texecute: fn,\n\t\t\t\tresolve,\n\t\t\t\treject,\n\t\t\t\ttimestamp: Date.now(),\n\t\t\t\testimatedTokens,\n\t\t\t\tretryCount: 0,\n\t\t\t};\n\n\t\t\tthis.enqueue(request);\n\t\t\tthis.emit(\"queued\", {\n\t\t\t\tprovider,\n\t\t\t\tmodel,\n\t\t\t\tqueueLength: this.getQueueLength(provider),\n\t\t\t});\n\t\t});\n\t}\n\n\t/**\n\t * Check if request can be executed immediately\n\t */\n\tprivate canExecute(\n\t\tprovider: ProviderType,\n\t\tmodel: string,\n\t\testimatedTokens: number = 100\n\t): boolean {\n\t\tconst limits = this.getLimits(provider, model);\n\t\tconst usage = this.getUsage(provider);\n\n\t\t// Check if usage window has expired\n\t\tif (usage.resetAt < Date.now()) {\n\t\t\tthis.resetUsage(provider);\n\t\t\treturn true;\n\t\t}\n\n\t\t// Check request limits\n\t\tif (usage.requests >= limits.requestsPerMinute) {\n\t\t\treturn false;\n\t\t}\n\n\t\t// Check token limits\n\t\tif (\n\t\t\testimatedTokens &&\n\t\t\tusage.tokens + estimatedTokens > limits.tokensPerMinute\n\t\t) {\n\t\t\treturn false;\n\t\t}\n\n\t\treturn true;\n\t}\n\n\t/**\n\t * Get rate limits for provider/model combination\n\t */\n\tprivate getLimits(\n\t\tprovider: ProviderType,\n\t\tmodel: string\n\t): RequiredRateLimitConfig {\n\t\tconst providerLimits = PROVIDER_LIMITS[provider] || {};\n\t\tconst multiplier = MODEL_MULTIPLIERS[model] || 1;\n\n\t\treturn {\n\t\t\tenabled: true,\n\t\t\trequestsPerMinute: Math.floor(\n\t\t\t\t(providerLimits.requestsPerMinute || this.config.requestsPerMinute) *\n\t\t\t\t\tmultiplier\n\t\t\t),\n\t\t\ttokensPerMinute: Math.floor(\n\t\t\t\t(providerLimits.tokensPerMinute || this.config.tokensPerMinute) *\n\t\t\t\t\tmultiplier\n\t\t\t),\n\t\t\trequestsPerHour: providerLimits.requestsPerHour\n\t\t\t\t? Math.floor(providerLimits.requestsPerHour * multiplier)\n\t\t\t\t: undefined,\n\t\t\ttokensPerHour: providerLimits.tokensPerHour\n\t\t\t\t? 
Math.floor(providerLimits.tokensPerHour * multiplier)\n\t\t\t\t: undefined,\n\t\t};\n\t}\n\n\t/**\n\t * Get current usage for provider\n\t */\n\tprivate getUsage(provider: ProviderType) {\n\t\tconst key = `${provider}:minute`;\n\t\tlet usage = this.usage.get(key);\n\n\t\tif (!usage || usage.resetAt < Date.now()) {\n\t\t\tusage = {\n\t\t\t\trequests: 0,\n\t\t\t\ttokens: 0,\n\t\t\t\tresetAt: Date.now() + 60000, // 1 minute\n\t\t\t};\n\t\t\tthis.usage.set(key, usage);\n\t\t}\n\n\t\treturn usage;\n\t}\n\n\t/**\n\t * Record usage\n\t */\n\tprivate recordUsage(\n\t\tprovider: ProviderType,\n\t\tmodel: string,\n\t\ttokens: number = 100\n\t) {\n\t\tconst usage = this.getUsage(provider);\n\t\tusage.requests++;\n\t\tusage.tokens += tokens;\n\t}\n\n\t/**\n\t * Reset usage for provider\n\t */\n\tprivate resetUsage(provider: ProviderType) {\n\t\tconst key = `${provider}:minute`;\n\t\tthis.usage.set(key, {\n\t\t\trequests: 0,\n\t\t\ttokens: 0,\n\t\t\tresetAt: Date.now() + 60000,\n\t\t});\n\t}\n\n\t/**\n\t * Add request to queue\n\t */\n\tprivate enqueue(request: QueuedRequest) {\n\t\tconst queue = this.queues.get(request.provider) || [];\n\t\tqueue.push(request);\n\t\tthis.queues.set(request.provider, queue);\n\t}\n\n\t/**\n\t * Get queue length for provider\n\t */\n\tprivate getQueueLength(provider: ProviderType): number {\n\t\treturn this.queues.get(provider)?.length || 0;\n\t}\n\n\t/**\n\t * Start processing queued requests\n\t */\n\tprivate startProcessingLoop() {\n\t\tsetInterval(() => {\n\t\t\tthis.processQueues();\n\t\t}, 100); // Check every 100ms\n\t}\n\n\t/**\n\t * Process all provider queues\n\t */\n\tprivate async processQueues() {\n\t\tconst providers: ProviderType[] = [\n\t\t\t\"openai\",\n\t\t\t\"anthropic\",\n\t\t\t\"google\",\n\t\t\t\"llama\",\n\t\t\t\"lmstudio\",\n\t\t];\n\n\t\tfor (const provider of providers) {\n\t\t\tif (this.processing.has(provider)) continue;\n\n\t\t\tconst queue = this.queues.get(provider);\n\t\t\tif (!queue || queue.length === 0) continue;\n\n\t\t\t// Process queue for this provider\n\t\t\tthis.processing.add(provider);\n\t\t\tthis.processProviderQueue(provider).finally(() =>\n\t\t\t\tthis.processing.delete(provider)\n\t\t\t);\n\t\t}\n\t}\n\n\t/**\n\t * Process queue for specific provider\n\t */\n\tprivate async processProviderQueue(provider: ProviderType) {\n\t\tconst queue = this.queues.get(provider) || [];\n\t\tconst processed: string[] = [];\n\n\t\twhile (queue.length > 0) {\n\t\t\tconst request = queue[0];\n\n\t\t\t// Check if we can execute\n\t\t\tif (!this.canExecute(provider, request.model, request.estimatedTokens)) {\n\t\t\t\t// Wait a bit before checking again\n\t\t\t\tawait new Promise((resolve) => setTimeout(resolve, 1000));\n\n\t\t\t\t// Check for stale requests (older than 5 minutes)\n\t\t\t\tif (Date.now() - request.timestamp > 300000) {\n\t\t\t\t\tqueue.shift();\n\t\t\t\t\trequest.reject(new Error(\"Request timed out in rate limit queue\"));\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\tbreak; // Can't process more right now\n\t\t\t}\n\n\t\t\t// Remove from queue\n\t\t\tqueue.shift();\n\t\t\tprocessed.push(request.id);\n\n\t\t\t// Execute request\n\t\t\ttry {\n\t\t\t\tthis.recordUsage(provider, request.model, request.estimatedTokens);\n\t\t\t\tconst result = await request.execute();\n\t\t\t\trequest.resolve(result);\n\n\t\t\t\tthis.emit(\"executed\", {\n\t\t\t\t\tprovider,\n\t\t\t\t\tmodel: request.model,\n\t\t\t\t\tqueueTime: Date.now() - request.timestamp,\n\t\t\t\t});\n\t\t\t} catch (error: any) {\n\t\t\t\t// Check if it's a rate limit 
error\n\t\t\t\tif (this.isRateLimitError(error) && request.retryCount < 3) {\n\t\t\t\t\t// Re-queue with increased retry count\n\t\t\t\t\trequest.retryCount++;\n\t\t\t\t\trequest.timestamp = Date.now(); // Reset timestamp\n\t\t\t\t\tthis.enqueue(request);\n\n\t\t\t\t\tthis.emit(\"retry\", {\n\t\t\t\t\t\tprovider,\n\t\t\t\t\t\tmodel: request.model,\n\t\t\t\t\t\tretryCount: request.retryCount,\n\t\t\t\t\t\terror: error.message,\n\t\t\t\t\t});\n\t\t\t\t} else {\n\t\t\t\t\t// Final failure\n\t\t\t\t\trequest.reject(error);\n\n\t\t\t\t\tthis.emit(\"failed\", {\n\t\t\t\t\t\tprovider,\n\t\t\t\t\t\tmodel: request.model,\n\t\t\t\t\t\terror: error.message,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t// Update queue\n\t\tthis.queues.set(provider, queue);\n\t}\n\n\t/**\n\t * Check if error is rate limit related\n\t */\n\tprivate isRateLimitError(error: any): boolean {\n\t\tconst message = error.message?.toLowerCase() || \"\";\n\t\tconst code = error.code?.toLowerCase() || \"\";\n\n\t\treturn (\n\t\t\tmessage.includes(\"rate limit\") ||\n\t\t\tmessage.includes(\"too many requests\") ||\n\t\t\tmessage.includes(\"quota exceeded\") ||\n\t\t\tcode === \"rate_limit_exceeded\" ||\n\t\t\tcode === \"429\" ||\n\t\t\terror.status === 429\n\t\t);\n\t}\n\n\t/**\n\t * Get current queue status\n\t */\n\tgetStatus() {\n\t\tconst status: Record<string, any> = {};\n\n\t\tthis.queues.forEach((queue, provider) => {\n\t\t\tconst usage = this.getUsage(provider);\n\t\t\tstatus[provider] = {\n\t\t\t\tqueueLength: queue.length,\n\t\t\t\tusage: {\n\t\t\t\t\trequests: usage.requests,\n\t\t\t\t\ttokens: usage.tokens,\n\t\t\t\t\tresetIn: Math.max(0, usage.resetAt - Date.now()),\n\t\t\t\t},\n\t\t\t\tprocessing: this.processing.has(provider),\n\t\t\t};\n\t\t});\n\n\t\treturn status;\n\t}\n\n\t/**\n\t * Clear all queues (use with caution)\n\t */\n\tclearQueues() {\n\t\tthis.queues.forEach((queue, provider) => {\n\t\t\tqueue.forEach((request) => {\n\t\t\t\trequest.reject(new Error(\"Queue cleared\"));\n\t\t\t});\n\t\t\tthis.queues.set(provider, []);\n\t\t});\n\t}\n}\n","/**\n * Settings Loader - Loads global configuration from tryai directory\n *\n * Looks for settings in:\n * 1. tryai/settings.ts or tryai/settings.js\n * 2. .tryai/settings.ts or .tryai/settings.js\n * 3. 
Environment variables\n */\n\nimport { existsSync } from \"fs\";\nimport { join } from \"path\";\nimport type { GlobalConfig } from \"../types\";\n\nexport class SettingsLoader {\n\tprivate static cachedSettings?: GlobalConfig;\n\n\t/**\n\t * Load global settings with caching\n\t */\n\tstatic load(): GlobalConfig | undefined {\n\t\tif (this.cachedSettings) {\n\t\t\treturn this.cachedSettings;\n\t\t}\n\n\t\t// Try to load settings\n\t\tconst settings = this.loadFromFile() || this.loadFromEnv();\n\n\t\t// Cache if found\n\t\tif (settings) {\n\t\t\tthis.cachedSettings = settings;\n\t\t}\n\n\t\treturn settings;\n\t}\n\n\t/**\n\t * Try to load settings from file\n\t */\n\tprivate static loadFromFile(): GlobalConfig | undefined {\n\t\tconst possiblePaths = [\n\t\t\tjoin(process.cwd(), \"tryai\", \"settings.ts\"),\n\t\t\tjoin(process.cwd(), \"tryai\", \"settings.js\"),\n\t\t\tjoin(process.cwd(), \".tryai\", \"settings.ts\"),\n\t\t\tjoin(process.cwd(), \".tryai\", \"settings.js\"),\n\t\t];\n\n\t\tfor (const path of possiblePaths) {\n\t\t\tif (existsSync(path)) {\n\t\t\t\ttry {\n\t\t\t\t\t// Clear require cache to get fresh settings\n\t\t\t\t\tdelete require.cache[require.resolve(path)];\n\t\t\t\t\tconst module = require(path);\n\t\t\t\t\treturn module.default || module;\n\t\t\t\t} catch (error) {\n\t\t\t\t\tconsole.warn(`Failed to load settings from ${path}:`, error);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\treturn undefined;\n\t}\n\n\t/**\n\t * Load settings from environment variables\n\t */\n\tprivate static loadFromEnv(): GlobalConfig | undefined {\n\t\tconst hasAnyApiKey =\n\t\t\tprocess.env.OPENAI_API_KEY ||\n\t\t\tprocess.env.ANTHROPIC_API_KEY ||\n\t\t\tprocess.env.GOOGLE_API_KEY ||\n\t\t\tprocess.env.GEMINI_API_KEY ||\n\t\t\tprocess.env.LLAMA_API_KEY;\n\n\t\tif (!hasAnyApiKey) {\n\t\t\treturn undefined;\n\t\t}\n\n\t\treturn {\n\t\t\tapiKeys: {\n\t\t\t\tope