
@continue-reasoning/mini-agent

A platform-agnostic AI agent framework for building autonomous AI agents with tool execution capabilities

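For orientation before the source below, here is a minimal usage sketch. It assumes the class is re-exported from the package root; the config fields (apiKey, modelName, tokenLimit, systemPrompt, toolDeclarations, initialHistory) and the message shape are inferred from the constructor and convertToProviderMessage in this file, so treat them as illustrative rather than documented API.

import { OpenAIChatResponse } from '@continue-reasoning/mini-agent';

// Hypothetical config; field names inferred from how the constructor and
// request builder read chatConfig below.
const chat = new OpenAIChatResponse({
  apiKey: process.env.OPENAI_API_KEY,
  modelName: 'gpt-4.1',        // placeholder model name
  tokenLimit: 128000,
  systemPrompt: 'You are a helpful agent.',
  toolDeclarations: [],
  initialHistory: [],
});

const stream = await chat.sendMessageStream(
  { role: 'user', content: { type: 'text', text: 'Hello!' } },
  'prompt-1',
);
for await (const event of stream) {
  if (event.type === 'response.chunk.text.delta') {
    process.stdout.write(event.content.text_delta);
  }
}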
/**
 * @fileoverview OpenAI Response API Chat implementation
 *
 * This module provides an OpenAI Chat implementation based on the Response API
 * that follows the IChat interface and integrates with our Agent framework.
 * It uses OpenAI's Response API for streaming responses, which provides more
 * structured, event-based streaming compared to traditional chat completions.
 *
 * Key features:
 * - Response API streaming for structured events
 * - Event-based response handling
 * - Integrated token tracking with ITokenTracker
 * - Dual history system (comprehensive vs curated)
 * - Platform-agnostic content types
 * - Robust error handling and validation
 * - Function calling support with streaming
 */
import OpenAI from 'openai';
import { TokenTracker } from './tokenTracker';
import { LogLevel, createLogger } from '../logger';
import { convertTypesToLowercase } from '../utils';
// type OpenaiOutputMessage = OpenAI.Responses.ResponseOutputMessage;
// type OpenaiUserInputMessage = OpenAI.Responses.ResponseInputItem.Message;
/**
 * OpenAI Response API Chat implementation using our platform-agnostic interfaces
 *
 * This class provides streaming chat functionality using OpenAI's Response API,
 * which offers event-based streaming with more structured response handling.
 * It implements the IChat interface and works with our ConversationContent type
 * system while interfacing with OpenAI's Response API.
 *
 * Key implementation details:
 * - Uses OpenAI Response API for structured streaming
 * - Event-based response processing
 * - Converts between our types and OpenAI's types
 * - Maintains conversation history in our format
 * - Provides real-time token tracking
 * - Supports function calling with proper streaming
 */
export class OpenAIChatResponse {
  chatConfig;
  history = [];
  tokenTracker;
  sendPromise = Promise.resolve();
  isCurrentlyProcessing = false;
  openai;
  logger;
  lastResponseId = null; // 🔑 NEW: Track previous response for cache optimization
  enableCacheOptimization = true; // 🔑 NEW: Feature flag for cache optimization
  constructor(chatConfig) {
    this.chatConfig = chatConfig;
    this.logger = createLogger('OpenAIChatResponse', { level: LogLevel.INFO });
    this.logger.debug(`Initializing OpenAIChatResponse with model: ${chatConfig.modelName}`, 'OpenAIChatResponse.constructor()');
    this.openai = new OpenAI({
      apiKey: chatConfig.apiKey,
    });
    this.history = [...(chatConfig.initialHistory || [])];
    this.tokenTracker = new TokenTracker(chatConfig.modelName, chatConfig.tokenLimit);
  }
  /**
   * Send a message and get streaming response
   *
   * Implements the IChat interface for streaming message sending using the Response API.
   * Converts our ChatMessage format to OpenAI Response API format and processes
   * the event-based streaming response.
   *
   * @param message - Message in our ChatMessage format
   * @param promptId - Unique identifier for this prompt
   * @returns AsyncGenerator yielding LLMResponse objects
   */
  async sendMessageStream(message, promptId) {
    // Return immediately with an AsyncGenerator that handles initialization internally
    return this.createStreamingResponse(message, promptId);
  }
  /**
   * Create streaming response with internal initialization using the Response API
   *
   * This method immediately returns an AsyncGenerator and handles all initialization
   * (connection, auth, retries) internally within the generator. This eliminates
   * the initial await delay and provides true streaming from the first moment.
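   *
   * A minimal consumption sketch (illustrative only; the event type strings are
   * the ones yielded by processResponseStreamInternal below):
   *
   * @example
   * // (hypothetical) assumes `chat` is an OpenAIChatResponse instance and
   * // `userMessage` is a { role, content } message in our ChatMessage format
   * for await (const event of chat.createStreamingResponse(userMessage, 'prompt-1')) {
   *   if (event.type === 'response.chunk.function_call.done') {
   *     // execute the tool, then feed a function_response message back next turn
   *   }
   * }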
   */
  async *createStreamingResponse(message, promptId) {
    await this.sendPromise;
    this.isCurrentlyProcessing = true;
    // 🎯 Add user input to history FIRST for correct ordering
    this.addHistory(message);
    // Create a promise to track completion and set it immediately
    let completionResolve;
    let completionReject;
    this.sendPromise = new Promise((resolve, reject) => {
      completionResolve = resolve;
      completionReject = reject;
    });
    try {
      // 🔑 NEW: Determine input strategy based on cache optimization
      let inputMessages = [];
      let previousResponseId;
      if (this.enableCacheOptimization && this.lastResponseId && this.isMultiTurnRequest(message)) {
        // Cache optimization: only send incremental content
        inputMessages = this.buildIncrementalInput(message);
        previousResponseId = this.lastResponseId;
        this.logger.info(`Incremental input messages: ${JSON.stringify(inputMessages, null, 2)}`, 'OpenAIChatResponse.createStreamingResponse()');
      }
      else {
        // Standard: full history (existing logic)
        inputMessages = this.buildFullHistoryInput();
      }
      this.logger.info(`Request contains ${inputMessages.length} messages:\n ${JSON.stringify(inputMessages, null, 2)}`, 'OpenAIChatResponse.createStreamingResponse()');
      let tools = [];
      // Add tools if we have tool declarations
      if (this.chatConfig.toolDeclarations && this.chatConfig.toolDeclarations.length > 0) {
        tools = this.chatConfig.toolDeclarations.map((tool) => ({
          name: tool.name,
          description: tool.description,
          parameters: convertTypesToLowercase(tool.parameters),
          strict: false,
          type: 'function',
        }));
      }
      this.logger.debug(`Calling OpenAI Response API with model: ${this.chatConfig.modelName}`, 'OpenAIChatResponse.createStreamingResponse()');
      let streamResponse;
      if (this.enableCacheOptimization && previousResponseId && this.isMultiTurnRequest(message)) {
        // Use responses.create with previous_response_id for cache-optimized streaming.
        // Initialize the stream inside the generator - this is where the await happens,
        // but from the caller's perspective, streaming has already begun.
        this.logger.info(`Using cache optimization with previous_response_id: ${previousResponseId} and inputMessages: ${JSON.stringify(inputMessages, null, 2)}`, 'OpenAIChatResponse.createStreamingResponse()');
        streamResponse = await this.openai.responses.create({
          model: this.chatConfig.modelName,
          instructions: this.chatConfig.systemPrompt || null,
          input: inputMessages,
          previous_response_id: previousResponseId, // 🔑 NEW: Cache optimization
          stream: true,
          store: true,
          tools: tools,
        });
      }
      else {
        streamResponse = await this.openai.responses.create({
          model: this.chatConfig.modelName,
          instructions: this.chatConfig.systemPrompt || null,
          input: inputMessages,
          stream: true,
          store: true,
          tools: tools,
        });
      }
      // Now stream the actual responses using event-based processing
      yield* this.processResponseStreamInternal(streamResponse, message, promptId);
      // Stream completed successfully
      completionResolve();
    }
    catch (error) {
      this.isCurrentlyProcessing = false;
      this.logger.error(`Error in createStreamingResponse: ${error instanceof Error ? error.message : String(error)}`, 'OpenAIChatResponse.createStreamingResponse()');
      completionReject(error);
      throw error;
    }
  }
  /**
   * Internal stream processing for Response API events
   *
   * This processes the event-based streaming response from the Response API,
   * handling different event types and converting them to our format.
   */
  async *processResponseStreamInternal(streamResponse, _inputContent, promptId) {
    const outputContent = [];
    let errorOccurred = false;
    let chunkCount = 0;
    this.logger.debug(`Processing Response API stream for prompt: ${promptId}`, 'OpenAIChatResponse.processResponseStreamInternal()');
    /*
     Function calls are handled via the 'response.output_item.added/done' events
     and the 'response.function_call_arguments.delta/done' events, e.g.:
     {"type":"response.output_item.added","response_id":"resp_1234xyz","output_index":0,"item":{"type":"function_call","id":"fc_1234xyz","call_id":"call_1234xyz","name":"get_weather","arguments":""}}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"{\""}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"location"}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"\":\""}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"Paris"}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":","}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":" France"}
     {"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"\"}"}
     {"type":"response.function_call_arguments.done","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"arguments":"{\"location\":\"Paris, France\"}"}
     {"type":"response.output_item.done","response_id":"resp_1234xyz","output_index":0,"item":{"type":"function_call","id":"fc_1234xyz","call_id":"call_2345abc","name":"get_weather","arguments":"{\"location\":\"Paris, France\"}"}}
     */
    try {
      let curChunkFunctionCall;
      // Extract chunks from output
      const responseChunkItems = [];
      // Handle event-based streaming response from the Response API
      for await (const event of streamResponse) {
        chunkCount++;
        if (event.type == 'response.created') {
          yield {
            id: event.response.id,
            type: 'response.start',
            model: this.chatConfig.modelName,
            tools: this.chatConfig.toolDeclarations,
          };
        }
        else if (event.type == 'response.output_item.added') {
          let chunk = {
            type: 'response.chunk.added',
            chunk_id: event.item.id,
            chunk_idx: event.output_index,
          };
          yield chunk;
          if (event.item.type == 'function_call') {
            curChunkFunctionCall = {
              id: event.item.id || '',
              call_id: event.item.call_id,
              name: event.item.name,
              args: event.item.arguments, // the arguments are still empty at this point
            };
          }
          continue;
        }
        else if (event.type == 'response.output_item.done') {
          let chunk = {
            type: 'response.chunk.done',
            chunk_id: event.item.id,
            chunk_idx: event.output_index,
          };
          yield chunk;
          continue;
        }
        else if (event.type == 'response.output_text.delta') {
          let chunk = {
            type: 'response.chunk.text.delta',
            chunk_idx: event.output_index,
            content: {
              type: 'text',
              text_delta: event.delta,
            },
          };
          responseChunkItems.push(chunk);
          yield chunk;
          continue;
        }
        else if (event.type == 'response.output_text.done') {
          let chunk = {
            type: 'response.chunk.text.done',
            chunk_idx: event.output_index,
            content: {
              type: 'text',
              text: event.text,
            },
          };
          responseChunkItems.push(chunk);
          // Don't add to history here - let baseAgent manage the correct order
          yield chunk;
        }
        else if (event.type == 'response.reasoning_summary_text.delta') {
          let chunk = {
            type: 'response.chunk.thinking.delta',
            thinking: event.delta,
            chunk_idx: event.output_index,
            content: {
              type: 'text',
              thinking_delta: event.delta,
            },
          };
          responseChunkItems.push(chunk);
          yield chunk;
          continue;
        }
        else if (event.type == 'response.reasoning_summary_text.done') {
          let chunk = {
            type: 'response.chunk.thinking.done',
            thinking: event.text,
            chunk_idx: event.output_index,
            content: {
              type: 'text',
              thinking: event.text,
            },
          };
          responseChunkItems.push(chunk);
          // Don't add to history here - let baseAgent manage the correct order
          yield chunk;
          continue;
        }
        else if (event.type == 'response.function_call_arguments.delta') {
          if (curChunkFunctionCall) {
            let chunk = {
              type: 'response.chunk.function_call.delta',
              content: {
                type: 'function_call',
                functionCall: {
                  id: curChunkFunctionCall.id,
                  call_id: curChunkFunctionCall.call_id,
                  name: curChunkFunctionCall.name,
                  args: event.delta, // This is the delta part
                },
              },
            };
            responseChunkItems.push(chunk);
            yield chunk;
          }
          else {
            throw new Error('curChunkFunctionCall is undefined');
          }
          continue;
        }
        else if (event.type == 'response.function_call_arguments.done') {
          if (curChunkFunctionCall) {
            let chunk = {
              type: 'response.chunk.function_call.done',
              content: {
                type: 'function_call',
                functionCall: {
                  id: curChunkFunctionCall.id,
                  call_id: curChunkFunctionCall.call_id,
                  name: curChunkFunctionCall.name,
                  args: event.arguments, // Use the complete arguments
                },
              },
            };
            // Don't add to history here - let baseAgent manage the correct order
            responseChunkItems.push(chunk);
            yield chunk;
          }
          else {
            throw new Error('curChunkFunctionCall is undefined');
          }
          curChunkFunctionCall = undefined;
          continue;
        }
        else if (event.type === 'response.completed') {
          // 🔑 NEW: Store response ID for cache optimization
          this.lastResponseId = event.response.id;
          this.logger.debug(`Stored response ID for cache optimization: ${this.lastResponseId}`, 'OpenAIChatResponse.processResponseStreamInternal()');
          // Update token tracking
          if (event.response?.usage) {
            this.tokenTracker.updateUsage({
              inputTokens: event.response.usage.input_tokens || 0,
              inputTokenDetails: {
                cachedTokens: event.response.usage.input_tokens_details?.cached_tokens || 0,
              },
              outputTokens: event.response.usage.output_tokens || 0,
              outputTokenDetails: {
                reasoningTokens: event.response.usage.output_tokens_details?.reasoning_tokens || 0,
              },
              totalTokens: event.response.usage.total_tokens || ((event.response.usage.input_tokens || 0) + (event.response.usage.output_tokens || 0)),
            });
          }
          this.logger.info(`LLM Token Usage: ${JSON.stringify(event.response.usage)}`, 'OpenAIChatResponse.processResponseStreamInternal()');
          yield {
            response_id: event.response.id,
            type: 'response.complete',
            model: this.chatConfig.modelName,
            chunks: responseChunkItems,
            usage: event.response?.usage
              ? {
                inputTokens: event.response.usage.input_tokens || 0,
                inputTokenDetails: {
                  cachedTokens: event.response.usage.input_tokens_details?.cached_tokens || 0,
                },
                outputTokens: event.response.usage.output_tokens || 0,
                outputTokenDetails: {
                  reasoningTokens: event.response.usage.output_tokens_details?.reasoning_tokens || 0,
                },
                totalTokens: event.response.usage.total_tokens || ((event.response.usage.input_tokens || 0) + (event.response.usage.output_tokens || 0)),
              }
              : undefined,
            previous_response_id: event.response.previous_response_id || '',
          };
        }
        else if (event.type === 'response.failed') {
          yield {
            response_id: event.response.id,
            type: 'response.failed',
            model: this.chatConfig.modelName,
            chunks: responseChunkItems,
            previous_response_id: event.response.previous_response_id || '',
            error: {
              code: event.response.error?.code,
              message: event.response.error?.message,
            },
          };
        }
        else if (event.type === 'response.incomplete') {
          yield {
            response_id: event.response.id,
            type: 'response.incomplete',
            model: this.chatConfig.modelName,
            chunks: responseChunkItems,
            previous_response_id: event.response.previous_response_id || '',
            incomplete_details: {
              reason: event.response.incomplete_details?.reason || 'unknown',
            },
          };
        }
        else {
          // Handle other event types
          continue;
        }
      }
      this.logger.debug(`Response API stream processing completed - ${chunkCount} events processed, ${outputContent.length} valid responses`, 'OpenAIChatResponse.processResponseStreamInternal()');
    }
    catch (error) {
      errorOccurred = true;
      this.logger.error(`Error processing Response API stream: ${error instanceof Error ? error.message : String(error)}`, 'OpenAIChatResponse.processResponseStreamInternal()');
      throw error;
    }
    finally {
      if (!errorOccurred) {
        // History is now managed by baseAgent for correct ordering
        // this.logger.debug(`Recording history - input + ${outputContent.length} responses`, 'OpenAIChatResponse.processResponseStreamInternal()');
        // this.recordHistory(inputContent, outputContent);
      }
      this.isCurrentlyProcessing = false;
    }
  }
  // recordHistory method removed - history is now managed by baseAgent for correct ordering
  /**
   * Extract curated history (valid interactions only)
   */
  extractCuratedHistory(history) {
    const curatedHistory = [];
    let i = 0;
    while (i < history.length) {
      if (history[i].role === 'user') {
        const userMessage = history[i];
        curatedHistory.push(userMessage);
        i++;
        // Look for corresponding assistant response
        const assistantResponses = [];
        let isValid = true;
        while (i < history.length && history[i].role === 'assistant') {
          assistantResponses.push(history[i]); // With the new event system, we don't need content validation
          i++;
        }
        if (isValid && assistantResponses.length > 0) {
          curatedHistory.push(...assistantResponses);
        }
        else {
          // Remove the corresponding user input if assistant output is invalid
          curatedHistory.pop();
        }
      }
      else {
        // Skip orphaned assistant messages
        i++;
      }
    }
    return curatedHistory;
  }
  /**
   * Get conversation history
   */
  getHistory(curated = false) {
    const history = curated
      ? this.extractCuratedHistory(this.history)
      : this.history;
    return structuredClone(history); // Deep copy to prevent external mutations
  }
  /**
   * Clear conversation history
   */
  clearHistory() {
    this.history = [];
    this.tokenTracker.reset();
    this.lastResponseId = null;
  }
  /**
   * Add content to conversation history
   */
  addHistory(content) {
    this.history.push(content);
  }
  /**
   * Set entire conversation history
   */
  setHistory(history) {
    this.history = [...history];
    this.tokenTracker.reset(); // Reset token tracking when setting new history
  }
  /**
   * Get current token usage tracking
   */
  getTokenUsage() {
    return this.tokenTracker.getUsage();
  }
  /**
   * Get token tracker instance
   */
  getTokenTracker() {
    return this.tokenTracker;
  }
  /**
   * Check if chat is currently processing a message
   */
  isProcessing() {
    return this.isCurrentlyProcessing;
  }
  /**
   * Get current model information
   */
  getModelInfo() {
    return {
      model: this.chatConfig.modelName,
      tokenLimit: this.chatConfig.tokenLimit,
    };
  }
  /**
   * Set system prompt
   */
  setSystemPrompt(systemPrompt) {
    this.chatConfig.systemPrompt = systemPrompt;
  }
  /**
   * Get current system prompt
   */
  getSystemPrompt() {
    return this.chatConfig.systemPrompt;
  }
  /**
   * Handle model fallback
   */
  handleModelFallback(fallbackModel) {
    try {
      // Check if an API key is available
      const apiKey = this.chatConfig.apiKey;
      if (!apiKey || apiKey.trim() === '') {
        console.warn('No API key available for model fallback');
        return false;
      }
      // Update model name in config
      this.chatConfig.modelName = fallbackModel;
      // Create a new OpenAI client instance (the API key stays the same)
      this.openai = new OpenAI({ apiKey });
      return true;
    }
    catch (error) {
      console.warn('Failed to switch to fallback model:', error);
      return false;
    }
  }
  /**
   * Get usage summary for debugging
   */
  getUsageSummary() {
    return this.tokenTracker.getUsageSummary();
  }
  // ============================================================================
  // CACHE OPTIMIZATION METHODS - Phase 2 Implementation
  // ============================================================================
  /**
   * Build incremental input for cache optimization
   * Filters history down to the current user message plus the previous turn's
   * assistant text, assistant thinking, and function responses
   */
  buildIncrementalInput(currentMessage) {
    const incrementalHistory = [];
    // Get the current turn number from the message
    const currentTurn = this.getCurrentTurnFromMessage(currentMessage);
    // Include the current user message (e.g., "continue execution")
    incrementalHistory.push(currentMessage);
    // Filter and include content from the previous turn
    const previousTurnFunctionResponses = this.history.filter(msg => msg.turnIdx === (currentTurn - 1) && msg.content.type === 'function_response');
    const previousAssistantMessages = this.history.filter(msg => msg.turnIdx === (currentTurn - 1) && msg.role === 'assistant' && msg.content.type === 'text');
    const previousAssistantThinking = this.history.filter(msg => msg.turnIdx === (currentTurn - 1) && msg.role === 'assistant' && msg.content.type === 'thinking');
    incrementalHistory.push(...previousAssistantMessages);
    incrementalHistory.push(...previousAssistantThinking);
    incrementalHistory.push(...previousTurnFunctionResponses);
    return incrementalHistory.map(msg => this.convertToProviderMessage(msg));
  }
  /**
   * Build full history input (standard approach)
   */
  buildFullHistoryInput() {
    return this.history.map(historyItem => this.convertToProviderMessage(historyItem));
  }
  /**
   * Check if this is a multi-turn request (continuation turn)
   */
  isMultiTurnRequest(message) {
    // Check if this is a continuation turn (e.g., "continue execution")
    return message.content.type === 'text' && message.content.text === 'continue execution';
  }
  /**
   * Get current turn number from message
   */
  getCurrentTurnFromMessage(message) {
    return message.turnIdx || message.metadata?.turn || 1;
  }
  /**
   * Enable cache optimization feature
   * This is a feature flag to control cache optimization rollout
   */
  enableCacheOptimizationFeature() {
    this.enableCacheOptimization = true;
    this.logger.info('Cache optimization enabled', 'OpenAIChatResponse.enableCacheOptimizationFeature()');
  }
  /**
   * Disable cache optimization feature
   */
  disableCacheOptimizationFeature() {
    this.enableCacheOptimization = false;
    this.logger.info('Cache optimization disabled', 'OpenAIChatResponse.disableCacheOptimizationFeature()');
  }
  // ============================================================================
  // TYPE CONVERSION METHODS
  // ============================================================================
  /**
   * Convert our content format to OpenAI's format
   */
  convertToProviderMessage(message) {
    if (message.content.type === 'text') {
      return {
        role: message.role,
        content: message.content.text,
      };
    }
    else if (message.content.type === 'function_call') {
      if (!message.content.functionCall) {
        throw new Error('Function call is undefined');
      }
      if (message.role !== 'assistant') {
        throw new Error('Function call is not allowed for user role');
      }
      let content = {
        type: 'function_call',
        id: message.content.functionCall.id,
        call_id: message.content.functionCall.call_id,
        name: message.content.functionCall.name,
        arguments: message.content.functionCall.args,
      };
      this.logger.debug(`OpenaiFunctionCall: ${JSON.stringify(content)}`, 'OpenAIChatResponse.convertToProviderMessage()');
      return content;
    }
    else if (message.content.type === 'function_response') {
      if (!message.content.functionResponse) {
        throw new Error('Function response is undefined');
      }
      if (message.role !== 'user') {
        throw new Error('Function response is not allowed for assistant role');
      }
      let content = {
        type: 'function_call_output',
        call_id: message.content.functionResponse.call_id,
        output: message.content.functionResponse.result,
      };
      this.logger.debug(`OpenaiFunctionCallOutput: ${JSON.stringify(content)}`, 'OpenAIChatResponse.convertToProviderMessage()');
      return content;
    }
    else {
      throw new Error(`Unsupported content type: ${message.content.type}`);
    }
  }
  /**
   * Convert chunk items to message item for history
   * @param chunk Chunk to convert
   * @param role Role for the resulting message
   * @returns Message item for adding to history
   */
  convertFromChunkItems(chunk, role) {
    // Use the unified content structure from the chunk
    // The content is already in the correct ContentPart format
    return {
      role: role,
      content: chunk.content,
    };
  }
}
//# sourceMappingURL=openaiChat.js.map
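
For reference, convertToProviderMessage maps the framework's function_response content onto the Response API's function_call_output input item, and buildIncrementalInput keys off turnIdx when cache optimization is active. A hedged sketch of feeding a tool result back and triggering the incremental path follows; the turnIdx values, call_id, result string, and the literal 'continue execution' trigger are taken from the code above, so treat the shapes as illustrative rather than a documented contract.

// Hypothetical follow-up after the model requested a tool call on turn 1.
chat.addHistory({
  role: 'user',
  turnIdx: 1, // same turn as the function call it answers
  content: {
    type: 'function_response',
    functionResponse: {
      call_id: 'call_1234xyz',         // call_id from the function_call chunk
      result: '{"temperature":"21C"}', // tool output as a string
    },
  },
});

// With cache optimization enabled and a stored lastResponseId, this exact text
// takes the incremental-input path (isMultiTurnRequest checks for it), so only
// the new turn's content is sent along with previous_response_id.
const next = await chat.sendMessageStream(
  { role: 'user', turnIdx: 2, content: { type: 'text', text: 'continue execution' } },
  'prompt-2',
);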