UNPKG

@llumiverse/drivers

Version:

LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.

1,271 lines (1,113 loc) 51.8 kB
import {
    AbstractDriver, AIModel, Completion, CompletionChunkObject, CompletionResult, DataSource, DriverOptions,
    EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, ExecutionTokenUsage, getConversationMeta,
    getModelCapabilities, incrementConversationTurn, JSONSchema, LlumiverseError, LlumiverseErrorContext,
    modelModalitiesToArray, ModelType, OpenAiDalleOptions, OpenAiGptImageOptions, Providers,
    stripBase64ImagesFromConversation, stripHeartbeatsFromConversation, supportsToolUse, ToolDefinition, ToolUse,
    TrainingJob, TrainingJobStatus, TrainingOptions, TrainingPromptOptions, truncateLargeTextInConversation,
    unwrapConversationArray,
} from "@llumiverse/core";
import OpenAI, { AzureOpenAI } from "openai";
import {
    APIConnectionError, APIConnectionTimeoutError, APIError, AuthenticationError, BadRequestError, ConflictError,
    ContentFilterFinishReasonError, InternalServerError, LengthFinishReasonError, NotFoundError, OpenAIError,
    PermissionDeniedError, RateLimitError, UnprocessableEntityError,
} from 'openai/error';
import { formatOpenAILikeMultimodalPrompt } from "./openai_format.js";

// Response API types
type ResponseInputItem = OpenAI.Responses.ResponseInputItem;
type EasyInputMessage = OpenAI.Responses.EasyInputMessage;

/**
 * Helper function to convert string to CompletionResult[].
 * An empty string yields an empty array rather than an empty text part.
 */
function textToCompletionResult(text: string): CompletionResult[] {
    return text ? [{ type: "text", value: text }] : [];
}

/**
 * Name-based check for reasoning models: true when the lower-cased model id
 * contains "o1", "o3", "o4" or "gpt-5".
 */
function isOpenAIReasoningModel(model: string): boolean {
    const normalized = model.toLowerCase();
    return normalized.includes("o1")
        || normalized.includes("o3")
        || normalized.includes("o4")
        || normalized.includes("gpt-5");
}

/**
 * True when the last '/'-separated segment of the model id starts with
 * "gpt-5-pro" or "gpt-5.<digits>-pro" (case-insensitive).
 */
function isGpt5ProModel(model: string): boolean {
    const modelName = model.toLowerCase().split('/').pop() ?? model.toLowerCase();
    return /^gpt-5(?:\.\d+)?-pro/.test(modelName);
}

/**
 * Resolve the reasoning effort to send for a model.
 *
 * @returns undefined when no effort was requested, when the model is not a reasoning model,
 *          or when the requested value is not "low" | "medium" | "high";
 *          always "high" for gpt-5 pro models; otherwise the requested value.
 */
function openAIReasoningEffort(model: string, effort: string | undefined): "low" | "medium" | "high" | undefined {
    if (!effort || !isOpenAIReasoningModel(model)) {
        return undefined;
    }
    if (isGpt5ProModel(model)) {
        return "high";
    }
    return effort === "low" || effort === "medium" || effort === "high" ? effort : undefined;
}

//TODO: Do we need a list?, replace with if statements and modernize?
// Model ids reported as trainable by listTrainableModels().
const supportFineTunning = new Set([
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo-0613",
    "babbage-002",
    "davinci-002",
    "gpt-4-0613"
]);

export interface BaseOpenAIDriverOptions extends DriverOptions {
}

/**
 * Shared driver implementation for OpenAI and OpenAI-compatible providers,
 * built on the Responses API. Concrete subclasses supply `provider` and `service`.
 */
export abstract class BaseOpenAIDriver extends AbstractDriver<
    BaseOpenAIDriverOptions,
    ResponseInputItem[]
> {
    abstract provider: Providers.openai | Providers.azure_openai | Providers.xai | Providers.azure_foundry | Providers.openai_compatible;
    abstract service: OpenAI | AzureOpenAI;

    constructor(opts: BaseOpenAIDriverOptions) {
        super(opts);
        this.formatPrompt = formatOpenAILikeMultimodalPrompt;
    }

    /**
     * Convert a non-streaming Responses API result into a Completion.
     *
     * @throws Error when the response contains neither content parts nor tool calls.
     */
    extractDataFromResponse(
        _options: ExecutionOptions,
        result: OpenAI.Responses.Response
    ): Completion {
        const tokenInfo = mapUsage(result.usage);
        const tools = collectTools(result.output);
        // Collect all parts in order (text and images)
        const allResults = extractCompletionResults(result.output);

        if (allResults.length === 0 && !tools) {
            this.logger.error({ result }, "[OpenAI] Response is not valid");
            throw new Error("Response is not valid: no data");
        }

        return {
            result: allResults,
            token_usage: tokenInfo,
            finish_reason: responseFinishReason(result, tools),
            tool_use: tools,
        };
    }

    /**
     * Execute a streaming text completion through the Responses API.
     *
     * @param prompt - New input items for this turn (roles / image detail are adjusted in place).
     * @param options - Execution options (model, conversation, tools, result schema, model options).
     * @returns An async iterable of completion chunks mapped from the response stream events.
     */
    async requestTextCompletionStream(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
        if (options.model_options?._option_id !== undefined
            && options.model_options?._option_id !== "openai-text"
            && options.model_options?._option_id !== "openai-thinking"
            && options.model_options?._option_id !== "text-fallback") {
            this.logger.debug({ options: options.model_options }, "Unexpected option id");
        }

        // Normalize the prompt BEFORE merging it into the conversation (same order as the
        // non-streaming path), so the adjustments do not depend on later helpers keeping
        // references to the very same item objects.
        convertRoles(prompt, options.model);
        const model_options = options.model_options as any;
        insert_image_detail(prompt, model_options?.image_detail ?? "auto");

        const toolDefs = getToolDefinitions(options.tools);
        const useTools: boolean = toolDefs ? supportsToolUse(options.model, this.provider, true) : false;

        // Include conversation history (same as non-streaming)
        // Fix orphaned function_call items (can occur when agent is stopped mid-tool-execution)
        let conversation = fixOrphanedToolUse(updateConversation(options.conversation, prompt));

        // When no tools are provided but conversation contains function_call/function_call_output
        // items (e.g. checkpoint summary calls), convert them to text to avoid API errors
        if (!useTools) {
            conversation = convertOpenAIFunctionItemsToText(conversation);
        }

        // Try the strict schema format first; fall back to the limited, non-strict format if it throws.
        let parsedSchema: JSONSchema | undefined = undefined;
        let strictMode = false;
        if (options.result_schema && supportsSchema(options.model)) {
            try {
                parsedSchema = openAISchemaFormat(options.result_schema);
                strictMode = true;
            } catch (e) {
                parsedSchema = limitedSchemaFormat(options.result_schema);
                strictMode = false;
            }
        }

        const isReasoningModel = isOpenAIReasoningModel(options.model);
        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
        const reasoning = effort ? { effort } : undefined;

        const stream = await this.service.responses.create({
            stream: true,
            model: options.model,
            input: conversation,
            reasoning,
            // Sampling parameters are not sent for reasoning models.
            temperature: isReasoningModel ? undefined : model_options?.temperature,
            top_p: isReasoningModel ? undefined : model_options?.top_p,
            max_output_tokens: model_options?.max_tokens,
            tools: useTools ? toolDefs : undefined,
            text: parsedSchema ? {
                format: {
                    type: "json_schema",
                    name: "format_output",
                    schema: parsedSchema,
                    strict: strictMode,
                }
            } : undefined,
        });

        return mapResponseStream(stream);
    }

    /**
     * Execute a non-streaming text completion through the Responses API and
     * return the completion together with the updated (and stripped) conversation.
     *
     * @param prompt - New input items for this turn (roles / image detail are adjusted in place).
     * @param options - Execution options (model, conversation, tools, result schema, model options).
     */
    async requestTextCompletion(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<Completion> {
        if (options.model_options?._option_id !== undefined
            && options.model_options?._option_id !== "openai-text"
            && options.model_options?._option_id !== "openai-thinking") {
            this.logger.debug({ options: options.model_options }, "Unexpected option id");
        }

        convertRoles(prompt, options.model);
        const model_options = options.model_options as any;
        insert_image_detail(prompt, model_options?.image_detail ?? "auto");

        const toolDefs = getToolDefinitions(options.tools);
        const useTools: boolean = toolDefs ? supportsToolUse(options.model, this.provider) : false;

        // Fix orphaned function_call items (can occur when agent is stopped mid-tool-execution)
        let conversation = fixOrphanedToolUse(updateConversation(options.conversation, prompt));

        // When no tools are provided but conversation contains function_call/function_call_output
        // items (e.g. checkpoint summary calls), convert them to text to avoid API errors
        if (!useTools) {
            conversation = convertOpenAIFunctionItemsToText(conversation);
        }

        // Try the strict schema format first; fall back to the limited, non-strict format if it throws.
        let parsedSchema: JSONSchema | undefined = undefined;
        let strictMode = false;
        if (options.result_schema && supportsSchema(options.model)) {
            try {
                parsedSchema = openAISchemaFormat(options.result_schema);
                strictMode = true;
            } catch (e) {
                parsedSchema = limitedSchemaFormat(options.result_schema);
                strictMode = false;
            }
        }

        const isReasoningModel = isOpenAIReasoningModel(options.model);
        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
        const reasoning = effort ? { effort } : undefined;

        const res = await this.service.responses.create({
            stream: false,
            model: options.model,
            input: conversation,
            reasoning,
            // Sampling parameters are not sent for reasoning models.
            temperature: isReasoningModel ? undefined : model_options?.temperature,
            top_p: isReasoningModel ? undefined : model_options?.top_p,
            max_output_tokens: model_options?.max_tokens, //TODO: use max_tokens for older models, currently relying on OpenAI to handle it
            tools: useTools ? toolDefs : undefined,
            text: parsedSchema ? {
                format: {
                    type: "json_schema",
                    name: "format_output",
                    schema: parsedSchema,
                    strict: strictMode,
                }
            } : undefined,
        });

        const completion = this.extractDataFromResponse(options, res);
        if (options.include_original_response) {
            completion.original_response = res;
        }

        conversation = updateConversation(conversation, createAssistantMessageFromCompletion(completion));

        // Increment turn counter for deferred stripping
        conversation = incrementConversationTurn(conversation) as ResponseInputItem[];

        // Strip large base64 image data based on options.stripImagesAfterTurns
        const currentTurn = getConversationMeta(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);

        // Truncate large text content if configured
        processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);

        // Strip old heartbeat status messages
        processedConversation = stripHeartbeatsFromConversation(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });

        completion.conversation = processedConversation;
        return completion;
    }

    /**
     * Whether the requested model can be streamed. Decided purely from the model name.
     */
    protected canStream(_options: ExecutionOptions): Promise<boolean> {
        // Image generation models don't support streaming
        if (_options.model.includes("dall-e")
            || _options.model.includes("gpt-image")
            || _options.model.includes("chatgpt-image")) {
            return Promise.resolve(false);
        }
        if (_options.model.includes("o1") && !(_options.model.includes("mini") || _options.model.includes("preview"))) {
            //o1 full does not support streaming
            //TODO: Update when OpenAI adds support for streaming, last check 16/02/2025
            return Promise.resolve(false);
        }
        return Promise.resolve(true);
    }

    /**
     * Build conversation context after streaming completion.
     * Reconstructs the assistant message from accumulated results and applies stripping.
     *
     * @param prompt - Input items sent for this turn.
     * @param result - Accumulated results, treated as CompletionResult[].
     * @param toolUse - Accumulated tool calls, treated as ToolUse[].
     * @param options - Execution options carrying the prior conversation and stripping settings.
     */
    buildStreamingConversation(
        prompt: ResponseInputItem[],
        result: unknown[],
        toolUse: unknown[] | undefined,
        options: ExecutionOptions
    ): ResponseInputItem[] | undefined {
        // Build assistant message from accumulated CompletionResult[]
        const completionResults = result as CompletionResult[];
        const textContent = completionResultsToText(completionResults);

        // Start with the conversation from options or the prompt
        let conversation = updateConversation(options.conversation, prompt);

        // Add assistant message as EasyInputMessage
        if (textContent) {
            const assistantMessage: EasyInputMessage = {
                role: 'assistant',
                content: textContent,
            };
            conversation = updateConversation(conversation, [assistantMessage]);
        }

        // Add function calls as separate items (Response API format)
        if (toolUse && toolUse.length > 0) {
            const functionCalls: OpenAI.Responses.ResponseFunctionToolCall[] = (toolUse as ToolUse[]).map(t => ({
                type: 'function_call' as const,
                call_id: t.id,
                name: t.tool_name,
                arguments: typeof t.tool_input === 'string' ? t.tool_input : JSON.stringify(t.tool_input ?? {}),
            }));
            conversation = updateConversation(conversation, functionCalls);
        }

        // Increment turn counter
        conversation = incrementConversationTurn(conversation) as ResponseInputItem[];

        // Apply stripping based on options
        const currentTurn = getConversationMeta(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);
        processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
        processedConversation = stripHeartbeatsFromConversation(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });

        return processedConversation as ResponseInputItem[];
    }

    /**
     * Create a training prompt. Only models whose id contains "gpt" are delegated to the base class.
     *
     * @throws Error for any other model.
     */
    createTrainingPrompt(options: TrainingPromptOptions): Promise<string> {
        if (options.model.includes("gpt")) {
            return super.createTrainingPrompt(options);
        } else {
            // babbage, davinci not yet implemented
            throw new Error("Unsupported model for training: " + options.model);
        }
    }

    /**
     * Upload the dataset (fetched from its URL) as a fine-tune file and create a fine-tuning job for it.
     */
    async startTraining(dataset: DataSource, options: TrainingOptions): Promise<TrainingJob> {
        const url = await dataset.getURL();
        const file = await this.service.files.create({
            file: await fetch(url),
            purpose: "fine-tune",
        });

        const job = await this.service.fineTuning.jobs.create({
            training_file: file.id,
            model: options.model,
            hyperparameters: options.params
        })

        return jobInfo(job);
    }

    /** Cancel a fine-tuning job and return its mapped state. */
    async cancelTraining(jobId: string): Promise<TrainingJob> {
        const job = await this.service.fineTuning.jobs.cancel(jobId);
        return jobInfo(job);
    }

    /** Retrieve a fine-tuning job and return its mapped state. */
    async getTrainingJob(jobId: string): Promise<TrainingJob> {
        const job = await this.service.fineTuning.jobs.retrieve(jobId);
        return jobInfo(job);
    }

    // ========= management API =============

    /**
     * Probe the connection by listing models.
     *
     * @returns true when the call succeeds, false when it throws (the error is deliberately not propagated).
     */
    async validateConnection(): Promise<boolean> {
        try {
            await this.service.models.list();
            return true;
        } catch (error) {
            return false;
        }
    }

    /** List models whose id is in the fine-tuning allow list. */
    listTrainableModels(): Promise<AIModel<string>[]> {
        return this._listModels((m) => supportFineTunning.has(m.id));
    }

    /** List all models that survive the name-based filtering of `_listModels`. */
    async listModels(): Promise<AIModel[]> {
        return this._listModels();
    }

    /**
     * List the service's models, drop those whose id contains a blacklisted word,
     * apply the optional filter, and map the rest to AIModel sorted by id.
     *
     * @param filter - Optional extra predicate applied after the blacklist.
     */
    async _listModels(filter?: (m: OpenAI.Models.Model) => boolean): Promise<AIModel[]> {
        let result = (await this.service.models.list()).data;

        //Some of these use the completions API instead of the chat completions API.
        //Others are for non-text input modalities. Therefore common to both.
        const wordBlacklist = ["embed", "whisper", "transcribe", "audio", "moderation", "tts",
            "realtime", "babbage", "davinci", "codex", "o1-pro", "computer-use", "sora"];

        //OpenAI has very little information, filtering based on name.
        result = result.filter((m) => {
            return !wordBlacklist.some((word) => m.id.includes(word));
        });

        const models = filter ? result.filter(filter) : result;
        const aiModels = models.map((m) => {
            const modelCapability = getModelCapabilities(m.id, "openai");
            let owner = m.owned_by;
            if (owner === "system") {
                owner = "openai";
            }

            // Determine model type from the same list used by isImageModel(), so that
            // every entry of `imageModels` (including "chatgpt-image") is classified consistently.
            const modelType = this.isImageModel(m.id) ? ModelType.Image : ModelType.Text;

            return {
                id: m.id,
                name: m.id,
                provider: this.provider,
                owner: owner,
                type: modelType,
                input_modalities: modelModalitiesToArray(modelCapability.input),
                output_modalities: modelModalitiesToArray(modelCapability.output),
                tool_support: modelCapability.tool_support,
            } satisfies AIModel<string>;
        }).sort((a, b) => a.id.localeCompare(b.id));

        return aiModels;
    }

    /**
     * Generate a text embedding.
     *
     * @throws Error when an image is supplied, when no text is supplied,
     *         or when the response holds no (or an empty) embedding.
     */
    async generateEmbeddings({ text, image, model = "text-embedding-3-small" }: EmbeddingsOptions): Promise<EmbeddingsResult> {
        if (image) {
            throw new Error("Image embeddings not supported by OpenAI");
        }
        if (!text) {
            throw new Error("No text provided");
        }

        const res = await this.service.embeddings.create({
            input: text,
            model: model,
        });

        // Guard the first entry: an empty data array must produce the explicit error below,
        // not a TypeError on property access.
        const embeddings = res.data[0]?.embedding;
        if (!embeddings || embeddings.length === 0) {
            throw new Error("No embedding found");
        }

        return { values: embeddings, model } satisfies EmbeddingsResult;
    }

    // Substrings that identify image generation models by id.
    imageModels = ["dall-e", "gpt-image", "chatgpt-image"];

    /**
     * Determine if a model is specifically an image generation model (not conversational image model)
     */
    isImageModel(model: string): boolean {
        // DALL-E models are standalone image generation
        // gpt-image models can generate images in conversations, not standalone
        return this.imageModels.some(imageModel => model.includes(imageModel));
    }

    /**
     * Request image generation from standalone Images API
     * Supports: DALL-E 2, DALL-E 3, GPT-image models (for edit/variation)
     *
     * Failures are logged and returned as a Completion with an `error` field instead of being thrown.
     */
    async requestImageGeneration(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<Completion> {
        this.logger.debug(`[${this.provider}] Generating image with model ${options.model}`);

        const model_options = options.model_options as OpenAiDalleOptions | OpenAiGptImageOptions | undefined;

        // Extract prompt text from ResponseInputItem[]
        // Parts are separated by a real newline; the trailing one is removed by trim() below.
        let promptText = "";
        for (const item of prompt) {
            if ('content' in item && typeof item.content === 'string') {
                promptText += item.content + "\n";
            } else if ('content' in item && Array.isArray(item.content)) {
                // Extract text from content array
                for (const part of item.content) {
                    if ('type' in part && part.type === 'input_text' && 'text' in part) {
                        promptText += part.text + "\n";
                    }
                }
            }
        }
        promptText = promptText.trim();

        try {
            const generateParams: OpenAI.Images.ImageGenerateParamsNonStreaming = {
                model: options.model,
                prompt: promptText,
                size: model_options?.size || "1024x1024",
            };

            // Add DALL-E specific options
            if (options.model.includes("dall-e") || model_options?._option_id === "openai-dalle") {
                const dalleOptions = model_options as OpenAiDalleOptions | undefined;
                generateParams.n = dalleOptions?.n || 1;
                generateParams.response_format = dalleOptions?.response_format || "b64_json";

                if (options.model.includes("dall-e-3")) {
                    generateParams.quality = dalleOptions?.image_quality || "standard";
                    if (dalleOptions?.style) {
                        generateParams.style = dalleOptions.style;
                    }
                }
            } else {
                // Default for other models
                generateParams.n = 1;
            }

            const response = await this.service.images.generate(generateParams);

            // Convert response to CompletionResults
            const results: CompletionResult[] = [];
            if (response.data) {
                for (const image of response.data) {
                    let imageValue: string;
                    if (image.b64_json) {
                        // Base64 format
                        imageValue = `data:image/png;base64,${image.b64_json}`;
                    } else if (image.url) {
                        // URL format
                        imageValue = image.url;
                    } else {
                        continue;
                    }
                    results.push({
                        type: "image",
                        value: imageValue
                    });
                }
            }

            return {
                result: results
            };
        } catch (error: any) {
            this.logger.error({ error }, `[${this.provider}] Image generation failed`);
            return {
                result: [],
                error: {
                    message: error.message,
                    code: error.code || 'GENERATION_FAILED'
                }
            };
        }
    }

    /**
     * Format OpenAI API errors into LlumiverseError with proper status codes and retryability.
     *
     * OpenAI API errors have a specific structure:
     * - APIError.status: HTTP status code (400, 401, 403, 404, 409, 422, 429, 500+)
     * - APIError.error: Error object with type, message, param, code
     * - APIError.requestID: Request ID for support
     * - APIError.code: Error code (e.g., 'invalid_api_key', 'rate_limit_exceeded')
     * - APIError.param: Parameter that caused the error (optional)
     * - APIError.type: Error type (optional)
     *
     * Common error types:
     * - BadRequestError (400): Invalid request parameters
     * - AuthenticationError (401): Invalid API key
     * - PermissionDeniedError (403): Insufficient permissions
     * - NotFoundError (404): Resource not found
     * - ConflictError (409): Resource conflict
     * - UnprocessableEntityError (422): Validation error
     * - RateLimitError (429): Rate limit exceeded
     * - InternalServerError (500+): Server-side errors
     * - APIConnectionError: Connection issues (no status code)
     * - APIConnectionTimeoutError: Request timeout (no status code)
     * - LengthFinishReasonError: Response truncated due to length
     * - ContentFilterFinishReasonError: Content filtered
     *
     * This implementation works for:
     * - OpenAI API
     * - Azure OpenAI
     * - xAI (uses OpenAI-compatible API)
     * - Azure Foundry (OpenAI-compatible)
     * - Other OpenAI-compatible APIs
     *
     * Errors that are neither APIError nor OpenAIError instances are rethrown unchanged.
     *
     * @see https://platform.openai.com/docs/guides/error-codes
     */
    public formatLlumiverseError(
        error: unknown,
        context: LlumiverseErrorContext
    ): LlumiverseError {
        // Check if it's an OpenAI API error
        const isOpenAIError = this.isOpenAIApiError(error);
        if (!isOpenAIError) {
            // Not an OpenAI API error, use default handling
            throw error;
        }

        const apiError = error as APIError;
        const httpStatusCode = apiError.status;

        // Extract error message
        const message = apiError.message || String(error);

        // Extract additional error details (only available on APIError)
        const errorCode = apiError.code;
        const errorParam = apiError.param;
        const errorType = apiError.type;

        // Build user-facing message with status code
        let userMessage = message;

        // Include status code in message (for end-user visibility)
        if (httpStatusCode) {
            userMessage = `[${httpStatusCode}] ${userMessage}`;
        }

        // Add error code if available and not already in message
        if (errorCode && !userMessage.includes(errorCode)) {
            userMessage += ` (code: ${errorCode})`;
        }

        // Add parameter info if available and helpful
        if (errorParam && !userMessage.toLowerCase().includes(errorParam.toLowerCase())) {
            userMessage += ` [param: ${errorParam}]`;
        }

        // Add request ID if available (useful for OpenAI support)
        if (apiError.requestID) {
            userMessage += ` (Request ID: ${apiError.requestID})`;
        }

        // Determine retryability based on OpenAI error types
        const retryable = this.isOpenAIErrorRetryable(error, httpStatusCode, errorCode, errorType);

        // Use the error constructor name as the error name
        const errorName = error.constructor?.name || 'OpenAIError';

        return new LlumiverseError(
            `[${context.provider}] ${userMessage}`,
            retryable,
            context,
            error,
            httpStatusCode,
            errorName
        );
    }

    /**
     * Type guard to check if error is an OpenAI API error or OpenAI-specific error.
     */
    private isOpenAIApiError(error: unknown): error is APIError | OpenAIError {
        return (
            error !== null &&
            typeof error === 'object' &&
            (error instanceof APIError || error instanceof OpenAIError)
        );
    }

    /**
     * Determine if an OpenAI API error is retryable.
     *
     * Retryable errors:
     * - RateLimitError (429): Rate limit exceeded, retry with backoff
     * - InternalServerError (500+): Server-side errors
     * - APIConnectionTimeoutError: Request timeout
     * - Error codes: 'timeout', 'server_error', 'service_unavailable'
     * - Status codes: 408, 429, 502, 503, 504, 529, 5xx
     *
     * Non-retryable errors:
     * - BadRequestError (400): Invalid request parameters
     * - AuthenticationError (401): Invalid API key
     * - PermissionDeniedError (403): Insufficient permissions
     * - NotFoundError (404): Resource not found
     * - ConflictError (409): Resource conflict
     * - UnprocessableEntityError (422): Validation error
     * - LengthFinishReasonError: Length limit reached
     * - ContentFilterFinishReasonError: Content filtered
     * - Error codes: 'invalid_api_key', 'invalid_request_error', 'model_not_found'
     * - Other 4xx client errors
     *
     * @param error - The error object
     * @param httpStatusCode - The HTTP status code if available
     * @param errorCode - The error code if available
     * @param errorType - The error type if available
     * @returns True if retryable, false if not retryable, undefined if unknown
     */
    private isOpenAIErrorRetryable(
        error: unknown,
        httpStatusCode: number | undefined,
        errorCode: string | null | undefined,
        errorType: string | undefined
    ): boolean | undefined {
        // Check specific OpenAI error types by class
        if (error instanceof RateLimitError) return true;
        if (error instanceof InternalServerError) return true;
        if (error instanceof APIConnectionTimeoutError) return true;

        // Non-retryable by error type
        if (error instanceof BadRequestError) return false;
        if (error instanceof AuthenticationError) return false;
        if (error instanceof PermissionDeniedError) return false;
        if (error instanceof NotFoundError) return false;
        if (error instanceof ConflictError) return false;
        if (error instanceof UnprocessableEntityError) return false;
        if (error instanceof LengthFinishReasonError) return false;
        if (error instanceof ContentFilterFinishReasonError) return false;

        // Check error codes (OpenAI specific)
        if (errorCode) {
            // Retryable error codes
            if (errorCode === 'timeout') return true;
            if (errorCode === 'server_error') return true;
            if (errorCode === 'service_unavailable') return true;
            if (errorCode === 'rate_limit_exceeded') return true;

            // Non-retryable error codes
            if (errorCode === 'invalid_api_key') return false;
            if (errorCode === 'invalid_request_error') return false;
            if (errorCode === 'model_not_found') return false;
            if (errorCode === 'insufficient_quota') return false;
            if (errorCode === 'invalid_model') return false;
            if (errorCode.includes('invalid_')) return false;
        }

        // Check error type
        if (errorType === 'invalid_request_error') return false;
        if (errorType === 'authentication_error') return false;

        // Use HTTP status code
        if (httpStatusCode !== undefined) {
            if (httpStatusCode === 429) return true; // Rate limit
            if (httpStatusCode === 408) return true; // Request timeout
            if (httpStatusCode === 502) return true; // Bad gateway
            if (httpStatusCode === 503) return true; // Service unavailable
            if (httpStatusCode === 504) return true; // Gateway timeout
            if (httpStatusCode === 529) return true; // Overloaded
            if (httpStatusCode >= 500 && httpStatusCode < 600) return true; // Server errors
            if (httpStatusCode >= 400 && httpStatusCode < 500) return false; // Client errors
        }

        // Connection errors without status codes
        if (error instanceof APIConnectionError && !(error instanceof APIConnectionTimeoutError)) {
            // Generic connection errors might be retryable (network issues)
            return true;
        }

        // Unknown error type - let consumer decide retry strategy
        return undefined;
    }
}

/**
 * Map an OpenAI fine-tuning job to a TrainingJob.
 * 'succeeded', 'failed' and 'cancelled' map to their own status; every other
 * job status is reported as running with the raw status in `details`.
 */
function jobInfo(job: OpenAI.FineTuning.Jobs.FineTuningJob): TrainingJob {
    // Possible job statuses: `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.
    const jobStatus = job.status;
    let status = TrainingJobStatus.running;
    let details: string | undefined;
    if (jobStatus === 'succeeded') {
        status = TrainingJobStatus.succeeded;
    } else if (jobStatus === 'failed') {
        status = TrainingJobStatus.failed;
        details = job.error ? `${job.error.code} - ${job.error.message} ${job.error.param ? " [" + job.error.param + "]" : ""}` : "error";
    } else if (jobStatus === 'cancelled') {
        status = TrainingJobStatus.cancelled;
    } else {
        status = TrainingJobStatus.running;
        details = jobStatus;
    }
    return {
        id: job.id,
        model: job.fine_tuned_model || undefined,
        status,
        details
    }
}

/**
 * Map Responses API usage to ExecutionTokenUsage.
 * `prompt_new` is the input token count minus the cached token count (0 when not reported).
 *
 * @returns undefined when no usage is available.
 */
function mapUsage(usage?: OpenAI.Responses.ResponseUsage | null): ExecutionTokenUsage | undefined {
    if (!usage) {
        return undefined;
    }
    return {
        prompt: usage.input_tokens,
        result: usage.output_tokens,
        total: usage.total_tokens,
        prompt_cached: usage.input_tokens_details?.cached_tokens ?? undefined,
        prompt_new: usage.input_tokens - (usage.input_tokens_details?.cached_tokens ?? 0),
    };
}

/**
 * Flatten completion results into a single string: text parts as-is, JSON parts
 * stringified, image parts skipped, anything else via String(value || '').
 */
function completionResultsToText(completionResults: CompletionResult[] | undefined): string {
    if (!completionResults) {
        return '';
    }
    return completionResults
        .map(r => {
            switch (r.type) {
                case 'text':
                    return r.value;
                case 'json':
                    return typeof r.value === 'string' ? r.value : JSON.stringify(r.value);
                case 'image':
                    // Skip images in conversation - they're in the result
                    return '';
                default:
                    return String((r as any).value || '');
            }
        })
        .join('');
}

/**
 * Build the conversation items that represent a completion: an optional assistant
 * text message followed by one function_call item per tool use.
 */
function createAssistantMessageFromCompletion(completion: Completion): ResponseInputItem[] {
    const textContent = completionResultsToText(completion.result);
    const result: ResponseInputItem[] = [];

    // Add assistant text message if present
    if (textContent) {
        const assistantMessage: EasyInputMessage = {
            role: 'assistant',
            content: textContent,
        };
        result.push(assistantMessage);
    }

    // Add function calls as separate items (Response API format)
    if (completion.tool_use && completion.tool_use.length > 0) {
        for (const t of completion.tool_use) {
            const functionCall: OpenAI.Responses.ResponseFunctionToolCall = {
                type: 'function_call',
                call_id: t.id,
                name: t.tool_name,
                arguments: typeof t.tool_input === 'string' ? t.tool_input : JSON.stringify(t.tool_input ?? {}),
            };
            result.push(functionCall);
        }
    }

    return result;
}

/**
 * Adapt a Responses API event stream to an async iterable of CompletionChunkObject.
 *
 * Tool calls are emitted under a synthetic id (`tool_<output_index>`) with the call id
 * carried in `_actual_id`; argument text is emitted incrementally from delta events.
 * Text is emitted from output_text delta events. A final chunk with finish reason and
 * token usage is emitted on completed / incomplete / failed events.
 */
export function mapResponseStream(stream: AsyncIterable<OpenAI.Responses.ResponseStreamEvent>): AsyncIterable<CompletionChunkObject> {
    // Keyed by item id and (when present) call id, so later events can be matched by either.
    const toolCallMetadata = new Map<string, { syntheticId: string, callId?: string, name?: string }>();

    return {
        async *[Symbol.asyncIterator]() {
            for await (const event of stream) {
                if (event.type === 'response.output_item.added' && event.item.type === 'function_call') {
                    const syntheticId = `tool_${event.output_index}`;
                    const callId = event.item.call_id ?? event.item.id;
                    const metadata = { syntheticId, callId, name: event.item.name };
                    if (event.item.id) {
                        toolCallMetadata.set(event.item.id, metadata);
                    }
                    if (event.item.call_id) {
                        toolCallMetadata.set(event.item.call_id, metadata);
                    }
                    const toolUse: ToolUse & { _actual_id?: string } = {
                        id: syntheticId,
                        _actual_id: callId,
                        tool_name: event.item.name,
                        tool_input: '' as any,
                    };
                    yield {
                        result: [],
                        tool_use: [toolUse],
                    } satisfies CompletionChunkObject;
                } else if (event.type === 'response.function_call_arguments.delta') {
                    const metadata = toolCallMetadata.get(event.item_id);
                    const syntheticId = metadata?.syntheticId ?? `tool_${event.output_index}`;
                    const callId = metadata?.callId ?? event.item_id;
                    const toolUse: ToolUse & { _actual_id?: string } = {
                        id: syntheticId,
                        _actual_id: callId,
                        tool_name: metadata?.name ?? '',
                        tool_input: event.delta as any,
                    };
                    yield {
                        result: [],
                        tool_use: [toolUse],
                    } satisfies CompletionChunkObject;
                }
                // Note: We don't emit response.function_call_arguments.done because the arguments were already
                // streamed via delta events. Emitting it again would duplicate the tool_input content.
                // We only update the metadata to ensure the tool name is captured.
                else if (event.type === 'response.function_call_arguments.done') {
                    // Just update metadata, don't yield (arguments already accumulated from delta events)
                    const metadata = toolCallMetadata.get(event.item_id);
                    const syntheticId = metadata?.syntheticId ?? `tool_${event.output_index}`;
                    const tool_name = metadata?.name ?? event.name ?? '';
                    if (event.item_id) {
                        toolCallMetadata.set(event.item_id, { syntheticId, callId: metadata?.callId, name: tool_name });
                    }
                } else if (event.type === 'response.output_text.delta') {
                    yield {
                        result: textToCompletionResult(event.delta),
                    } satisfies CompletionChunkObject;
                }
                // Note: We don't emit response.output_text.done because the text was already
                // streamed via delta events. Emitting it again would duplicate the content.
                else if (event.type === 'response.completed' || event.type === 'response.incomplete' || event.type === 'response.failed') {
                    const finalTools = collectTools(event.response.output);
                    yield {
                        result: [],
                        finish_reason: responseFinishReason(event.response, finalTools),
                        token_usage: mapUsage(event.response.usage),
                    } satisfies CompletionChunkObject;
                }
            }
        }
    };
}

/**
 * Set `detail` on every input_image part of non-assistant messages, in place.
 * Does nothing unless detail_level is "auto", "low" or "high".
 *
 * @returns The same `items` array.
 */
function insert_image_detail(items: ResponseInputItem[], detail_level: string): ResponseInputItem[] {
    if (detail_level === "auto" || detail_level === "low" || detail_level === "high") {
        for (const item of items) {
            // Check if it's an EasyInputMessage or Message with content array
            if ('role' in item && 'content' in item && item.role !== 'assistant') {
                const content = (item as EasyInputMessage).content;
                if (Array.isArray(content)) {
                    for (const part of content) {
                        if (typeof part === 'object' && part.type === 'input_image') {
                            (part as any).detail = detail_level;
                        }
                    }
                }
            }
        }
    }
    return items;
}

/**
 * Rewrite 'system' roles in place for models whose id contains "o1" or "o3":
 * to 'user' for o1-mini / o1-preview, to 'developer' otherwise. Other models are untouched.
 *
 * @returns The same `items` array.
 */
function convertRoles(items: ResponseInputItem[], model: string): ResponseInputItem[] {
    //New openai models use developer role instead of system
    if (model.includes("o1") || model.includes("o3")) {
        if (model.includes("o1-mini") || model.includes("o1-preview")) {
            //o1-mini and o1-preview support neither system nor developer
            for (const item of items) {
                if ('role' in item && (item as EasyInputMessage).role === 'system') {
                    (item as any).role = 'user';
                }
            }
        } else {
            //Models newer than o1 use developer role
            for (const item of items) {
                if ('role' in item && (item as EasyInputMessage).role === 'system') {
                    (item as any).role = 'developer';
                }
            }
        }
    }
    return items;
}

//Structured output support is typically aligned with tool use support
//Not true for realtime models, which do not support structured output, but do support tool use.
/**
 * @param model - Model identifier.
 * @returns `false` when the id contains "realtime", otherwise the result of `supportsToolUse(model, "openai")`.
 */
function supportsSchema(model: string): boolean {
    if (model.includes("realtime")) {
        return false;
    }
    return supportsToolUse(model, "openai");
}

/**
 * Converts function_call and function_call_output items to text messages in OpenAI conversation.
 * Preserves tool call information while removing structured items that require
 * tools to be defined in the API request.
 *
 * Arguments and outputs longer than 500 characters are cut and suffixed with "...".
 * Values that are not plain strings (for example an output given as a list of content
 * parts) are rendered as text first, so they never show up as "[object Object]".
 *
 * @param items - Conversation items; the array and its elements are not modified.
 * @returns The same array when it holds no function items, otherwise a new array in which
 * each function_call becomes an assistant message and each function_call_output a user message.
 */
export function convertOpenAIFunctionItemsToText(items: ResponseInputItem[]): ResponseInputItem[] {
    const MAX_CHARS = 500;

    const clip = (text: string): string =>
        text.length > MAX_CHARS ? text.substring(0, MAX_CHARS) + '...' : text;

    // Render any stored value as text; strings pass through untouched.
    const toText = (value: unknown): string => {
        if (typeof value === 'string') {
            return value;
        }
        if (value === null || value === undefined) {
            return '';
        }
        if (Array.isArray(value)) {
            // Content-part list: keep the text parts, leave a short marker for anything else.
            return value
                .map((part: unknown): string => {
                    if (typeof part === 'string') {
                        return part;
                    }
                    if (part && typeof part === 'object') {
                        const { text, type } = part as { text?: unknown; type?: unknown };
                        if (typeof text === 'string') {
                            return text;
                        }
                        if (typeof type === 'string') {
                            return `[${type}]`;
                        }
                    }
                    return '';
                })
                .filter(piece => piece.length > 0)
                .join('\n');
        }
        try {
            return JSON.stringify(value) ?? '';
        } catch {
            // Circular or otherwise non-serializable value
            return String(value);
        }
    };

    const hasFunctionItems = items.some(item => {
        const type = (item as any).type;
        return type === 'function_call' || type === 'function_call_output';
    });
    if (!hasFunctionItems) return items;

    return items.map(item => {
        const typed = item as any;
        if (typed.type === 'function_call') {
            const argsStr = toText(typed.arguments || '');
            return {
                role: 'assistant' as const,
                content: `[Tool call: ${typed.name}(${clip(argsStr)})]`,
            };
        }
        if (typed.type === 'function_call_output') {
            // Falsy outputs, and outputs that render to nothing, are reported as 'No output'
            const output = toText(typed.output || 'No output') || 'No output';
            return {
                role: 'user' as const,
                content: `[Tool result: ${clip(output)}]`,
            };
        }
        return item;
    });
}

/**
 * Maps tool definitions to Responses API function tools.
 *
 * @param tools - Tool definitions, or a nullish value when no tools are configured.
 * @returns One function tool per definition, or `undefined` when `tools` is nullish.
 */
function getToolDefinitions(tools: ToolDefinition[] | undefined | null): OpenAI.Responses.Tool[] | undefined {
    return tools ? tools.map(getToolDefinition) : undefined;
}

/**
 * Builds a single Responses API function tool from a tool definition.
 *
 * The input schema is first converted with `openAISchemaFormat` and the tool marked strict.
 * If that conversion throws, the schema is converted with `limitedSchemaFormat` instead and
 * the tool is marked non-strict. Without an input schema, `parameters` is `null` and
 * `strict` is `false`.
 *
 * @param toolDef - Tool definition to convert.
 * @returns The function tool description.
 */
function getToolDefinition(toolDef: ToolDefinition): OpenAI.Responses.FunctionTool {
    let parsedSchema: JSONSchema | undefined = undefined;
    let strictMode = false;
    if (toolDef.input_schema) {
        try {
            //TODO: type assertion here is not safe, does not work with satisfies
            parsedSchema = openAISchemaFormat(toolDef.input_schema as JSONSchema);
            strictMode = true;
        } catch {
            // The strict conversion rejected the schema: fall back to the non-strict format.
            //TODO: type assertion here is not safe, does not work with satisfies
            parsedSchema = limitedSchemaFormat(toolDef.input_schema as JSONSchema);
            strictMode = false;
        }
    }

    return {
        type: "function",
        name: toolDef.name,
        description: toolDef.description,
        parameters: parsedSchema ?? null,
        strict: strictMode,
    };
}

/**
 * Appends new items to an existing conversation.
 *
 * @param conversation - Previous conversation, either as an array or in the form understood by
 * `unwrapConversationArray`; may be nullish.
 * @param items - Items to append.
 * @returns The previous conversation (or an empty array) when `items` is falsy, `items` itself
 * when there is no previous conversation, otherwise a new array with the previous items
 * followed by `items`.
 */
function updateConversation(conversation: unknown, items: ResponseInputItem[]): ResponseInputItem[] {
    if (!items) {
        // Unwrap array if wrapped, otherwise treat as array
        const unwrapped = unwrapConversationArray<ResponseInputItem>(conversation);
        return unwrapped ?? (conversation as ResponseInputItem[] || []);
    }
    if (!conversation) {
        return items;
    }
    // Unwrap array if wrapped, otherwise treat as array
    const unwrapped = unwrapConversationArray<ResponseInputItem>(conversation);
    const convArray = unwrapped ?? (conversation as ResponseInputItem[]);
    return [...convArray, ...items];
}

/**
 * Extracts the function calls found in a response output.
 *
 * Each call is identified by `call_id`, falling back to `id`; calls that have neither are
 * skipped. The arguments string is passed through `safeJsonParse`.
 *
 * @param output - Output items of a response.
 * @returns The tool uses in output order, or `undefined` when there is no output or no function call.
 */
export function collectTools(output?: OpenAI.Responses.ResponseOutputItem[]): ToolUse[] | undefined {
    if (!output) {
        return undefined;
    }

    const tools: ToolUse[] = [];
    for (const item of output) {
        if (item.type === 'function_call') {
            const id = item.call_id || item.id;
            if (!id) {
                continue;
            }
            tools.push({
                id,
                tool_name: item.name ?? '',
                tool_input: safeJsonParse(item.arguments),
            });
        }
    }

    return tools.length > 0 ? tools : undefined;
}

/**
 * Collect all parts (text and images) from response output in order.
 * This preserves the original ordering of text and image parts.
*/
function extractCompletionResults(output?: OpenAI.Responses.ResponseOutputItem[]): CompletionResult[] {
    if (!output) {
        return [];
    }

    const results: CompletionResult[] = [];
    for (const item of output) {
        if (item.type === 'message') {
            // Extract text from message content
            for (const part of item.content) {
                if (part.type === 'output_text' && part.text) {
                    results.push({ type: "text", value: part.text });
                }
            }
        } else if (item.type === 'image_generation_call' && 'result' in item && item.result) {
            // GPT-image models return base64 encoded images in result field
            const base64Data = item.result;
            // Format as data URL for consistency with other image outputs
            const imageUrl = base64Data.startsWith('data:')
                ? base64Data
                : `data:image/png;base64,${base64Data}`;
            results.push({ type: "image", value: imageUrl });
        }
    }
    return results;
}

/**
 * Prepares a JSON schema for a tool declared with strict mode false.
 *
 * Works on copies: neither the given schema nor its nested `properties` / `items` are
 * modified, so the caller's schema stays intact. At every level:
 * - `default` is removed (defaults not supported);
 * - a schema with a description but no `type` gets one: 'object' when it has
 *   `format: "document"` or an `editor` hint, otherwise 'string';
 * - every entry of `properties` and a single-schema `items` are processed recursively.
 *
 * @param schema - Schema to convert.
 * @returns The converted copy.
 */
function limitedSchemaFormat(schema: JSONSchema): JSONSchema {
    const formattedSchema = { ...schema };

    // Defaults not supported
    delete formattedSchema.default;

    // OpenAI requires type field even in non-strict mode
    // If no type is specified, default to 'object' for properties with format/editor hints,
    // otherwise 'string' as a safe fallback
    if (!formattedSchema.type && formattedSchema.description) {
        // Properties with format: "document" or editor hints are typically objects
        if (formattedSchema.format === 'document' || formattedSchema.editor) {
            formattedSchema.type = 'object';
        } else {
            formattedSchema.type = 'string';
        }
    }

    if (formattedSchema.properties) {
        // Build a fresh map: the spread above is shallow, so writing into the existing
        // `properties` object would modify the caller's schema.
        const convertedProperties: NonNullable<JSONSchema["properties"]> = {};
        for (const propName of Object.keys(formattedSchema.properties)) {
            convertedProperties[propName] = limitedSchemaFormat(formattedSchema.properties[propName]);
        }
        formattedSchema.properties = convertedProperties;
    }

    // Array element schema (single-schema form only), wherever the array sits in the tree
    const itemSchema = formattedSchema.items;
    if (itemSchema && typeof itemSchema === 'object' && !Array.isArray(itemSchema)) {
        formattedSchema.items = limitedSchemaFormat(itemSchema as JSONSchema);
    }

    return formattedSchema;
}

/**
 * Prepares a JSON schema for a tool declared with strict mode true.
 *
 * Works on copies: neither the given schema nor its nested `properties` / `items` are
 * modified, even when the conversion throws part-way. At every level:
 * - `default` is removed (defaults not supported);
 * - object schemas get `additionalProperties: false`;
 * - every declared property is listed in `required` and processed one level deeper;
 * - a single-schema `items` is processed at the same level as the array that holds it.
 *
 * @param schema - Schema to convert.
 * @param nesting - Depth of `schema` below the root; callers normally omit it.
 * @returns The converted copy.
 * @throws Error when the depth exceeds 5, when a property has no `type`, or when an object
 * schema declares no properties.
 */
function openAISchemaFormat(schema: JSONSchema, nesting: number = 0): JSONSchema {
    if (nesting > 5) {
        throw new Error("OpenAI schema nesting too deep");
    }

    const formattedSchema = { ...schema };

    // Defaults not supported
    delete formattedSchema.default;

    // Additional properties not supported, required to be set.
    if (formattedSchema.type === "object") {
        formattedSchema.additionalProperties = false;
    }

    if (formattedSchema.properties) {
        const propNames = Object.keys(formattedSchema.properties);

        // Set all properties as required
        formattedSchema.required = propNames;

        // Build a fresh map: the spread above is shallow, so writing into the existing
        // `properties` object would leave the caller's schema half-converted if a later
        // property makes this function throw.
        const convertedProperties: NonNullable<JSONSchema["properties"]> = {};
        for (const propName of propNames) {
            const property = formattedSchema.properties[propName];

            // OpenAI strict mode requires all properties to have a type
            if (!property?.type) {
                throw new Error(`Property '${propName}' is missing required 'type' field for OpenAI strict mode`);
            }

            convertedProperties[propName] = openAISchemaFormat(property, nesting + 1);
        }
        formattedSchema.properties = convertedProperties;
    }

    // Array element schema (single-schema form only). Elements count at the same depth as
    // their array, which keeps the depth limit where it was for arrays of objects.
    const itemSchema = formattedSchema.items;
    if (itemSchema && typeof itemSchema === 'object' && !Array.isArray(itemSchema)) {
        formattedSchema.items = openAISchemaFormat(itemSchema as JSONSchema, nesting);
    }

    if (formattedSchema.type === 'object'
        && (!formattedSchema.properties || Object.keys(formattedSchema.properties).length === 0)) {
        //If no properties are defined, then additionalProperties: true was set or the object would be empty.
        //OpenAI does not support this on structured output/ strict mode.
        throw new Error("OpenAI does not support empty objects or objects with additionalProperties set to true");
    }

    return formattedSchema;
}

/**
 * Derives the finish reason reported for a response.
 *
 * @param response - The (possibly partial) response object.
 * @param tools - Tool uses collected from the response, if any.
 * @returns "tool_use" when at least one tool use is present; for an incomplete response
 * 'length' when the reason is 'max_output_tokens', else the reported reason or 'incomplete';
 * the raw status for any other status than 'completed'; otherwise 'stop'.
 */
function responseFinishReason(response: OpenAI.Responses.Response, tools?: ToolUse[] | undefined): string | undefined {
    if (tools && tools.length > 0) {
        return "tool_use";
    }
    if (response.status === 'incomplete') {
        if (response.incomplete_details?.reason === 'max_output_tokens') {
            return 'length';
        }
        return response.incomplete_details?.reason ?? 'incomplete';
    }
    if (response.status && response.status !== 'completed') {
        return response.status;
    }
    return 'stop';
}

/**
 * Fix orphaned function_call items in the OpenAI Responses API conversation.
 *
 * When an agent is stopped mid-tool-execution, the conversation may contain
 * function_call items without matching function_call_output items. The OpenAI
 * Responses API requires every function_call to have a matching function_call_output.
 *
 * This function detects such cases and injects synthetic function_call_output items
 * indicating the tools were interrupted, allowing the conversation to continue.
 */
export function fixOrphanedToolUse(items: ResponseInputItem[]): ResponseInputItem[] {
    if (items.length < 2) return items;

    // First pass: collect all function_call_output call_ids
    const outputCallIds = new Set<string>();
    for (const item of items) {
        if ('type' in item && item.type === 'function_call_output') {
            outputCallIds.add((item as OpenAI.Responses.ResponseInputItem.FunctionCallOutput).call_id);
        }
    }

    // Second pass: build result, injecting synthetic outputs for orphaned function_calls
    const result: ResponseInputItem[] = [];
    const pendingCalls = new Map<string, string