UNPKG

@llumiverse/drivers

Version:

LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.

github.com/vertesia/llumiverse

vertesia/llumiverse

1,135 lines • 47.8 kB

JavaScript

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.BaseOpenAIDriver = void 0; exports.mapResponseStream = mapResponseStream; exports.convertOpenAIFunctionItemsToText = convertOpenAIFunctionItemsToText; exports.collectTools = collectTools; exports.fixOrphanedToolUse = fixOrphanedToolUse; const core_1 = require("@llumiverse/core"); const error_1 = require("openai/error"); const openai_format_js_1 = require("./openai_format.js"); // Helper function to convert string to CompletionResult[] function textToCompletionResult(text) { return text ? [{ type: "text", value: text }] : []; } function isOpenAIReasoningModel(model) { const normalized = model.toLowerCase(); return normalized.includes("o1") || normalized.includes("o3") || normalized.includes("o4") || normalized.includes("gpt-5"); } function isGpt5ProModel(model) { const modelName = model.toLowerCase().split('/').pop() ?? model.toLowerCase(); return /^gpt-5(?:\.\d+)?-pro/.test(modelName); } function openAIReasoningEffort(model, effort) { if (!effort || !isOpenAIReasoningModel(model)) { return undefined; } if (isGpt5ProModel(model)) { return "high"; } return effort === "low" || effort === "medium" || effort === "high" ? effort : undefined; } //TODO: Do we need a list?, replace with if statements and modernize? 
const supportFineTunning = new Set([
    "gpt-3.5-turbo-1106",
    "gpt-3.5-turbo-0613",
    "babbage-002",
    "davinci-002",
    "gpt-4-0613"
]);
/**
 * Base driver for OpenAI-style services. Text completion goes through
 * `this.service.responses.create`; images, embeddings, fine-tuning and model
 * listing go through the matching `this.service` namespaces.
 */
class BaseOpenAIDriver extends core_1.AbstractDriver {
    constructor(opts) {
        super(opts);
        this.formatPrompt = openai_format_js_1.formatOpenAILikeMultimodalPrompt;
    }
    /**
     * Convert a non-streaming response into a completion object.
     * @throws {Error} when the response has neither results nor tool calls.
     */
    extractDataFromResponse(_options, result) {
        const tokenInfo = mapUsage(result.usage);
        const tools = collectTools(result.output);
        // Collect all parts in order (text and images)
        const allResults = extractCompletionResults(result.output);
        if (allResults.length === 0 && !tools) {
            this.logger.error({ result }, "[OpenAI] Response is not valid");
            throw new Error("Response is not valid: no data");
        }
        return {
            result: allResults,
            token_usage: tokenInfo,
            finish_reason: responseFinishReason(result, tools),
            tool_use: tools,
        };
    }
    /**
     * Issue a streaming request and return an async iterable of completion chunks.
     */
    async requestTextCompletionStream(prompt, options) {
        if (options.model_options?._option_id !== undefined &&
            options.model_options?._option_id !== "openai-text" &&
            options.model_options?._option_id !== "openai-thinking" &&
            options.model_options?._option_id !== "text-fallback") {
            this.logger.debug({ options: options.model_options }, "Unexpected option id");
        }
        // Include conversation history (same as non-streaming)
        // Fix orphaned function_call items (can occur when agent is stopped mid-tool-execution)
        let conversation = fixOrphanedToolUse(updateConversation(options.conversation, prompt));
        const toolDefs = getToolDefinitions(options.tools);
        const useTools = toolDefs ? (0, core_1.supportsToolUse)(options.model, this.provider, true) : false;
        // When no tools are provided but conversation contains function_call/function_call_output
        // items (e.g. checkpoint summary calls), convert them to text to avoid API errors
        if (!useTools) {
            conversation = convertOpenAIFunctionItemsToText(conversation);
        }
        // Both helpers mutate the prompt items in place.
        convertRoles(prompt, options.model);
        const model_options = options.model_options;
        insert_image_detail(prompt, model_options?.image_detail ?? "auto");
        let parsedSchema = undefined;
        let strictMode = false;
        if (options.result_schema && supportsSchema(options.model)) {
            try {
                parsedSchema = openAISchemaFormat(options.result_schema);
                strictMode = true;
            }
            catch (e) {
                // Fall back to the limited (non-strict) schema format
                parsedSchema = limitedSchemaFormat(options.result_schema);
                strictMode = false;
            }
        }
        const isReasoningModel = isOpenAIReasoningModel(options.model);
        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
        const reasoning = effort ? { effort } : undefined;
        const stream = await this.service.responses.create({
            stream: true,
            model: options.model,
            input: conversation,
            reasoning,
            // Sampling parameters are omitted for reasoning models
            temperature: isReasoningModel ? undefined : model_options?.temperature,
            top_p: isReasoningModel ? undefined : model_options?.top_p,
            max_output_tokens: model_options?.max_tokens,
            tools: useTools ? toolDefs : undefined,
            text: parsedSchema ? {
                format: {
                    type: "json_schema",
                    name: "format_output",
                    schema: parsedSchema,
                    strict: strictMode,
                }
            } : undefined,
        });
        return mapResponseStream(stream);
    }
    /**
     * Issue a non-streaming request and return the completion, including the
     * updated (and stripped/truncated) conversation.
     */
    async requestTextCompletion(prompt, options) {
        if (options.model_options?._option_id !== undefined &&
            options.model_options?._option_id !== "openai-text" &&
            options.model_options?._option_id !== "openai-thinking") {
            this.logger.debug({ options: options.model_options }, "Unexpected option id");
        }
        convertRoles(prompt, options.model);
        const model_options = options.model_options;
        insert_image_detail(prompt, model_options?.image_detail ?? "auto");
        const toolDefs = getToolDefinitions(options.tools);
        const useTools = toolDefs ? (0, core_1.supportsToolUse)(options.model, this.provider) : false;
        // Fix orphaned function_call items (can occur when agent is stopped mid-tool-execution)
        let conversation = fixOrphanedToolUse(updateConversation(options.conversation, prompt));
        // When no tools are provided but conversation contains function_call/function_call_output
        // items (e.g. checkpoint summary calls), convert them to text to avoid API errors
        if (!useTools) {
            conversation = convertOpenAIFunctionItemsToText(conversation);
        }
        let parsedSchema = undefined;
        let strictMode = false;
        if (options.result_schema && supportsSchema(options.model)) {
            try {
                parsedSchema = openAISchemaFormat(options.result_schema);
                strictMode = true;
            }
            catch (e) {
                // Fall back to the limited (non-strict) schema format
                parsedSchema = limitedSchemaFormat(options.result_schema);
                strictMode = false;
            }
        }
        const isReasoningModel = isOpenAIReasoningModel(options.model);
        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
        const reasoning = effort ? { effort } : undefined;
        const res = await this.service.responses.create({
            stream: false,
            model: options.model,
            input: conversation,
            reasoning,
            temperature: isReasoningModel ? undefined : model_options?.temperature,
            top_p: isReasoningModel ? undefined : model_options?.top_p,
            max_output_tokens: model_options?.max_tokens, //TODO: use max_tokens for older models, currently relying on OpenAI to handle it
            tools: useTools ? toolDefs : undefined,
            text: parsedSchema ? {
                format: {
                    type: "json_schema",
                    name: "format_output",
                    schema: parsedSchema,
                    strict: strictMode,
                }
            } : undefined,
        });
        const completion = this.extractDataFromResponse(options, res);
        if (options.include_original_response) {
            completion.original_response = res;
        }
        conversation = updateConversation(conversation, createAssistantMessageFromCompletion(completion));
        // Increment turn counter for deferred stripping
        conversation = (0, core_1.incrementConversationTurn)(conversation);
        // Strip large base64 image data based on options.stripImagesAfterTurns
        const currentTurn = (0, core_1.getConversationMeta)(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(conversation, stripOptions);
        // Truncate large text content if configured
        processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions);
        // Strip old heartbeat status messages
        processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });
        completion.conversation = processedConversation;
        return completion;
    }
    /**
     * Resolve to false for image generation models and for full o1
     * (an "o1" id that is neither "mini" nor "preview"); true otherwise.
     */
    canStream(_options) {
        // Image generation models don't support streaming
        if (_options.model.includes("dall-e") || _options.model.includes("gpt-image") || _options.model.includes("chatgpt-image")) {
            return Promise.resolve(false);
        }
        if (_options.model.includes("o1") && !(_options.model.includes("mini") || _options.model.includes("preview"))) {
            //o1 full does not support streaming
            //TODO: Update when OpenAI adds support for streaming, last check 16/02/2025
            return Promise.resolve(false);
        }
        return Promise.resolve(true);
    }
    /**
     * Build conversation context after streaming completion.
     * Reconstructs the assistant message from accumulated results and applies stripping.
     */
    buildStreamingConversation(prompt, result, toolUse, options) {
        // Build assistant message from accumulated CompletionResult[]
        const completionResults = result;
        const textContent = completionResultsToText(completionResults);
        // Start with the conversation from options or the prompt
        let conversation = updateConversation(options.conversation, prompt);
        // Add assistant message as EasyInputMessage
        if (textContent) {
            const assistantMessage = {
                role: 'assistant',
                content: textContent,
            };
            conversation = updateConversation(conversation, [assistantMessage]);
        }
        // Add function calls as separate items (Response API format)
        if (toolUse && toolUse.length > 0) {
            const functionCalls = toolUse.map(t => ({
                type: 'function_call',
                call_id: t.id,
                name: t.tool_name,
                arguments: typeof t.tool_input === 'string' ? t.tool_input : JSON.stringify(t.tool_input ?? {}),
            }));
            conversation = updateConversation(conversation, functionCalls);
        }
        // Increment turn counter
        conversation = (0, core_1.incrementConversationTurn)(conversation);
        // Apply stripping based on options
        const currentTurn = (0, core_1.getConversationMeta)(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(conversation, stripOptions);
        processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions);
        processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });
        return processedConversation;
    }
    /**
     * @throws {Error} for models whose id does not contain "gpt".
     */
    createTrainingPrompt(options) {
        if (options.model.includes("gpt")) {
            return super.createTrainingPrompt(options);
        }
        else {
            // babbage, davinci not yet implemented
            throw new Error("Unsupported model for training: " + options.model);
        }
    }
    /** Upload the dataset as a fine-tune file and create a fine-tuning job for it. */
    async startTraining(dataset, options) {
        const url = await dataset.getURL();
        const file = await this.service.files.create({
            file: await fetch(url),
            purpose: "fine-tune",
        });
        const job = await this.service.fineTuning.jobs.create({
            training_file: file.id,
            model: options.model,
            hyperparameters: options.params
        });
        return jobInfo(job);
    }
    async cancelTraining(jobId) {
        const job = await this.service.fineTuning.jobs.cancel(jobId);
        return jobInfo(job);
    }
    async getTrainingJob(jobId) {
        const job = await this.service.fineTuning.jobs.retrieve(jobId);
        return jobInfo(job);
    }
    // ========= management API =============
    /** Returns true when listing models succeeds, false on any error (best-effort probe). */
    async validateConnection() {
        try {
            await this.service.models.list();
            return true;
        }
        catch (error) {
            return false;
        }
    }
    listTrainableModels() {
        return this._listModels((m) => supportFineTunning.has(m.id));
    }
    async listModels() {
        return this._listModels();
    }
    /**
     * List models from the service, drop ids containing a blacklisted word,
     * apply the optional filter, and return descriptors sorted by id.
     */
    async _listModels(filter) {
        let result = (await this.service.models.list()).data;
        //Some of these use the completions API instead of the chat completions API.
        //Others are for non-text input modalities. Therefore common to both.
        const wordBlacklist = ["embed", "whisper", "transcribe", "audio", "moderation", "tts", "realtime", "babbage", "davinci", "codex", "o1-pro", "computer-use", "sora"];
        //OpenAI has very little information, filtering based on name.
        result = result.filter((m) => {
            return !wordBlacklist.some((word) => m.id.includes(word));
        });
        const models = filter ? result.filter(filter) : result;
        const aiModels = models.map((m) => {
            const modelCapability = (0, core_1.getModelCapabilities)(m.id, "openai");
            let owner = m.owned_by;
            if (owner === "system") {
                owner = "openai";
            }
            // Determine model type from the id, using the same list as isImageModel/canStream
            let modelType = core_1.ModelType.Text;
            if (this.isImageModel(m.id)) {
                modelType = core_1.ModelType.Image;
            }
            return {
                id: m.id,
                name: m.id,
                provider: this.provider,
                owner: owner,
                type: modelType,
                input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input),
                output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output),
                tool_support: modelCapability.tool_support,
            };
        }).sort((a, b) => a.id.localeCompare(b.id));
        return aiModels;
    }
    /**
     * Create a text embedding.
     * @throws {Error} when an image is passed, no text is given, or no embedding is returned.
     */
    async generateEmbeddings({ text, image, model = "text-embedding-3-small" }) {
        if (image) {
            throw new Error("Image embeddings not supported by OpenAI");
        }
        if (!text) {
            throw new Error("No text provided");
        }
        const res = await this.service.embeddings.create({
            input: text,
            model: model,
        });
        const embeddings = res.data[0].embedding;
        if (!embeddings || embeddings.length === 0) {
            throw new Error("No embedding found");
        }
        return { values: embeddings, model };
    }
    imageModels = ["dall-e", "gpt-image", "chatgpt-image"];
    /**
     * Determine if a model is specifically an image generation model (not conversational image model)
     */
    isImageModel(model) {
        // DALL-E models are standalone image generation
        // gpt-image models can generate images in conversations, not standalone
        return this.imageModels.some(imageModel => model.includes(imageModel));
    }
    /**
     * Request image generation from standalone Images API
     * Supports: DALL-E 2, DALL-E 3, GPT-image models (for edit/variation)
     *
     * Failures are logged and returned as `{ result: [], error }` rather than thrown.
     */
    async requestImageGeneration(prompt, options) {
        this.logger.debug(`[${this.provider}] Generating image with model ${options.model}`);
        const model_options = options.model_options;
        // Extract prompt text from ResponseInputItem[]
        let promptText = "";
        for (const item of prompt) {
            if ('content' in item && typeof item.content === 'string') {
                // Real newline separator (was the literal characters backslash + n)
                promptText += item.content + "\n";
            }
            else if ('content' in item && Array.isArray(item.content)) {
                // Extract text from content array
                for (const part of item.content) {
                    if ('type' in part && part.type === 'input_text' && 'text' in part) {
                        promptText += part.text + "\n";
                    }
                }
            }
        }
        promptText = promptText.trim();
        try {
            const generateParams = {
                model: options.model,
                prompt: promptText,
                size: model_options?.size || "1024x1024",
            };
            // Add DALL-E specific options
            if (options.model.includes("dall-e") || model_options?._option_id === "openai-dalle") {
                const dalleOptions = model_options;
                generateParams.n = dalleOptions?.n || 1;
                generateParams.response_format = dalleOptions?.response_format || "b64_json";
                if (options.model.includes("dall-e-3")) {
                    generateParams.quality = dalleOptions?.image_quality || "standard";
                    if (dalleOptions?.style) {
                        generateParams.style = dalleOptions.style;
                    }
                }
            }
            else {
                // Default for other models
                generateParams.n = 1;
            }
            const response = await this.service.images.generate(generateParams);
            // Convert response to CompletionResults
            const results = [];
            if (response.data) {
                for (const image of response.data) {
                    let imageValue;
                    if (image.b64_json) {
                        // Base64 format
                        imageValue = `data:image/png;base64,${image.b64_json}`;
                    }
                    else if (image.url) {
                        // URL format
                        imageValue = image.url;
                    }
                    else {
                        continue;
                    }
                    results.push({
                        type: "image",
                        value: imageValue
                    });
                }
            }
            return {
                result: results
            };
        }
        catch (error) {
            this.logger.error({ error }, `[${this.provider}] Image generation failed`);
            return {
                result: [],
                error: {
                    message: error.message,
                    code: error.code || 'GENERATION_FAILED'
                }
            };
        }
    }
    /**
     * Format OpenAI API errors into LlumiverseError with proper status codes and retryability.
     *
     * OpenAI API errors have a specific structure:
     * - APIError.status: HTTP status code (400, 401, 403, 404, 409, 422, 429, 500+)
     * - APIError.error: Error object with type, message, param, code
     * - APIError.requestID: Request ID for support
     * - APIError.code: Error code (e.g., 'invalid_api_key', 'rate_limit_exceeded')
     * - APIError.param: Parameter that caused the error (optional)
     * - APIError.type: Error type (optional)
     *
     * Common error types:
     * - BadRequestError (400): Invalid request parameters
     * - AuthenticationError (401): Invalid API key
     * - PermissionDeniedError (403): Insufficient permissions
     * - NotFoundError (404): Resource not found
     * - ConflictError (409): Resource conflict
     * - UnprocessableEntityError (422): Validation error
     * - RateLimitError (429): Rate limit exceeded
     * - InternalServerError (500+): Server-side errors
     * - APIConnectionError: Connection issues (no status code)
     * - APIConnectionTimeoutError: Request timeout (no status code)
     * - LengthFinishReasonError: Response truncated due to length
     * - ContentFilterFinishReasonError: Content filtered
     *
     * This implementation works for:
     * - OpenAI API
     * - Azure OpenAI
     * - xAI (uses OpenAI-compatible API)
     * - Azure Foundry (OpenAI-compatible)
     * - Other OpenAI-compatible APIs
     *
     * @see https://platform.openai.com/docs/guides/error-codes
     */
    formatLlumiverseError(error, context) {
        // Check if it's an OpenAI API error
        const isOpenAIError = this.isOpenAIApiError(error);
        if (!isOpenAIError) {
            // Not an OpenAI API error, use default handling
            throw error;
        }
        const apiError = error;
        const httpStatusCode = apiError.status;
        // Extract error message
        const message = apiError.message || String(error);
        // Extract additional error details (only available on APIError)
        const errorCode = apiError.code;
        const errorParam = apiError.param;
        const errorType = apiError.type;
        // Build user-facing message with status code
        let userMessage = message;
        // Include status code in message (for end-user visibility)
        if (httpStatusCode) {
            userMessage = `[${httpStatusCode}] ${userMessage}`;
        }
        // Add error code if available and not already in message
        if (errorCode && !userMessage.includes(errorCode)) {
            userMessage += ` (code: ${errorCode})`;
        }
        // Add parameter info if available and helpful
        if (errorParam && !userMessage.toLowerCase().includes(errorParam.toLowerCase())) {
            userMessage += ` [param: ${errorParam}]`;
        }
        // Add request ID if available (useful for OpenAI support)
        if (apiError.requestID) {
            userMessage += ` (Request ID: ${apiError.requestID})`;
        }
        // Determine retryability based on OpenAI error types
        const retryable = this.isOpenAIErrorRetryable(error, httpStatusCode, errorCode, errorType);
        // Use the error constructor name as the error name
        const errorName = error.constructor?.name || 'OpenAIError';
        return new core_1.LlumiverseError(`[${context.provider}] ${userMessage}`, retryable, context, error, httpStatusCode, errorName);
    }
    /**
     * Type guard to check if error is an OpenAI API error or OpenAI-specific error.
     */
    isOpenAIApiError(error) {
        return (error !== null &&
            typeof error === 'object' &&
            (error instanceof error_1.APIError || error instanceof error_1.OpenAIError));
    }
    /**
     * Determine if an OpenAI API error is retryable.
     *
     * Retryable errors:
     * - RateLimitError (429): Rate limit exceeded, retry with backoff
     * - InternalServerError (500+): Server-side errors
     * - APIConnectionTimeoutError: Request timeout
     * - Error codes: 'timeout', 'server_error', 'service_unavailable'
     * - Status codes: 408, 429, 502, 503, 504, 529, 5xx
     *
     * Non-retryable errors:
     * - Error code 'insufficient_quota' (checked first: it arrives as a 429 RateLimitError)
     * - BadRequestError (400): Invalid request parameters
     * - AuthenticationError (401): Invalid API key
     * - PermissionDeniedError (403): Insufficient permissions
     * - NotFoundError (404): Resource not found
     * - ConflictError (409): Resource conflict
     * - UnprocessableEntityError (422): Validation error
     * - LengthFinishReasonError: Length limit reached
     * - ContentFilterFinishReasonError: Content filtered
     * - Error codes: 'invalid_api_key', 'invalid_request_error', 'model_not_found'
     * - Other 4xx client errors
     *
     * @param error - The error object
     * @param httpStatusCode - The HTTP status code if available
     * @param errorCode - The error code if available
     * @param errorType - The error type if available
     * @returns True if retryable, false if not retryable, undefined if unknown
     */
    isOpenAIErrorRetryable(error, httpStatusCode, errorCode, errorType) {
        // Quota exhaustion is delivered as a 429 RateLimitError, so it has to be ruled out
        // before the class checks below; otherwise this rule could never be reached.
        if (errorCode === 'insufficient_quota')
            return false;
        // Check specific OpenAI error types by class
        if (error instanceof error_1.RateLimitError)
            return true;
        if (error instanceof error_1.InternalServerError)
            return true;
        if (error instanceof error_1.APIConnectionTimeoutError)
            return true;
        // Non-retryable by error type
        if (error instanceof error_1.BadRequestError)
            return false;
        if (error instanceof error_1.AuthenticationError)
            return false;
        if (error instanceof error_1.PermissionDeniedError)
            return false;
        if (error instanceof error_1.NotFoundError)
            return false;
        if (error instanceof error_1.ConflictError)
            return false;
        if (error instanceof error_1.UnprocessableEntityError)
            return false;
        if (error instanceof error_1.LengthFinishReasonError)
            return false;
        if (error instanceof error_1.ContentFilterFinishReasonError)
            return false;
        // Check error codes (OpenAI specific)
        if (errorCode) {
            // Retryable error codes
            if (errorCode === 'timeout')
                return true;
            if (errorCode === 'server_error')
                return true;
            if (errorCode === 'service_unavailable')
                return true;
            if (errorCode === 'rate_limit_exceeded')
                return true;
            // Non-retryable error codes
            if (errorCode === 'invalid_api_key')
                return false;
            if (errorCode === 'invalid_request_error')
                return false;
            if (errorCode === 'model_not_found')
                return false;
            if (errorCode === 'invalid_model')
                return false;
            if (errorCode.includes('invalid_'))
                return false;
        }
        // Check error type
        if (errorType === 'invalid_request_error')
            return false;
        if (errorType === 'authentication_error')
            return false;
        // Use HTTP status code
        if (httpStatusCode !== undefined) {
            if (httpStatusCode === 429)
                return true; // Rate limit
            if (httpStatusCode === 408)
                return true; // Request timeout
            if (httpStatusCode === 502)
                return true; // Bad gateway
            if (httpStatusCode === 503)
                return true; // Service unavailable
            if (httpStatusCode === 504)
                return true; // Gateway timeout
            if (httpStatusCode === 529)
                return true; // Overloaded
            if (httpStatusCode >= 500 && httpStatusCode < 600)
                return true; // Server errors
            if (httpStatusCode >= 400 && httpStatusCode < 500)
                return false; // Client errors
        }
        // Connection errors without status codes
        if (error instanceof error_1.APIConnectionError && !(error instanceof error_1.APIConnectionTimeoutError)) {
            // Generic connection errors might be retryable (network issues)
            return true;
        }
        // Unknown error type - let consumer decide retry strategy
        return undefined;
    }
}
exports.BaseOpenAIDriver = BaseOpenAIDriver;
/**
 * Map a fine-tuning job to `{ id, model, status, details }`.
 * Any status other than succeeded/failed/cancelled is reported as running,
 * with the raw status string in `details`.
 */
function jobInfo(job) {
    //validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.
    const jobStatus = job.status;
    let status = core_1.TrainingJobStatus.running;
    let details;
    if (jobStatus === 'succeeded') {
        status = core_1.TrainingJobStatus.succeeded;
    }
    else if (jobStatus === 'failed') {
        status = core_1.TrainingJobStatus.failed;
        details = job.error ? `${job.error.code} - ${job.error.message} ${job.error.param ? " [" + job.error.param + "]" : ""}` : "error";
    }
    else if (jobStatus === 'cancelled') {
        status = core_1.TrainingJobStatus.cancelled;
    }
    else {
        status = core_1.TrainingJobStatus.running;
        details = jobStatus;
    }
    return {
        id: job.id,
        model: job.fine_tuned_model || undefined,
        status,
        details
    };
}
/**
 * Map response usage to token usage; returns undefined when usage is missing.
 * `prompt_new` is input tokens minus cached tokens (cached defaults to 0).
 */
function mapUsage(usage) {
    if (!usage) {
        return undefined;
    }
    return {
        prompt: usage.input_tokens,
        result: usage.output_tokens,
        total: usage.total_tokens,
        prompt_cached: usage.input_tokens_details?.cached_tokens ?? undefined,
        prompt_new: usage.input_tokens - (usage.input_tokens_details?.cached_tokens ?? 0),
    };
}
/**
 * Concatenate completion results into one string: text as-is, json stringified
 * unless already a string, images skipped, anything else via String().
 */
function completionResultsToText(completionResults) {
    if (!completionResults) {
        return '';
    }
    return completionResults
        .map(r => {
        switch (r.type) {
            case 'text':
                return r.value;
            case 'json':
                return typeof r.value === 'string' ? r.value : JSON.stringify(r.value);
            case 'image':
                // Skip images in conversation - they're in the result
                return '';
            default:
                return String(r.value || '');
        }
    })
        .join('');
}
/**
 * Build the conversation items for a completion: an assistant text message
 * (if there is text) followed by one function_call item per tool use.
 */
function createAssistantMessageFromCompletion(completion) {
    const textContent = completionResultsToText(completion.result);
    const result = [];
    // Add assistant text message if present
    if (textContent) {
        const assistantMessage = {
            role: 'assistant',
            content: textContent,
        };
        result.push(assistantMessage);
    }
    // Add function calls as separate items (Response API format)
    if (completion.tool_use && completion.tool_use.length > 0) {
        for (const t of completion.tool_use) {
            const functionCall = {
                type: 'function_call',
                call_id: t.id,
                name: t.tool_name,
                arguments: typeof t.tool_input === 'string' ? t.tool_input : JSON.stringify(t.tool_input ?? {}),
            };
            result.push(functionCall);
        }
    }
    return result;
}
/**
 * Adapt a response event stream into an async iterable of completion chunks.
 * Tool calls are keyed by a synthetic id `tool_<output_index>`; the real call id
 * is carried in `_actual_id`.
 */
function mapResponseStream(stream) {
    // item id / call id -> { syntheticId, callId, name }
    const toolCallMetadata = new Map();
    return {
        async *[Symbol.asyncIterator]() {
            for await (const event of stream) {
                if (event.type === 'response.output_item.added' && event.item.type === 'function_call') {
                    const syntheticId = `tool_${event.output_index}`;
                    const callId = event.item.call_id ?? event.item.id;
                    const metadata = { syntheticId, callId, name: event.item.name };
                    if (event.item.id) {
                        toolCallMetadata.set(event.item.id, metadata);
                    }
                    if (event.item.call_id) {
                        toolCallMetadata.set(event.item.call_id, metadata);
                    }
                    const toolUse = {
                        id: syntheticId,
                        _actual_id: callId,
                        tool_name: event.item.name,
                        tool_input: '',
                    };
                    yield {
                        result: [],
                        tool_use: [toolUse],
                    };
                }
                else if (event.type === 'response.function_call_arguments.delta') {
                    const metadata = toolCallMetadata.get(event.item_id);
                    const syntheticId = metadata?.syntheticId ?? `tool_${event.output_index}`;
                    const callId = metadata?.callId ?? event.item_id;
                    const toolUse = {
                        id: syntheticId,
                        _actual_id: callId,
                        tool_name: metadata?.name ?? '',
                        tool_input: event.delta,
                    };
                    yield {
                        result: [],
                        tool_use: [toolUse],
                    };
                }
                // Note: We don't emit response.function_call_arguments.done because the arguments were already
                // streamed via delta events. Emitting it again would duplicate the tool_input content.
                // We only update the metadata to ensure the tool name is captured.
                else if (event.type === 'response.function_call_arguments.done') {
                    // Just update metadata, don't yield (arguments already accumulated from delta events)
                    const metadata = toolCallMetadata.get(event.item_id);
                    const syntheticId = metadata?.syntheticId ?? `tool_${event.output_index}`;
                    const tool_name = metadata?.name ?? event.name ?? '';
                    if (event.item_id) {
                        toolCallMetadata.set(event.item_id, { syntheticId, callId: metadata?.callId, name: tool_name });
                    }
                }
                else if (event.type === 'response.output_text.delta') {
                    yield {
                        result: textToCompletionResult(event.delta),
                    };
                }
                // Note: We don't emit response.output_text.done because the text was already
                // streamed via delta events. Emitting it again would duplicate the content.
                else if (event.type === 'response.completed' || event.type === 'response.incomplete' || event.type === 'response.failed') {
                    const finalTools = collectTools(event.response.output);
                    yield {
                        result: [],
                        finish_reason: responseFinishReason(event.response, finalTools),
                        token_usage: mapUsage(event.response.usage),
                    };
                }
            }
        }
    };
}
/**
 * Set `detail` on every `input_image` part of non-assistant messages, in place.
 * Does nothing unless detail_level is "auto", "low" or "high". Returns `items`.
 */
function insert_image_detail(items, detail_level) {
    if (detail_level === "auto" || detail_level === "low" || detail_level === "high") {
        for (const item of items) {
            // Check if it's an EasyInputMessage or Message with content array
            if ('role' in item && 'content' in item && item.role !== 'assistant') {
                const content = item.content;
                if (Array.isArray(content)) {
                    for (const part of content) {
                        if (typeof part === 'object' && part.type === 'input_image') {
                            part.detail = detail_level;
                        }
                    }
                }
            }
        }
    }
    return items;
}
/**
 * Rewrite `system` roles in place for model ids containing "o1" or "o3":
 * to `user` for o1-mini / o1-preview, to `developer` otherwise. Returns `items`.
 */
function convertRoles(items, model) {
    //New openai models use developer role instead of system
    if (model.includes("o1") || model.includes("o3")) {
        if (model.includes("o1-mini") || model.includes("o1-preview")) {
            //o1-mini and o1-preview support neither system nor developer
            for (const item of items) {
                if ('role' in item && item.role === 'system') {
                    item.role = 'user';
                }
            }
        }
        else {
            //Models newer than o1 use developer role
            for (const item of items) {
                if ('role' in item && item.role === 'system') {
                    item.role = 'developer';
                }
            }
        }
    }
    return items;
}
//Structured output support is typically aligned with tool use support
//Not true for realtime models, which do not support structured output, but do support tool use.
/**
 * Reports whether structured (schema-constrained) output can be requested from a model.
 *
 * @param {string} model Model identifier.
 * @returns {boolean} `false` when the name contains "realtime"; otherwise the result of
 *   `supportsToolUse(model, "openai")`.
 */
function supportsSchema(model) {
    const realtimeModel = model.includes("realtime");
    if (realtimeModel) {
        return false;
    }
    return (0, core_1.supportsToolUse)(model, "openai");
}
/**
 * Renders a tool argument/output value as text and caps it at 500 characters
 * (longer text is cut and suffixed with '...').
 * Non-string values are JSON-serialized first so structured outputs do not crash the conversion.
 */
function truncateToolText(value) {
    const text = typeof value === 'string' ? value : (JSON.stringify(value) ?? String(value));
    return text.length > 500 ? `${text.substring(0, 500)}...` : text;
}
/**
 * Converts function_call and function_call_output items to text messages in OpenAI conversation.
 * Preserves tool call information while removing structured items that require
 * tools to be defined in the API request.
 *
 * @param {Array} items Conversation items.
 * @returns {Array} The very same array when it holds no function items; otherwise a new array in
 *   which each function_call becomes an assistant message and each function_call_output becomes
 *   a user message. All other items are passed through untouched.
 */
function convertOpenAIFunctionItemsToText(items) {
    const hasFunctionItems = items.some(item => {
        const type = item.type;
        return type === 'function_call' || type === 'function_call_output';
    });
    if (!hasFunctionItems)
        return items;
    return items.map(item => {
        const typed = item;
        if (typed.type === 'function_call') {
            return {
                role: 'assistant',
                content: `[Tool call: ${typed.name}(${truncateToolText(typed.arguments || '')})]`,
            };
        }
        if (typed.type === 'function_call_output') {
            return {
                role: 'user',
                content: `[Tool result: ${truncateToolText(typed.output || 'No output')}]`,
            };
        }
        return item;
    });
}
/**
 * Maps a list of tool definitions to OpenAI function tool definitions.
 *
 * @param {Array|undefined} tools Tool definitions.
 * @returns {Array|undefined} `undefined` when `tools` is falsy.
 */
function getToolDefinitions(tools) {
    return tools ? tools.map(getToolDefinition) : undefined;
}
/**
 * Builds one OpenAI function tool definition.
 *
 * The input schema is first formatted for strict mode; if that formatter throws (missing
 * property type, empty object, nesting too deep) the definition falls back to the relaxed
 * format with `strict: false`. The caller's schema object is never modified.
 *
 * @param {{name: string, description?: string, input_schema?: object}} toolDef
 * @returns {{type: "function", name: string, description: *, parameters: (object|null), strict: boolean}}
 */
function getToolDefinition(toolDef) {
    let parsedSchema = undefined;
    let strictMode = false;
    if (toolDef.input_schema) {
        try {
            //TODO: type assertion here is not safe, does not work with satisfies
            parsedSchema = openAISchemaFormat(toolDef.input_schema);
            strictMode = true;
        }
        catch {
            // Deliberate best-effort: a schema that cannot be expressed in strict mode is
            // still sent, just without strict validation.
            //TODO: type assertion here is not safe, does not work with satisfies
            parsedSchema = limitedSchemaFormat(toolDef.input_schema);
            strictMode = false;
        }
    }
    return {
        type: "function",
        name: toolDef.name,
        description: toolDef.description,
        parameters: parsedSchema ?? null,
        strict: strictMode,
    };
}
/**
 * Appends new items to an existing conversation.
 *
 * @param conversation Existing conversation; it is passed through `unwrapConversationArray`
 *   and the unwrapped value is used when one is returned (presumably a wrapped-array form —
 *   confirm against @llumiverse/core).
 * @param {Array|undefined} items New items to append.
 * @returns {Array} The conversation alone when `items` is falsy (an empty array if there is no
 *   conversation), `items` alone when there is no conversation, otherwise a new concatenated array.
 */
function updateConversation(conversation, items) {
    if (!items) {
        // Unwrap array if wrapped, otherwise treat as array
        const unwrapped = (0, core_1.unwrapConversationArray)(conversation);
        return unwrapped ?? (conversation || []);
    }
    if (!conversation) {
        return items;
    }
    // Unwrap array if wrapped, otherwise treat as array
    const unwrapped = (0, core_1.unwrapConversationArray)(conversation);
    const convArray = unwrapped ?? conversation;
    return [...convArray, ...items];
}
/**
 * Extracts tool calls from a response `output` array.
 *
 * @param {Array|undefined} output Response output items.
 * @returns {Array|undefined} One `{ id, tool_name, tool_input }` entry per `function_call` item
 *   that has a `call_id` or `id`; `undefined` when there is no output or no such item.
 *   `tool_input` is the JSON-parsed arguments, or the raw value when parsing fails.
 */
function collectTools(output) {
    if (!output) {
        return undefined;
    }
    const tools = [];
    for (const item of output) {
        if (item.type === 'function_call') {
            const id = item.call_id || item.id;
            if (!id) {
                // A call without any identifier cannot be answered, so it is dropped.
                continue;
            }
            tools.push({
                id,
                tool_name: item.name ?? '',
                tool_input: safeJsonParse(item.arguments),
            });
        }
    }
    return tools.length > 0 ? tools : undefined;
}
/**
 * Collect all parts (text and images) from response output in order.
 * This preserves the original ordering of text and image parts.
 *
 * @param {Array|undefined} output Response output items.
 * @returns {Array<{type: "text"|"image", value: string}>} Empty array when there is no output.
 */
function extractCompletionResults(output) {
    if (!output) {
        return [];
    }
    const results = [];
    for (const item of output) {
        if (item.type === 'message') {
            // Extract text from message content
            for (const part of item.content) {
                if (part.type === 'output_text' && part.text) {
                    results.push({ type: "text", value: part.text });
                }
            }
        }
        else if (item.type === 'image_generation_call' && 'result' in item && item.result) {
            // GPT-image models return base64 encoded images in result field
            const base64Data = item.result;
            // Format as data URL for consistency with other image outputs
            const imageUrl = base64Data.startsWith('data:')
                ? base64Data
                : `data:image/png;base64,${base64Data}`;
            results.push({ type: "image", value: imageUrl });
        }
    }
    return results;
}
/**
 * Formats a JSON schema for non-strict tool definitions (strict mode false).
 *
 * Removes `default`, fills in a missing `type` on described nodes, and recurses into
 * `properties` and into the `items` of arrays of objects. Returns a new schema; the input
 * (including its `properties` object) is left untouched.
 *
 * @param {object} schema JSON schema node.
 * @returns {object} Formatted copy.
 */
function limitedSchemaFormat(schema) {
    const formattedSchema = { ...schema };
    // Defaults not supported
    delete formattedSchema.default;
    // OpenAI requires type field even in non-strict mode
    // If no type is specified, default to 'object' for properties with format/editor hints,
    // otherwise 'string' as a safe fallback
    if (!formattedSchema.type && formattedSchema.description) {
        // Properties with format: "document" or editor hints are typically objects
        if (formattedSchema.format === 'document' || formattedSchema.editor) {
            formattedSchema.type = 'object';
        }
        else {
            formattedSchema.type = 'string';
        }
    }
    if (formattedSchema.properties) {
        // Build a fresh properties map: the shallow copy above still shares the caller's
        // `properties` object, so assigning into it would mutate the input schema.
        const formattedProperties = {};
        for (const [propName, property] of Object.entries(formattedSchema.properties)) {
            const formattedProperty = limitedSchemaFormat(property);
            // Arrays of objects: format the item schema on top of the already formatted property
            if (property?.type === 'array' && property.items?.type === 'object') {
                formattedProperty.items = limitedSchemaFormat(property.items);
            }
            formattedProperties[propName] = formattedProperty;
        }
        formattedSchema.properties = formattedProperties;
    }
    return formattedSchema;
}
/**
 * Formats a JSON schema for strict tool definitions (strict mode true).
 *
 * Removes `default`, sets `additionalProperties: false` on objects, marks every property as
 * required, and recurses into `properties` and into the `items` of arrays of objects.
 * Returns a new schema; the input (including its `properties` object) is left untouched.
 *
 * @param {object} schema JSON schema node.
 * @param {number} [nesting=0] Current recursion depth.
 * @returns {object} Formatted copy.
 * @throws {Error} When nesting exceeds 5, a property has no `type`, or an object has no properties.
 */
function openAISchemaFormat(schema, nesting = 0) {
    if (nesting > 5) {
        throw new Error("OpenAI schema nesting too deep");
    }
    const formattedSchema = { ...schema };
    // Defaults not supported
    delete formattedSchema.default;
    // Additional properties not supported, required to be set.
    if (formattedSchema.type === "object") {
        formattedSchema.additionalProperties = false;
    }
    if (formattedSchema.properties) {
        const propNames = Object.keys(formattedSchema.properties);
        // Set all properties as required
        formattedSchema.required = propNames;
        // Build a fresh properties map: the shallow copy above still shares the caller's
        // `properties` object, so assigning into it would mutate the input schema and leak
        // strict-mode edits into the non-strict fallback when this function throws part-way.
        const formattedProperties = {};
        for (const propName of propNames) {
            const property = formattedSchema.properties[propName];
            // OpenAI strict mode requires all properties to have a type
            if (!property?.type) {
                throw new Error(`Property '${propName}' is missing required 'type' field for OpenAI strict mode`);
            }
            const formattedProperty = openAISchemaFormat(property, nesting + 1);
            // Arrays of objects: format the item schema on top of the already formatted property
            if (property.type === 'array' && property.items?.type === 'object') {
                formattedProperty.items = openAISchemaFormat(property.items, nesting + 1);
            }
            formattedProperties[propName] = formattedProperty;
        }
        formattedSchema.properties = formattedProperties;
    }
    if (formattedSchema.type === 'object' && Object.keys(formattedSchema.properties ?? {}).length === 0) {
        //If no properties are defined, then additionalProperties: true was set or the object would be empty.
        //OpenAI does not support this on structured output/ strict mode.
        throw new Error("OpenAI does not support empty objects or objects with additionalProperties set to true");
    }
    return formattedSchema;
}
/**
 * Derives the finish reason for a response.
 *
 * @param {object} response Response object (`status`, `incomplete_details` are read).
 * @param {Array|undefined} tools Tool calls collected from the response.
 * @returns {string} "tool_use" when any tool call is present; "length" for an incomplete
 *   response cut by `max_output_tokens`; otherwise the incomplete reason (or "incomplete"),
 *   any other non-completed status as-is, and "stop" by default.
 */
function responseFinishReason(response, tools) {
    if (tools && tools.length > 0) {
        return "tool_use";
    }
    if (response.status === 'incomplete') {
        if (response.incomplete_details?.reason === 'max_output_tokens') {
            return 'length';
        }
        return response.incomplete_details?.reason ?? 'incomplete';
    }
    if (response.status && response.status !== 'completed') {
        return response.status;
    }
    return 'stop';
}
/**
 * Fix orphaned function_call items in the OpenAI Responses API conversation.
 *
 * When an agent is stopped mid-tool-execution, the conversation may contain
 * function_call items without matching function_call_output items. The OpenAI
 * Responses API requires every function_call to have a matching function_call_output.
 *
 * This function detects such cases and injects synthetic function_call_output items
 * indicating the tools were interrupted, allowing the conversation to continue.
 *
 * @param {Array} items Conversation items.
 * @returns {Array} The same array when it has fewer than two items; otherwise a new array with
 *   synthetic outputs inserted before the next non-function item (or at the end).
 */
function fixOrphanedToolUse(items) {
    if (items.length < 2)
        return items;
    // First pass: collect all function_call_output call_ids
    const outputCallIds = new Set();
    for (const item of items) {
        if ('type' in item && item.type === 'function_call_output') {
            outputCallIds.add(item.call_id);
        }
    }
    // Second pass: build result, injecting synthetic outputs for orphaned function_calls
    const result = [];
    const pendingCalls = new Map(); // call_id -> tool name
    const flushPendingCalls = () => {
        for (const [callId, toolName] of pendingCalls) {
            result.push({
                type: 'function_call_output',
                call_id: callId,
                output: `[Tool interrupted: The user stopped the operation before "${toolName}" could execute.]`,
            });
        }
        pendingCalls.clear();
    };
    for (const item of items) {
        const itemType = 'type' in item ? item.type : undefined;
        if (itemType === 'function_call') {
            // Only track if there's no matching output anywhere in the conversation
            if (!outputCallIds.has(item.call_id)) {
                pendingCalls.set(item.call_id, item.name ?? 'unknown');
            }
        }
        else if (itemType !== 'function_call_output') {
            // Before any non-function item, flush pending orphaned calls
            flushPendingCalls();
        }
        result.push(item);
    }
    // Handle trailing orphans at the end of the conversation
    flushPendingCalls();
    return result;
}
/**
 * Parses a JSON string without throwing.
 *
 * @param {*} value Candidate JSON text.
 * @returns {*} The parsed value; the input itself when it is not a string or is not valid JSON.
 */
function safeJsonParse(value) {
    if (typeof value !== 'string') {
        return value;
    }
    try {
        return JSON.parse(value);
    }
    catch {
        return value;
    }
}
//# sourceMappingURL=index.js.map