
@just-every/ensemble

LLM provider abstraction layer with unified streaming interface

1,128 lines (1,127 loc) 50.8 kB
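The file below is the compiled CommonJS build of the package's OpenAI provider (it declares exports.OpenAIProvider and exports.openaiProvider). As a rough usage sketch based only on the signatures visible in this file, the entry-point path and the agent shape here are assumptions rather than documented API:

    // Sketch only: the import path and agent fields are assumed, not taken from package docs.
    const { OpenAIProvider } = require('@just-every/ensemble');

    async function main() {
        const provider = new OpenAIProvider(process.env.OPENAI_API_KEY);

        // createResponseStream(messages, model, agent) is an async generator that yields
        // events such as message_delta, message_complete, tool_start, cost_update and error
        // (event names taken from the implementation below).
        const messages = [{ type: 'message', role: 'user', content: 'Hello!' }];
        const agent = { agent_id: 'demo', modelSettings: { temperature: 0.7 } }; // hypothetical shape

        for await (const event of provider.createResponseStream(messages, 'gpt-4.1-mini', agent)) {
            if (event.type === 'message_delta') process.stdout.write(event.content);
            if (event.type === 'error') console.error(event.error);
        }
    }

    main().catch(console.error);

The compiled source follows.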
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.openaiProvider = exports.OpenAIProvider = void 0; const base_provider_js_1 = require("./base_provider.cjs"); const openai_1 = __importStar(require("openai")); const index_js_1 = require("../index.cjs"); const llm_logger_js_1 = require("../utils/llm_logger.cjs"); const pause_controller_js_1 = require("../utils/pause_controller.cjs"); const image_utils_js_1 = require("../utils/image_utils.cjs"); const delta_buffer_js_1 = require("../utils/delta_buffer.cjs"); const citation_tracker_js_1 = require("../utils/citation_tracker.cjs"); const event_controller_js_1 = require("../utils/event_controller.cjs"); const BROWSER_WIDTH = 1024; const BROWSER_HEIGHT = 1536; function processSchemaForOpenAI(schema, originalProperties) { const processedSchema = JSON.parse(JSON.stringify(schema)); const processSchemaRecursively = (schema) => { if (!schema || typeof schema !== 'object') return; if (schema.optional === true) { delete schema.optional; } if (Array.isArray(schema.oneOf)) { schema.anyOf = schema.oneOf; delete schema.oneOf; } const unsupportedKeywords = [ 'minimum', 'maximum', 'minItems', 'maxItems', 'minLength', 'maxLength', 'pattern', 'format', 'multipleOf', 'patternProperties', 'unevaluatedProperties', 'propertyNames', 'minProperties', 'maxProperties', 'unevaluatedItems', 'contains', 'minContains', 'maxContains', 'uniqueItems', 'default', ]; unsupportedKeywords.forEach(keyword => { if (schema[keyword] !== undefined) { delete schema[keyword]; } }); const isObject = schema.type === 'object' || (schema.type === undefined && schema.properties !== undefined); for (const key of ['anyOf', 'allOf']) { if (Array.isArray(schema[key])) { schema[key].forEach((variantSchema) => processSchemaRecursively(variantSchema)); } } if (isObject && schema.properties) { for (const propName in schema.properties) { processSchemaRecursively(schema.properties[propName]); } } if (schema.type === 'array' && schema.items !== undefined) { if (Array.isArray(schema.items)) { schema.items.forEach((itemSchema) => processSchemaRecursively(itemSchema)); } else if (typeof schema.items === 'object') { processSchemaRecursively(schema.items); } } if (isObject) { schema.additionalProperties = false; if (schema.properties) { const currentRequired = Object.keys(schema.properties); if 
(currentRequired.length > 0) { schema.required = currentRequired; } else { delete schema.required; } } else { delete schema.required; } } }; processSchemaRecursively(processedSchema); if (originalProperties) { const topLevelRequired = []; for (const propName in originalProperties) { if (!originalProperties[propName].optional) { topLevelRequired.push(propName); } } if (topLevelRequired.length > 0) { processedSchema.required = topLevelRequired; } else { delete processedSchema.required; } } if (processedSchema.properties && processedSchema.additionalProperties === undefined) { processedSchema.additionalProperties = false; } return processedSchema; } async function resolveAsyncEnums(params) { if (!params || typeof params !== 'object') { return params; } const resolved = { ...params }; if (resolved.properties) { const resolvedProps = {}; for (const [key, value] of Object.entries(resolved.properties)) { if (value && typeof value === 'object') { const propCopy = { ...value }; if (typeof propCopy.enum === 'function') { try { const enumValue = await propCopy.enum(); if (Array.isArray(enumValue) && enumValue.length > 0) { propCopy.enum = enumValue; } else { delete propCopy.enum; } } catch { delete propCopy.enum; } } resolvedProps[key] = await resolveAsyncEnums(propCopy); } else { resolvedProps[key] = value; } } resolved.properties = resolvedProps; } return resolved; } async function convertToOpenAITools(requestParams, tools) { requestParams.tools = await Promise.all(tools.map(async (tool) => { if (tool.definition.function.name === 'openai_web_search') { delete requestParams.reasoning; return { type: 'web_search_preview', search_context_size: 'high', }; } const resolvedParams = await resolveAsyncEnums(tool.definition.function.parameters); const originalToolProperties = resolvedParams.properties; const paramSchema = processSchemaForOpenAI(resolvedParams, originalToolProperties); return { type: 'function', name: tool.definition.function.name, description: tool.definition.function.description, parameters: paramSchema, strict: true, }; })); if (requestParams.model === 'computer-use-preview') { requestParams.tools.push({ type: 'computer_use_preview', display_width: BROWSER_WIDTH, display_height: BROWSER_HEIGHT, environment: 'browser', }); } requestParams.truncation = 'auto'; return requestParams; } async function addImagesToInput(input, images, source) { for (const [image_id, imageData] of Object.entries(images)) { try { const processedImages = await (0, image_utils_js_1.resizeAndSplitForOpenAI)(imageData); const messageContent = []; if (processedImages.length === 1) { messageContent.push({ type: 'input_text', text: `This is [image #${image_id}] from the ${source}`, }); } else { messageContent.push({ type: 'input_text', text: `This is [image #${image_id}] from the ${source} (split into ${processedImages.length} parts, each up to 768px high)`, }); } for (const imageSegment of processedImages) { messageContent.push({ type: 'input_image', image_url: imageSegment, detail: 'high', }); } input.push({ type: 'message', role: 'user', content: messageContent, }); } catch (error) { console.error(`Error processing image ${image_id}:`, error); input.push({ type: 'message', role: 'user', content: [ { type: 'input_text', text: `This is [image #${image_id}] from the ${source} (raw image)`, }, { type: 'input_image', image_url: imageData, detail: 'high', }, ], }); } } return input; } class OpenAIProvider extends base_provider_js_1.BaseModelProvider { _client; apiKey; constructor(apiKey) { super('openai'); this.apiKey = 
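// The constructor only records the API key; the OpenAI SDK client is created lazily
// in the client getter that follows, falling back to process.env.OPENAI_API_KEY.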
apiKey; } get client() { if (!this._client) { const apiKey = this.apiKey || process.env.OPENAI_API_KEY; if (!apiKey) { throw new Error('Failed to initialize OpenAI client. Make sure OPENAI_API_KEY is set.'); } this._client = new openai_1.default({ apiKey: apiKey, }); } return this._client; } async createEmbedding(input, model, opts) { try { const options = { model, input: input, encoding_format: 'float', }; if (opts?.dimensions) { options.dimensions = opts.dimensions; } const response = await this.client.embeddings.create(options); const inputTokens = response.usage?.prompt_tokens || (typeof input === 'string' ? Math.ceil(input.length / 4) : input.reduce((sum, text) => sum + Math.ceil(text.length / 4), 0)); index_js_1.costTracker.addUsage({ model, input_tokens: inputTokens, output_tokens: 0, metadata: { dimensions: response.data[0]?.embedding.length || options.dimensions, }, }); if (Array.isArray(input) && input.length > 1) { return response.data.map(item => item.embedding); } else { return response.data[0].embedding; } } catch (error) { console.error('[OpenAI] Error generating embedding:', error); throw error; } } async createImage(prompt, model, opts) { try { model = model || 'gpt-image-1'; const number_of_images = opts?.n || 1; let quality = 'auto'; if (opts?.quality === 'standard') quality = 'medium'; else if (opts?.quality === 'hd') quality = 'high'; else if (opts?.quality === 'low' || opts?.quality === 'medium' || opts?.quality === 'high') { quality = opts.quality; } let size = 'auto'; if (opts?.size === 'square' || opts?.size === '1024x1024') { size = '1024x1024'; } else if (opts?.size === 'landscape' || opts?.size === '1536x1024') { size = '1536x1024'; } else if (opts?.size === 'portrait' || opts?.size === '1024x1536') { size = '1024x1536'; } const background = 'auto'; const source_images = opts?.source_images; console.log(`[OpenAI] Generating ${number_of_images} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`); let response; if (source_images) { console.log('[OpenAI] Using images.edit with source_images'); const imageArray = Array.isArray(source_images) ? 
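// Edit path: source images are normalized to an array; each entry may be an http(s)
// URL, a data: URL, or raw base64, and is converted to a PNG upload via openai.toFile
// before calling images.edit (an optional mask is handled the same way).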
source_images : [source_images]; const imageFiles = []; for (const sourceImg of imageArray) { let imageFile; if (sourceImg.startsWith('http://') || sourceImg.startsWith('https://')) { const imageResponse = await fetch(sourceImg); const imageBuffer = await imageResponse.arrayBuffer(); imageFile = await (0, openai_1.toFile)(new Uint8Array(imageBuffer), `image_${imageFiles.length}.png`, { type: 'image/png', }); } else { let base64Data = sourceImg; if (sourceImg.startsWith('data:')) { base64Data = sourceImg.split(',')[1]; } const binaryData = Buffer.from(base64Data, 'base64'); imageFile = await (0, openai_1.toFile)(new Uint8Array(binaryData), `image_${imageFiles.length}.png`, { type: 'image/png', }); } imageFiles.push(imageFile); } let maskFile; if (opts?.mask) { let maskBase64 = opts.mask; if (opts.mask.startsWith('data:')) { maskBase64 = opts.mask.split(',')[1]; } const maskBinary = Buffer.from(maskBase64, 'base64'); maskFile = await (0, openai_1.toFile)(new Uint8Array(maskBinary), 'mask.png', { type: 'image/png', }); } const editParams = { model, prompt, image: imageFiles, n: number_of_images, quality, size, }; if (maskFile) { editParams.mask = maskFile; } response = await this.client.images.edit(editParams); } else { response = await this.client.images.generate({ model, prompt, n: number_of_images, background, quality, size, moderation: 'low', output_format: 'png', }); } if (response.data && response.data.length > 0) { const perImageCost = this.getImageCost(model, quality); index_js_1.costTracker.addUsage({ model, image_count: response.data.length, metadata: { quality, size, cost_per_image: perImageCost, is_edit: !!source_images, }, }); } const imageDataUrls = response.data.map(item => { const imageData = item?.b64_json; if (!imageData) { throw new Error('No image data returned from OpenAI'); } return `data:image/png;base64,${imageData}`; }); if (imageDataUrls.length === 0) { throw new Error('No images returned from OpenAI'); } return imageDataUrls; } catch (error) { console.error('[OpenAI] Error generating image:', error); throw error; } } getImageCost(model, quality) { if (model === 'gpt-image-1') { if (quality === 'high') { return 0.08; } else if (quality === 'medium' || quality === 'auto') { return 0.04; } else if (quality === 'low') { return 0.02; } } return 0.04; } async createVoice(text, model, opts) { try { const voice = opts?.voice || 'alloy'; const speed = opts?.speed || 1.0; let response_format = opts?.response_format || 'mp3'; if (response_format.includes('pcm')) { response_format = 'pcm'; } if (response_format.includes('mp3')) { response_format = 'mp3'; } let instructions = opts?.instructions || undefined; if (opts?.affect) { instructions = `Sound ${opts.affect}${instructions ? ' and ' + instructions : ''}`; } const response = await this.client.audio.speech.create({ model, input: text, instructions, voice, speed, response_format: response_format, }); const characterCount = text.length; const costPerThousandChars = model === 'tts-1-hd' ? 
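// Cost estimate hard-coded in this build: $0.03 per 1K characters for tts-1-hd,
// $0.015 per 1K characters for other TTS models.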
0.03 : 0.015; const cost = (characterCount / 1000) * costPerThousandChars; index_js_1.costTracker.addUsage({ model, cost, metadata: { character_count: characterCount, voice, format: response_format, }, }); if (opts?.stream) { const nodeStream = response.body; return new ReadableStream({ async start(controller) { for await (const chunk of nodeStream) { controller.enqueue(new Uint8Array(chunk)); } controller.close(); }, }); } else { const buffer = await response.arrayBuffer(); return buffer; } } catch (error) { console.error('[OpenAI] Error generating speech:', error); throw error; } } async *createResponseStream(messages, model, agent) { const { getToolsFromAgent } = await Promise.resolve().then(() => __importStar(require("../utils/agent.cjs"))); const tools = agent ? await getToolsFromAgent(agent) : []; const settings = agent?.modelSettings; let requestId; try { let input = []; for (const messageFull of messages) { let message = { ...messageFull }; const originalModel = message.model; const allowedMessageProps = [ 'type', 'role', 'content', 'status', 'id', 'name', 'thinking_id', 'signature', 'arguments', 'call_id', 'output', 'images', 'image_detail', 'image_url', 'detail', 'action', 'command', 'env', 'timeout_ms', 'user', 'working_directory', 'server_label', 'tools', 'error', 'approval_request_id', 'approve', 'reason', 'acknowledged_safety_checks', 'annotations', 'input_schema', 'description', 'result', 'generated', ]; Object.keys(message).forEach(key => { if (!allowedMessageProps.includes(key)) { delete message[key]; } }); if (message.type === 'thinking') { if (model.startsWith('o') && message.thinking_id && model === originalModel) { const match = message.thinking_id.match(/^(rs_[A-Za-z0-9]+)-(\d)$/); if (match) { const reasoningId = match[1]; const summaryIndex = parseInt(match[2], 10); const summaryText = typeof message.content === 'string' ? message.content : JSON.stringify(message.content); const summaryEntry = { type: 'summary_text', text: summaryText, }; const existingIndex = input.findIndex((item) => item.type === 'reasoning' && item.id === reasoningId); if (existingIndex !== -1) { const existingItem = input[existingIndex]; if (!existingItem.summary) { existingItem.summary = []; } existingItem.summary[summaryIndex] = summaryEntry; input[existingIndex] = existingItem; } else { const newItem = { type: 'reasoning', id: reasoningId, summary: [], }; newItem.summary[summaryIndex] = summaryEntry; input.push(newItem); } continue; } } input.push({ type: 'message', role: 'user', content: 'Thinking: ' + message.content, status: message.status || 'completed', }); continue; } if (message.type === 'function_call') { if (message.id && (!message.id.startsWith('fc_') || model !== originalModel)) { const { id, ...rest } = message; message = rest; if (!message.call_id && id) { message.call_id = id; } } message.status = message.status || 'completed'; input.push(message); continue; } if (message.type === 'function_call_output') { const { name, id, ...messageToAdd } = message; input = await (0, image_utils_js_1.appendMessageWithImage)(model, input, messageToAdd, 'output', addImagesToInput, `function call output of ${message.name}`); continue; } if ((message.type ?? 
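// History items without an explicit type are treated as plain chat messages; ids
// minted by another model (or ids not starting with msg_) are dropped, and inline
// images are expanded through appendMessageWithImage.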
'message') === 'message' && 'content' in message) { if ('id' in message && message.id && (!message.id.startsWith('msg_') || model !== originalModel)) { const { id, ...rest } = message; message = rest; } input = await (0, image_utils_js_1.appendMessageWithImage)(model, input, { ...message, type: 'message' }, 'content', addImagesToInput); continue; } } if (input.length === 0) { input.push({ type: 'message', role: 'user', content: 'Please proceed.', }); } let requestParams = { model, stream: true, user: 'magi', input, }; if (!model.startsWith('o3-')) { if (settings?.temperature !== undefined) { requestParams.temperature = settings.temperature; } if (settings?.top_p !== undefined) { requestParams.top_p = settings.top_p; } } const REASONING_EFFORT_CONFIGS = ['low', 'medium', 'high']; let hasEffortSuffix = false; for (const effort of REASONING_EFFORT_CONFIGS) { const suffix = `-${effort}`; if (model.endsWith(suffix)) { hasEffortSuffix = true; requestParams.reasoning = { effort: effort, summary: 'auto', }; model = model.slice(0, -suffix.length); requestParams.model = model; break; } } if (model.startsWith('o') && !hasEffortSuffix) { requestParams.reasoning = { effort: 'high', summary: 'auto', }; } if (settings?.tool_choice) { if (typeof settings.tool_choice === 'object' && settings.tool_choice?.type === 'function' && settings.tool_choice?.function?.name) { requestParams.tool_choice = { type: settings.tool_choice.type, name: settings.tool_choice.function.name, }; } else if (typeof settings.tool_choice === 'string') { requestParams.tool_choice = settings.tool_choice; } } if (settings?.json_schema?.schema) { const { schema, ...wrapperWithoutSchema } = settings.json_schema; requestParams.text = { format: { ...wrapperWithoutSchema, schema: processSchemaForOpenAI(schema), }, }; } if (tools && tools.length > 0) { requestParams = await convertToOpenAITools(requestParams, tools); } requestId = (0, llm_logger_js_1.log_llm_request)(agent.agent_id, 'openai', model, requestParams); const { waitWhilePaused } = await Promise.resolve().then(() => __importStar(require("../utils/pause_controller.cjs"))); await waitWhilePaused(100, agent.abortSignal); const stream = await this.client.responses.create(requestParams); const messagePositions = new Map(); const reasoningPositions = new Map(); const reasoningAggregates = new Map(); const deltaBuffers = new Map(); const citationTracker = (0, citation_tracker_js_1.createCitationTracker)(); const toolCallStates = new Map(); const events = []; try { for await (const event of stream) { events.push(event); if ((0, pause_controller_js_1.isPaused)()) { await waitWhilePaused(100, agent.abortSignal); } if (event.type === 'response.in_progress') { } else if (event.type === 'response.completed' && event.response?.usage) { const calculatedUsage = index_js_1.costTracker.addUsage({ model, input_tokens: event.response.usage.input_tokens || 0, output_tokens: event.response.usage.output_tokens || 0, cached_tokens: event.response.usage.input_tokens_details?.cached_tokens || 0, metadata: { reasoning_tokens: event.response.usage.output_tokens_details?.reasoning_tokens || 0, }, }); if (!(0, event_controller_js_1.hasEventHandler)()) { yield { type: 'cost_update', usage: { ...calculatedUsage, total_tokens: event.response.usage.input_tokens + event.response.usage.output_tokens, }, }; } } else if (event.type === 'response.failed' && event.response?.error) { const errorInfo = event.response.error; (0, llm_logger_js_1.log_llm_error)(requestId, errorInfo); console.error(`Response 
${event.response.id} failed: [${errorInfo.code}] ${errorInfo.message}`); yield { type: 'error', error: `OpenAI response failed: [${errorInfo.code}] ${errorInfo.message}`, }; } else if (event.type === 'response.incomplete' && event.response?.incomplete_details) { const reason = event.response.incomplete_details.reason; (0, llm_logger_js_1.log_llm_error)(requestId, 'OpenAI response incomplete: ' + reason); console.warn(`Response ${event.response.id} incomplete: ${reason}`); yield { type: 'error', error: 'OpenAI response incomplete: ' + reason, }; } else if (event.type === 'response.output_item.added' && event.item) { if (event.item.type === 'function_call') { if (!toolCallStates.has(event.item.id)) { toolCallStates.set(event.item.id, { id: event.item.id, call_id: event.item.call_id, type: 'function', function: { name: event.item.name || '', arguments: '', }, }); } else { console.warn(`Received output_item.added for already tracked function call ID: ${event.item.id}`); } } } else if (event.type === 'response.output_item.done' && event.item) { if (event.item.type === 'reasoning' && !event.item.summary.length) { yield { type: 'message_complete', content: '', message_id: event.item.id + '-0', thinking_content: '', }; } } else if (event.type === 'response.content_part.added' && event.part) { } else if (event.type === 'response.content_part.done' && event.part) { } else if (event.type === 'response.output_text.delta' && event.delta) { const itemId = event.item_id; let position = messagePositions.get(itemId) ?? 0; for (const ev of (0, delta_buffer_js_1.bufferDelta)(deltaBuffers, itemId, event.delta, content => ({ type: 'message_delta', content, message_id: itemId, order: position++, }))) { yield ev; } messagePositions.set(itemId, position); } else if (event.type === 'response.output_text.annotation.added' && event.annotation) { const eventData = event; if (eventData.annotation?.type === 'url_citation' && eventData.annotation.url) { const marker = (0, citation_tracker_js_1.formatCitation)(citationTracker, { title: eventData.annotation.title || eventData.annotation.url, url: eventData.annotation.url, }); let position = messagePositions.get(eventData.item_id) ?? 
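// url_citation annotations are emitted inline as footnote markers via ordinary
// message_delta events; the collected footnotes are appended to the final text
// when response.output_text.done arrives.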
0; yield { type: 'message_delta', content: marker, message_id: eventData.item_id, order: position++, }; messagePositions.set(eventData.item_id, position); } else { } } else if (event.type === 'response.output_text.done' && event.text !== undefined) { const itemId = event.item_id; let finalText = event.text; if (citationTracker.citations.size > 0) { const footnotes = (0, citation_tracker_js_1.generateFootnotes)(citationTracker); finalText += footnotes; } yield { type: 'message_complete', content: finalText, message_id: itemId, }; messagePositions.delete(itemId); } else if (event.type === 'response.refusal.delta' && event.delta) { } else if (event.type === 'response.refusal.done' && event.refusal) { (0, llm_logger_js_1.log_llm_error)(requestId, 'OpenAI refusal error: ' + event.refusal); console.error(`Refusal for item ${event.item_id}: ${event.refusal}`); yield { type: 'error', error: 'OpenAI refusal error: ' + event.refusal, }; } else if (event.type === 'response.function_call_arguments.delta' && event.delta) { const currentCall = toolCallStates.get(event.item_id); if (currentCall) { currentCall.function.arguments += event.delta; } else { console.warn(`Received function_call_arguments.delta for unknown item_id: ${event.item_id}`); } } else if (event.type === 'response.function_call_arguments.done' && event.arguments !== undefined) { const currentCall = toolCallStates.get(event.item_id); if (currentCall) { currentCall.function.arguments = event.arguments; yield { type: 'tool_start', tool_call: currentCall, }; toolCallStates.delete(event.item_id); } else { console.warn(`Received function_call_arguments.done for unknown or already yielded item_id: ${event.item_id}`); } } else if (event.type === 'response.file_search_call.in_progress') { } else if (event.type === 'response.file_search_call.searching') { } else if (event.type === 'response.file_search_call.completed') { } else if (event.type === 'response.web_search_call.in_progress') { } else if (event.type === 'response.web_search_call.searching') { } else if (event.type === 'response.web_search_call.completed') { } else if (event.type === 'response.reasoning_summary_part.added') { } else if (event.type === 'response.reasoning_summary_part.done') { } else if (event.type === 'response.reasoning_summary_text.delta' && event.delta) { const itemId = event.item_id + '-' + event.summary_index; let position = reasoningPositions.get(itemId) ?? 
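// Reasoning summaries stream as thinking_content deltas keyed by item_id plus
// summary_index; the matching message_complete uses the full text from the
// .done event rather than the aggregated deltas.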
0; reasoningAggregates.set(itemId, reasoningAggregates.get(itemId) + event.delta); yield { type: 'message_delta', content: '', message_id: itemId, thinking_content: event.delta, order: position++, }; reasoningPositions.set(itemId, position); } else if (event.type === 'response.reasoning_summary_text.done' && event.text !== undefined) { const itemId = event.item_id + '-' + event.summary_index; const aggregatedThinking = event.text; yield { type: 'message_complete', content: '', message_id: itemId, thinking_content: aggregatedThinking, }; reasoningPositions.delete(itemId); reasoningAggregates.delete(itemId); } else if (event.type === 'error' && event.message) { (0, llm_logger_js_1.log_llm_error)(requestId, event); console.error(`API Stream Error (${model}): [${event.code || 'N/A'}] ${event.message}`); yield { type: 'error', error: `OpenAI API error (${model}): [${event.code || 'N/A'}] ${event.message}`, }; } } } catch (streamError) { (0, llm_logger_js_1.log_llm_error)(requestId, streamError); console.error('Error processing response stream:', streamError); yield { type: 'error', error: `OpenAI stream request error (${model}): ${streamError}`, }; } finally { if (toolCallStates.size > 0) { console.warn(`Stream ended with ${toolCallStates.size} incomplete tool call(s).`); for (const [, toolCall] of toolCallStates.entries()) { if (toolCall.function.name) { yield { type: 'tool_start', tool_call: toolCall, }; } } toolCallStates.clear(); } for (const ev of (0, delta_buffer_js_1.flushBufferedDeltas)(deltaBuffers, (id, content) => { let position = messagePositions.get(id) ?? 0; position++; messagePositions.set(id, position); return { type: 'message_delta', content, message_id: id, order: position, }; })) { yield ev; } messagePositions.clear(); (0, llm_logger_js_1.log_llm_response)(requestId, events); } } catch (error) { (0, llm_logger_js_1.log_llm_error)(requestId, error); console.error('Error in OpenAI streaming response:', error); yield { type: 'error', error: 'OpenAI streaming error: ' + (error instanceof Error ? error.stack : String(error)), }; } } async *createTranscription(audio, agent, model, opts) { const transcriptionModels = ['gpt-4o-transcribe', 'gpt-4o-mini-transcribe', 'whisper-1']; if (!transcriptionModels.includes(model)) { throw new Error(`Model ${model} does not support transcription. Supported models: ${transcriptionModels.join(', ')}`); } let ws = null; let isConnected = false; let connectionError = null; try { const { WebSocket } = await Promise.resolve().then(() => __importStar(require('ws'))); const apiKey = this.apiKey || process.env.OPENAI_API_KEY; if (!apiKey) { throw new Error('Failed to initialize OpenAI transcription. 
Make sure OPENAI_API_KEY is set.'); } const wsUrl = 'wss://api.openai.com/v1/realtime?intent=transcription'; ws = new WebSocket(wsUrl, { headers: { Authorization: 'Bearer ' + apiKey, 'OpenAI-Beta': 'realtime=v1', }, }); const transcriptEvents = []; const connectionPromise = new Promise((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error('Connection timeout')); }, 10000); ws.on('open', () => { clearTimeout(timeout); isConnected = true; resolve(); }); ws.on('error', error => { clearTimeout(timeout); connectionError = error; reject(error); }); }); ws.on('message', (data) => { try { const event = JSON.parse(data.toString()); console.dir(event, { depth: null }); switch (event.type) { case 'transcription_session.created': case 'session.created': { const sessionUpdate = { type: 'transcription_session.update', session: { input_audio_format: opts?.audioFormat?.encoding === 'pcm' ? 'pcm16' : 'pcm16', input_audio_transcription: { model: model, prompt: opts?.prompt || 'You are a helpful assistant.', language: opts?.language || 'en', }, turn_detection: opts?.vad === false ? null : { type: 'semantic_vad', }, input_audio_noise_reduction: opts?.noiseReduction === null ? null : { type: opts?.noiseReduction || 'far_field', }, }, }; ws.send(JSON.stringify(sessionUpdate)); break; } case 'conversation.item.input_audio_transcription.delta': { if (model !== 'whisper-1') { const deltaEvent = { type: 'transcription_turn_delta', timestamp: new Date().toISOString(), delta: event.delta, partial: true, }; transcriptEvents.push(deltaEvent); } break; } case 'conversation.item.input_audio_transcription.completed': { const completeText = event.transcript; const turnEvent = { type: 'transcription_turn_complete', timestamp: new Date().toISOString(), text: completeText, }; transcriptEvents.push(turnEvent); break; } case 'input_audio_buffer.speech_started': { const previewEvent = { type: 'transcription_turn_start', timestamp: new Date().toISOString(), }; transcriptEvents.push(previewEvent); break; } case 'input_audio_buffer.speech_stopped': { break; } case 'error': { const errorEvent = { type: 'error', timestamp: new Date().toISOString(), error: event.error?.message || 'Unknown error', }; transcriptEvents.push(errorEvent); break; } } } catch (error) { console.error('[OpenAI] Error processing message:', error); } }); ws.on('close', () => { isConnected = false; }); await connectionPromise; const audioStream = normalizeAudioSource(audio); const reader = audioStream.getReader(); try { while (true) { const { done, value } = await reader.read(); if (done) break; if (value && ws && isConnected) { const audioEvent = { type: 'input_audio_buffer.append', audio: Buffer.from(value).toString('base64'), }; ws.send(JSON.stringify(audioEvent)); } if (transcriptEvents.length > 0) { const events = transcriptEvents.splice(0, transcriptEvents.length); for (const event of events) { yield event; } } if (connectionError) { throw connectionError; } } if (opts?.vad === false && ws && isConnected) { ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' })); } await new Promise(resolve => setTimeout(resolve, 1000)); if (transcriptEvents.length > 0) { const events = transcriptEvents.splice(0, transcriptEvents.length); for (const event of events) { yield event; } } const completeEvent = { type: 'transcription_complete', timestamp: new Date().toISOString(), }; yield completeEvent; } finally { reader.releaseLock(); if (ws && ws.readyState === ws.OPEN) { ws.close(); } } } catch (error) { console.error('[OpenAI] Transcription 
error:', error); const errorEvent = { type: 'error', timestamp: new Date().toISOString(), error: error instanceof Error ? error.message : 'Transcription failed', }; yield errorEvent; } } }
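For reference, a minimal sketch of what the strict-schema pass above (processSchemaForOpenAI) does to a typical tool parameter schema; the property names are hypothetical:

    // Hypothetical tool parameter schema before conversion
    const before = {
        type: 'object',
        properties: {
            query: { type: 'string', minLength: 1 },   // minLength is stripped as unsupported
            limit: { type: 'number', optional: true },  // non-standard optional flag is removed
        },
    };

    // processSchemaForOpenAI(before, before.properties) returns roughly:
    // {
    //   type: 'object',
    //   additionalProperties: false,
    //   properties: {
    //     query: { type: 'string' },
    //     limit: { type: 'number' }
    //   },
    //   required: ['query']   // only top-level properties not marked optional
    // }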