@just-every/ensemble
LLM provider abstraction layer with unified streaming interface
1,128 lines (1,127 loc) • 50.8 kB
JavaScript
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.openaiProvider = exports.OpenAIProvider = void 0;
const base_provider_js_1 = require("./base_provider.cjs");
const openai_1 = __importStar(require("openai"));
const index_js_1 = require("../index.cjs");
const llm_logger_js_1 = require("../utils/llm_logger.cjs");
const pause_controller_js_1 = require("../utils/pause_controller.cjs");
const image_utils_js_1 = require("../utils/image_utils.cjs");
const delta_buffer_js_1 = require("../utils/delta_buffer.cjs");
const citation_tracker_js_1 = require("../utils/citation_tracker.cjs");
const event_controller_js_1 = require("../utils/event_controller.cjs");
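// Viewport dimensions advertised to the computer_use_preview tool (see convertToOpenAITools).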
const BROWSER_WIDTH = 1024;
const BROWSER_HEIGHT = 1536;
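/**
 * Normalizes a JSON Schema so it is accepted by OpenAI's strict function-calling mode:
 * recursively strips keywords the API rejects (minimum, pattern, format, ...), rewrites
 * oneOf as anyOf, forces additionalProperties: false on object schemas, and marks every
 * declared property as required. When `originalProperties` is supplied, top-level
 * properties flagged `optional: true` are excluded from the required list.
 *
 * @example
 * // Hypothetical input/output sketch:
 * // processSchemaForOpenAI({ type: 'object', properties: { city: { type: 'string', minLength: 1 } } })
 * // -> { type: 'object', properties: { city: { type: 'string' } },
 * //      required: ['city'], additionalProperties: false }
 */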
function processSchemaForOpenAI(schema, originalProperties) {
const processedSchema = JSON.parse(JSON.stringify(schema));
const processSchemaRecursively = (schema) => {
if (!schema || typeof schema !== 'object')
return;
if (schema.optional === true) {
delete schema.optional;
}
if (Array.isArray(schema.oneOf)) {
schema.anyOf = schema.oneOf;
delete schema.oneOf;
}
const unsupportedKeywords = [
'minimum',
'maximum',
'minItems',
'maxItems',
'minLength',
'maxLength',
'pattern',
'format',
'multipleOf',
'patternProperties',
'unevaluatedProperties',
'propertyNames',
'minProperties',
'maxProperties',
'unevaluatedItems',
'contains',
'minContains',
'maxContains',
'uniqueItems',
'default',
];
unsupportedKeywords.forEach(keyword => {
if (schema[keyword] !== undefined) {
delete schema[keyword];
}
});
const isObject = schema.type === 'object' || (schema.type === undefined && schema.properties !== undefined);
for (const key of ['anyOf', 'allOf']) {
if (Array.isArray(schema[key])) {
schema[key].forEach((variantSchema) => processSchemaRecursively(variantSchema));
}
}
if (isObject && schema.properties) {
for (const propName in schema.properties) {
processSchemaRecursively(schema.properties[propName]);
}
}
if (schema.type === 'array' && schema.items !== undefined) {
if (Array.isArray(schema.items)) {
schema.items.forEach((itemSchema) => processSchemaRecursively(itemSchema));
}
else if (typeof schema.items === 'object') {
processSchemaRecursively(schema.items);
}
}
if (isObject) {
schema.additionalProperties = false;
if (schema.properties) {
const currentRequired = Object.keys(schema.properties);
if (currentRequired.length > 0) {
schema.required = currentRequired;
}
else {
delete schema.required;
}
}
else {
delete schema.required;
}
}
};
processSchemaRecursively(processedSchema);
if (originalProperties) {
const topLevelRequired = [];
for (const propName in originalProperties) {
if (!originalProperties[propName].optional) {
topLevelRequired.push(propName);
}
}
if (topLevelRequired.length > 0) {
processedSchema.required = topLevelRequired;
}
else {
delete processedSchema.required;
}
}
if (processedSchema.properties && processedSchema.additionalProperties === undefined) {
processedSchema.additionalProperties = false;
}
return processedSchema;
}
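/**
 * Resolves function-valued `enum` entries in a tool parameter schema. Each such enum is
 * awaited; a non-empty array result replaces the function, while an empty result or a
 * thrown error drops the enum entirely. Nested object properties are processed recursively.
 */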
async function resolveAsyncEnums(params) {
if (!params || typeof params !== 'object') {
return params;
}
const resolved = { ...params };
if (resolved.properties) {
const resolvedProps = {};
for (const [key, value] of Object.entries(resolved.properties)) {
if (value && typeof value === 'object') {
const propCopy = { ...value };
if (typeof propCopy.enum === 'function') {
try {
const enumValue = await propCopy.enum();
if (Array.isArray(enumValue) && enumValue.length > 0) {
propCopy.enum = enumValue;
}
else {
delete propCopy.enum;
}
}
catch {
delete propCopy.enum;
}
}
resolvedProps[key] = await resolveAsyncEnums(propCopy);
}
else {
resolvedProps[key] = value;
}
}
resolved.properties = resolvedProps;
}
return resolved;
}
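/**
 * Converts ensemble tool definitions into the tool format of the OpenAI Responses API.
 * The pseudo-tool `openai_web_search` becomes a `web_search_preview` tool (and clears any
 * reasoning config); every other tool becomes a strict function tool with its schema run
 * through processSchemaForOpenAI. The `computer-use-preview` model additionally receives a
 * `computer_use_preview` tool sized to BROWSER_WIDTH x BROWSER_HEIGHT.
 */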
async function convertToOpenAITools(requestParams, tools) {
requestParams.tools = await Promise.all(tools.map(async (tool) => {
if (tool.definition.function.name === 'openai_web_search') {
delete requestParams.reasoning;
return {
type: 'web_search_preview',
search_context_size: 'high',
};
}
const resolvedParams = await resolveAsyncEnums(tool.definition.function.parameters);
const originalToolProperties = resolvedParams.properties;
const paramSchema = processSchemaForOpenAI(resolvedParams, originalToolProperties);
return {
type: 'function',
name: tool.definition.function.name,
description: tool.definition.function.description,
parameters: paramSchema,
strict: true,
};
}));
if (requestParams.model === 'computer-use-preview') {
requestParams.tools.push({
type: 'computer_use_preview',
display_width: BROWSER_WIDTH,
display_height: BROWSER_HEIGHT,
environment: 'browser',
});
}
requestParams.truncation = 'auto';
return requestParams;
}
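/**
 * Appends image data to the Responses API input as user messages. Each image is resized
 * and, when taller than 768px, split into segments via resizeAndSplitForOpenAI; if
 * processing fails, the raw image is attached instead.
 */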
async function addImagesToInput(input, images, source) {
for (const [image_id, imageData] of Object.entries(images)) {
try {
const processedImages = await (0, image_utils_js_1.resizeAndSplitForOpenAI)(imageData);
const messageContent = [];
if (processedImages.length === 1) {
messageContent.push({
type: 'input_text',
text: `This is [image #${image_id}] from the ${source}`,
});
}
else {
messageContent.push({
type: 'input_text',
text: `This is [image #${image_id}] from the ${source} (split into ${processedImages.length} parts, each up to 768px high)`,
});
}
for (const imageSegment of processedImages) {
messageContent.push({
type: 'input_image',
image_url: imageSegment,
detail: 'high',
});
}
input.push({
type: 'message',
role: 'user',
content: messageContent,
});
}
catch (error) {
console.error(`Error processing image ${image_id}:`, error);
input.push({
type: 'message',
role: 'user',
content: [
{
type: 'input_text',
text: `This is [image #${image_id}] from the ${source} (raw image)`,
},
{
type: 'input_image',
image_url: imageData,
detail: 'high',
},
],
});
}
}
return input;
}
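/**
 * OpenAI model provider. Wraps the OpenAI SDK behind the ensemble streaming interface and
 * adds cost tracking, pause handling, and request/response logging across embeddings,
 * image generation, text-to-speech, streamed responses, and realtime transcription.
 *
 * @example
 * // Minimal usage sketch (inferred from this file; the package may also export a
 * // ready-made `openaiProvider` instance):
 * // const provider = new OpenAIProvider(process.env.OPENAI_API_KEY);
 * // const vector = await provider.createEmbedding('hello world', 'text-embedding-3-small');
 */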
class OpenAIProvider extends base_provider_js_1.BaseModelProvider {
_client;
apiKey;
constructor(apiKey) {
super('openai');
this.apiKey = apiKey;
}
get client() {
if (!this._client) {
const apiKey = this.apiKey || process.env.OPENAI_API_KEY;
if (!apiKey) {
throw new Error('Failed to initialize OpenAI client. Make sure OPENAI_API_KEY is set.');
}
this._client = new openai_1.default({
apiKey: apiKey,
});
}
return this._client;
}
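/**
 * Creates embeddings for a string or an array of strings. Token usage falls back to a
 * rough chars/4 estimate when the API omits it, and cost is recorded via costTracker.
 * Returns a single vector for one input or an array of vectors for multiple inputs.
 */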
async createEmbedding(input, model, opts) {
try {
const options = {
model,
input: input,
encoding_format: 'float',
};
if (opts?.dimensions) {
options.dimensions = opts.dimensions;
}
const response = await this.client.embeddings.create(options);
const inputTokens = response.usage?.prompt_tokens ||
(typeof input === 'string'
? Math.ceil(input.length / 4)
: input.reduce((sum, text) => sum + Math.ceil(text.length / 4), 0));
index_js_1.costTracker.addUsage({
model,
input_tokens: inputTokens,
output_tokens: 0,
metadata: {
dimensions: response.data[0]?.embedding.length || options.dimensions,
},
});
if (Array.isArray(input) && input.length > 1) {
return response.data.map(item => item.embedding);
}
else {
return response.data[0].embedding;
}
}
catch (error) {
console.error('[OpenAI] Error generating embedding:', error);
throw error;
}
}
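/**
 * Generates or edits images (default model: gpt-image-1). When `opts.source_images` is
 * provided, the images.edit endpoint is used (with an optional mask); otherwise
 * images.generate is called. Results are returned as base64 PNG data: URLs.
 */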
async createImage(prompt, model, opts) {
try {
model = model || 'gpt-image-1';
const number_of_images = opts?.n || 1;
let quality = 'auto';
if (opts?.quality === 'standard')
quality = 'medium';
else if (opts?.quality === 'hd')
quality = 'high';
else if (opts?.quality === 'low' || opts?.quality === 'medium' || opts?.quality === 'high') {
quality = opts.quality;
}
let size = 'auto';
if (opts?.size === 'square' || opts?.size === '1024x1024') {
size = '1024x1024';
}
else if (opts?.size === 'landscape' || opts?.size === '1536x1024') {
size = '1536x1024';
}
else if (opts?.size === 'portrait' || opts?.size === '1024x1536') {
size = '1024x1536';
}
const background = 'auto';
const source_images = opts?.source_images;
console.log(`[OpenAI] Generating ${number_of_images} image(s) with model ${model}, prompt: "${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
let response;
if (source_images) {
console.log('[OpenAI] Using images.edit with source_images');
const imageArray = Array.isArray(source_images) ? source_images : [source_images];
const imageFiles = [];
for (const sourceImg of imageArray) {
let imageFile;
if (sourceImg.startsWith('http://') || sourceImg.startsWith('https://')) {
const imageResponse = await fetch(sourceImg);
const imageBuffer = await imageResponse.arrayBuffer();
imageFile = await (0, openai_1.toFile)(new Uint8Array(imageBuffer), `image_${imageFiles.length}.png`, {
type: 'image/png',
});
}
else {
let base64Data = sourceImg;
if (sourceImg.startsWith('data:')) {
base64Data = sourceImg.split(',')[1];
}
const binaryData = Buffer.from(base64Data, 'base64');
imageFile = await (0, openai_1.toFile)(new Uint8Array(binaryData), `image_${imageFiles.length}.png`, {
type: 'image/png',
});
}
imageFiles.push(imageFile);
}
let maskFile;
if (opts?.mask) {
let maskBase64 = opts.mask;
if (opts.mask.startsWith('data:')) {
maskBase64 = opts.mask.split(',')[1];
}
const maskBinary = Buffer.from(maskBase64, 'base64');
maskFile = await (0, openai_1.toFile)(new Uint8Array(maskBinary), 'mask.png', {
type: 'image/png',
});
}
const editParams = {
model,
prompt,
image: imageFiles,
n: number_of_images,
quality,
size,
};
if (maskFile) {
editParams.mask = maskFile;
}
response = await this.client.images.edit(editParams);
}
else {
response = await this.client.images.generate({
model,
prompt,
n: number_of_images,
background,
quality,
size,
moderation: 'low',
output_format: 'png',
});
}
if (response.data && response.data.length > 0) {
const perImageCost = this.getImageCost(model, quality);
index_js_1.costTracker.addUsage({
model,
image_count: response.data.length,
metadata: {
quality,
size,
cost_per_image: perImageCost,
is_edit: !!source_images,
},
});
}
const imageDataUrls = (response.data ?? []).map(item => {
const imageData = item?.b64_json;
if (!imageData) {
throw new Error('No image data returned from OpenAI');
}
return `data:image/png;base64,${imageData}`;
});
if (imageDataUrls.length === 0) {
throw new Error('No images returned from OpenAI');
}
return imageDataUrls;
}
catch (error) {
console.error('[OpenAI] Error generating image:', error);
throw error;
}
}
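/**
 * Approximate per-image USD cost used for cost tracking. These are hard-coded estimates
 * for gpt-image-1 quality tiers, not live pricing.
 */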
getImageCost(model, quality) {
if (model === 'gpt-image-1') {
if (quality === 'high') {
return 0.08;
}
else if (quality === 'medium' || quality === 'auto') {
return 0.04;
}
else if (quality === 'low') {
return 0.02;
}
}
return 0.04;
}
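/**
 * Text-to-speech via the audio.speech endpoint. Normalizes the requested format to mp3 or
 * pcm, optionally prefixes the voice instructions with an affect, and estimates cost from
 * the character count. Returns a ReadableStream when `opts.stream` is set, otherwise an
 * ArrayBuffer.
 */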
async createVoice(text, model, opts) {
try {
const voice = opts?.voice || 'alloy';
const speed = opts?.speed || 1.0;
let response_format = opts?.response_format || 'mp3';
if (response_format.includes('pcm')) {
response_format = 'pcm';
}
if (response_format.includes('mp3')) {
response_format = 'mp3';
}
let instructions = opts?.instructions || undefined;
if (opts?.affect) {
instructions = `Sound ${opts.affect}${instructions ? ' and ' + instructions : ''}`;
}
const response = await this.client.audio.speech.create({
model,
input: text,
instructions,
voice,
speed,
response_format: response_format,
});
const characterCount = text.length;
const costPerThousandChars = model === 'tts-1-hd' ? 0.03 : 0.015;
const cost = (characterCount / 1000) * costPerThousandChars;
index_js_1.costTracker.addUsage({
model,
cost,
metadata: {
character_count: characterCount,
voice,
format: response_format,
},
});
if (opts?.stream) {
const nodeStream = response.body;
return new ReadableStream({
async start(controller) {
for await (const chunk of nodeStream) {
controller.enqueue(new Uint8Array(chunk));
}
controller.close();
},
});
}
else {
const buffer = await response.arrayBuffer();
return buffer;
}
}
catch (error) {
console.error('[OpenAI] Error generating speech:', error);
throw error;
}
}
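/**
 * Streams a model response from the OpenAI Responses API as ensemble events. The message
 * history is sanitized into Responses API input items, reasoning/temperature/tool settings
 * are applied, and stream events are translated into message_delta, message_complete,
 * tool_start, cost_update and error events. Text deltas are buffered, citations are
 * collected into footnotes, and incomplete tool calls are still emitted if the stream ends
 * early.
 */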
async *createResponseStream(messages, model, agent) {
const { getToolsFromAgent } = await Promise.resolve().then(() => __importStar(require("../utils/agent.cjs")));
const tools = agent ? await getToolsFromAgent(agent) : [];
const settings = agent?.modelSettings;
let requestId;
try {
let input = [];
for (const messageFull of messages) {
let message = { ...messageFull };
const originalModel = message.model;
const allowedMessageProps = [
'type',
'role',
'content',
'status',
'id',
'name',
'thinking_id',
'signature',
'arguments',
'call_id',
'output',
'images',
'image_detail',
'image_url',
'detail',
'action',
'command',
'env',
'timeout_ms',
'user',
'working_directory',
'server_label',
'tools',
'error',
'approval_request_id',
'approve',
'reason',
'acknowledged_safety_checks',
'annotations',
'input_schema',
'description',
'result',
'generated',
];
Object.keys(message).forEach(key => {
if (!allowedMessageProps.includes(key)) {
delete message[key];
}
});
if (message.type === 'thinking') {
if (model.startsWith('o') && message.thinking_id && model === originalModel) {
const match = message.thinking_id.match(/^(rs_[A-Za-z0-9]+)-(\d)$/);
if (match) {
const reasoningId = match[1];
const summaryIndex = parseInt(match[2], 10);
const summaryText = typeof message.content === 'string' ? message.content : JSON.stringify(message.content);
const summaryEntry = {
type: 'summary_text',
text: summaryText,
};
const existingIndex = input.findIndex((item) => item.type === 'reasoning' && item.id === reasoningId);
if (existingIndex !== -1) {
const existingItem = input[existingIndex];
if (!existingItem.summary) {
existingItem.summary = [];
}
existingItem.summary[summaryIndex] = summaryEntry;
input[existingIndex] = existingItem;
}
else {
const newItem = {
type: 'reasoning',
id: reasoningId,
summary: [],
};
newItem.summary[summaryIndex] = summaryEntry;
input.push(newItem);
}
continue;
}
}
input.push({
type: 'message',
role: 'user',
content: 'Thinking: ' + message.content,
status: message.status || 'completed',
});
continue;
}
if (message.type === 'function_call') {
if (message.id && (!message.id.startsWith('fc_') || model !== originalModel)) {
const { id, ...rest } = message;
message = rest;
if (!message.call_id && id) {
message.call_id = id;
}
}
message.status = message.status || 'completed';
input.push(message);
continue;
}
if (message.type === 'function_call_output') {
const { name, id, ...messageToAdd } = message;
input = await (0, image_utils_js_1.appendMessageWithImage)(model, input, messageToAdd, 'output', addImagesToInput, `function call output of ${message.name}`);
continue;
}
if ((message.type ?? 'message') === 'message' && 'content' in message) {
if ('id' in message && message.id && (!message.id.startsWith('msg_') || model !== originalModel)) {
const { id, ...rest } = message;
message = rest;
}
input = await (0, image_utils_js_1.appendMessageWithImage)(model, input, { ...message, type: 'message' }, 'content', addImagesToInput);
continue;
}
}
if (input.length === 0) {
input.push({
type: 'message',
role: 'user',
content: 'Please proceed.',
});
}
let requestParams = {
model,
stream: true,
user: 'magi',
input,
};
if (!model.startsWith('o3-')) {
if (settings?.temperature !== undefined) {
requestParams.temperature = settings.temperature;
}
if (settings?.top_p !== undefined) {
requestParams.top_p = settings.top_p;
}
}
const REASONING_EFFORT_CONFIGS = ['low', 'medium', 'high'];
let hasEffortSuffix = false;
for (const effort of REASONING_EFFORT_CONFIGS) {
const suffix = `-${effort}`;
if (model.endsWith(suffix)) {
hasEffortSuffix = true;
requestParams.reasoning = {
effort: effort,
summary: 'auto',
};
model = model.slice(0, -suffix.length);
requestParams.model = model;
break;
}
}
if (model.startsWith('o') && !hasEffortSuffix) {
requestParams.reasoning = {
effort: 'high',
summary: 'auto',
};
}
if (settings?.tool_choice) {
if (typeof settings.tool_choice === 'object' &&
settings.tool_choice?.type === 'function' &&
settings.tool_choice?.function?.name) {
requestParams.tool_choice = {
type: settings.tool_choice.type,
name: settings.tool_choice.function.name,
};
}
else if (typeof settings.tool_choice === 'string') {
requestParams.tool_choice = settings.tool_choice;
}
}
if (settings?.json_schema?.schema) {
const { schema, ...wrapperWithoutSchema } = settings.json_schema;
requestParams.text = {
format: {
...wrapperWithoutSchema,
schema: processSchemaForOpenAI(schema),
},
};
}
if (tools && tools.length > 0) {
requestParams = await convertToOpenAITools(requestParams, tools);
}
requestId = (0, llm_logger_js_1.log_llm_request)(agent.agent_id, 'openai', model, requestParams);
const { waitWhilePaused } = await Promise.resolve().then(() => __importStar(require("../utils/pause_controller.cjs")));
await waitWhilePaused(100, agent.abortSignal);
const stream = await this.client.responses.create(requestParams);
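// Per-stream state: delta ordering/buffering, reasoning summary aggregation, citation
// tracking, and in-flight tool calls.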
const messagePositions = new Map();
const reasoningPositions = new Map();
const reasoningAggregates = new Map();
const deltaBuffers = new Map();
const citationTracker = (0, citation_tracker_js_1.createCitationTracker)();
const toolCallStates = new Map();
const events = [];
try {
for await (const event of stream) {
events.push(event);
if ((0, pause_controller_js_1.isPaused)()) {
await waitWhilePaused(100, agent.abortSignal);
}
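// Several event types below (in_progress, content_part.*, refusal.delta, file/web search
// progress, reasoning_summary_part.*) are recognized but intentionally emit nothing.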
if (event.type === 'response.in_progress') {
}
else if (event.type === 'response.completed' && event.response?.usage) {
const calculatedUsage = index_js_1.costTracker.addUsage({
model,
input_tokens: event.response.usage.input_tokens || 0,
output_tokens: event.response.usage.output_tokens || 0,
cached_tokens: event.response.usage.input_tokens_details?.cached_tokens || 0,
metadata: {
reasoning_tokens: event.response.usage.output_tokens_details?.reasoning_tokens || 0,
},
});
if (!(0, event_controller_js_1.hasEventHandler)()) {
yield {
type: 'cost_update',
usage: {
...calculatedUsage,
total_tokens: event.response.usage.input_tokens + event.response.usage.output_tokens,
},
};
}
}
else if (event.type === 'response.failed' && event.response?.error) {
const errorInfo = event.response.error;
(0, llm_logger_js_1.log_llm_error)(requestId, errorInfo);
console.error(`Response ${event.response.id} failed: [${errorInfo.code}] ${errorInfo.message}`);
yield {
type: 'error',
error: `OpenAI response failed: [${errorInfo.code}] ${errorInfo.message}`,
};
}
else if (event.type === 'response.incomplete' && event.response?.incomplete_details) {
const reason = event.response.incomplete_details.reason;
(0, llm_logger_js_1.log_llm_error)(requestId, 'OpenAI response incomplete: ' + reason);
console.warn(`Response ${event.response.id} incomplete: ${reason}`);
yield {
type: 'error',
error: 'OpenAI response incomplete: ' + reason,
};
}
else if (event.type === 'response.output_item.added' && event.item) {
if (event.item.type === 'function_call') {
if (!toolCallStates.has(event.item.id)) {
toolCallStates.set(event.item.id, {
id: event.item.id,
call_id: event.item.call_id,
type: 'function',
function: {
name: event.item.name || '',
arguments: '',
},
});
}
else {
console.warn(`Received output_item.added for already tracked function call ID: ${event.item.id}`);
}
}
}
else if (event.type === 'response.output_item.done' && event.item) {
if (event.item.type === 'reasoning' && !event.item.summary.length) {
yield {
type: 'message_complete',
content: '',
message_id: event.item.id + '-0',
thinking_content: '',
};
}
}
else if (event.type === 'response.content_part.added' && event.part) {
}
else if (event.type === 'response.content_part.done' && event.part) {
}
else if (event.type === 'response.output_text.delta' && event.delta) {
const itemId = event.item_id;
let position = messagePositions.get(itemId) ?? 0;
for (const ev of (0, delta_buffer_js_1.bufferDelta)(deltaBuffers, itemId, event.delta, content => ({
type: 'message_delta',
content,
message_id: itemId,
order: position++,
}))) {
yield ev;
}
messagePositions.set(itemId, position);
}
else if (event.type === 'response.output_text.annotation.added' &&
event.annotation) {
const eventData = event;
if (eventData.annotation?.type === 'url_citation' && eventData.annotation.url) {
const marker = (0, citation_tracker_js_1.formatCitation)(citationTracker, {
title: eventData.annotation.title || eventData.annotation.url,
url: eventData.annotation.url,
});
let position = messagePositions.get(eventData.item_id) ?? 0;
yield {
type: 'message_delta',
content: marker,
message_id: eventData.item_id,
order: position++,
};
messagePositions.set(eventData.item_id, position);
}
else {
}
}
else if (event.type === 'response.output_text.done' && event.text !== undefined) {
const itemId = event.item_id;
let finalText = event.text;
if (citationTracker.citations.size > 0) {
const footnotes = (0, citation_tracker_js_1.generateFootnotes)(citationTracker);
finalText += footnotes;
}
yield {
type: 'message_complete',
content: finalText,
message_id: itemId,
};
messagePositions.delete(itemId);
}
else if (event.type === 'response.refusal.delta' && event.delta) {
}
else if (event.type === 'response.refusal.done' && event.refusal) {
(0, llm_logger_js_1.log_llm_error)(requestId, 'OpenAI refusal error: ' + event.refusal);
console.error(`Refusal for item ${event.item_id}: ${event.refusal}`);
yield {
type: 'error',
error: 'OpenAI refusal error: ' + event.refusal,
};
}
else if (event.type === 'response.function_call_arguments.delta' && event.delta) {
const currentCall = toolCallStates.get(event.item_id);
if (currentCall) {
currentCall.function.arguments += event.delta;
}
else {
console.warn(`Received function_call_arguments.delta for unknown item_id: ${event.item_id}`);
}
}
else if (event.type === 'response.function_call_arguments.done' &&
event.arguments !== undefined) {
const currentCall = toolCallStates.get(event.item_id);
if (currentCall) {
currentCall.function.arguments = event.arguments;
yield {
type: 'tool_start',
tool_call: currentCall,
};
toolCallStates.delete(event.item_id);
}
else {
console.warn(`Received function_call_arguments.done for unknown or already yielded item_id: ${event.item_id}`);
}
}
else if (event.type === 'response.file_search_call.in_progress') {
}
else if (event.type === 'response.file_search_call.searching') {
}
else if (event.type === 'response.file_search_call.completed') {
}
else if (event.type === 'response.web_search_call.in_progress') {
}
else if (event.type === 'response.web_search_call.searching') {
}
else if (event.type === 'response.web_search_call.completed') {
}
else if (event.type === 'response.reasoning_summary_part.added') {
}
else if (event.type === 'response.reasoning_summary_part.done') {
}
else if (event.type === 'response.reasoning_summary_text.delta' && event.delta) {
const itemId = event.item_id + '-' + event.summary_index;
let position = reasoningPositions.get(itemId) ?? 0;
reasoningAggregates.set(itemId, (reasoningAggregates.get(itemId) ?? '') + event.delta);
yield {
type: 'message_delta',
content: '',
message_id: itemId,
thinking_content: event.delta,
order: position++,
};
reasoningPositions.set(itemId, position);
}
else if (event.type === 'response.reasoning_summary_text.done' && event.text !== undefined) {
const itemId = event.item_id + '-' + event.summary_index;
const aggregatedThinking = event.text;
yield {
type: 'message_complete',
content: '',
message_id: itemId,
thinking_content: aggregatedThinking,
};
reasoningPositions.delete(itemId);
reasoningAggregates.delete(itemId);
}
else if (event.type === 'error' && event.message) {
(0, llm_logger_js_1.log_llm_error)(requestId, event);
console.error(`API Stream Error (${model}): [${event.code || 'N/A'}] ${event.message}`);
yield {
type: 'error',
error: `OpenAI API error (${model}): [${event.code || 'N/A'}] ${event.message}`,
};
}
}
}
catch (streamError) {
(0, llm_logger_js_1.log_llm_error)(requestId, streamError);
console.error('Error processing response stream:', streamError);
yield {
type: 'error',
error: `OpenAI stream request error (${model}): ${streamError}`,
};
}
finally {
if (toolCallStates.size > 0) {
console.warn(`Stream ended with ${toolCallStates.size} incomplete tool call(s).`);
for (const [, toolCall] of toolCallStates.entries()) {
if (toolCall.function.name) {
yield {
type: 'tool_start',
tool_call: toolCall,
};
}
}
toolCallStates.clear();
}
for (const ev of (0, delta_buffer_js_1.flushBufferedDeltas)(deltaBuffers, (id, content) => {
const position = messagePositions.get(id) ?? 0;
messagePositions.set(id, position + 1);
return {
type: 'message_delta',
content,
message_id: id,
order: position,
};
})) {
yield ev;
}
messagePositions.clear();
(0, llm_logger_js_1.log_llm_response)(requestId, events);
}
}
catch (error) {
(0, llm_logger_js_1.log_llm_error)(requestId, error);
console.error('Error in OpenAI streaming response:', error);
yield {
type: 'error',
error: 'OpenAI streaming error: ' + (error instanceof Error ? error.stack : String(error)),
};
}
}
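/**
 * Streams transcription events from OpenAI's realtime WebSocket API
 * (wss://api.openai.com/v1/realtime?intent=transcription). Incoming audio chunks are
 * base64-encoded and appended to the input audio buffer; transcription deltas, turn
 * boundaries and errors are yielded as they arrive. Audio input is normalized to a
 * ReadableStream via the module's normalizeAudioSource helper. Only gpt-4o-transcribe,
 * gpt-4o-mini-transcribe and whisper-1 are accepted.
 */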
async *createTranscription(audio, agent, model, opts) {
const transcriptionModels = ['gpt-4o-transcribe', 'gpt-4o-mini-transcribe', 'whisper-1'];
if (!transcriptionModels.includes(model)) {
throw new Error(`Model ${model} does not support transcription. Supported models: ${transcriptionModels.join(', ')}`);
}
let ws = null;
let isConnected = false;
let connectionError = null;
try {
const { WebSocket } = await Promise.resolve().then(() => __importStar(require('ws')));
const apiKey = this.apiKey || process.env.OPENAI_API_KEY;
if (!apiKey) {
throw new Error('Failed to initialize OpenAI transcription. Make sure OPENAI_API_KEY is set.');
}
const wsUrl = 'wss://api.openai.com/v1/realtime?intent=transcription';
ws = new WebSocket(wsUrl, {
headers: {
Authorization: 'Bearer ' + apiKey,
'OpenAI-Beta': 'realtime=v1',
},
});
const transcriptEvents = [];
const connectionPromise = new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
reject(new Error('Connection timeout'));
}, 10000);
ws.on('open', () => {
clearTimeout(timeout);
isConnected = true;
resolve();
});
ws.on('error', error => {
clearTimeout(timeout);
connectionError = error;
reject(error);
});
});
ws.on('message', (data) => {
try {
const event = JSON.parse(data.toString());
console.dir(event, { depth: null });
switch (event.type) {
case 'transcription_session.created':
case 'session.created': {
const sessionUpdate = {
type: 'transcription_session.update',
session: {
input_audio_format: 'pcm16', // pcm16 is sent regardless of opts.audioFormat
input_audio_transcription: {
model: model,
prompt: opts?.prompt || 'You are a helpful assistant.',
language: opts?.language || 'en',
},
turn_detection: opts?.vad === false
? null
: {
type: 'semantic_vad',
},
input_audio_noise_reduction: opts?.noiseReduction === null
? null
: {
type: opts?.noiseReduction || 'far_field',
},
},
};
ws.send(JSON.stringify(sessionUpdate));
break;
}
case 'conversation.item.input_audio_transcription.delta': {
if (model !== 'whisper-1') {
const deltaEvent = {
type: 'transcription_turn_delta',
timestamp: new Date().toISOString(),
delta: event.delta,
partial: true,
};
transcriptEvents.push(deltaEvent);
}
break;
}
case 'conversation.item.input_audio_transcription.completed': {
const completeText = event.transcript;
const turnEvent = {
type: 'transcription_turn_complete',
timestamp: new Date().toISOString(),
text: completeText,
};
transcriptEvents.push(turnEvent);
break;
}
case 'input_audio_buffer.speech_started': {
const previewEvent = {
type: 'transcription_turn_start',
timestamp: new Date().toISOString(),
};
transcriptEvents.push(previewEvent);
break;
}
case 'input_audio_buffer.speech_stopped': {
break;
}
case 'error': {
const errorEvent = {
type: 'error',
timestamp: new Date().toISOString(),
error: event.error?.message || 'Unknown error',
};
transcriptEvents.push(errorEvent);
break;
}
}
}
catch (error) {
console.error('[OpenAI] Error processing message:', error);
}
});
ws.on('close', () => {
isConnected = false;
});
await connectionPromise;
const audioStream = normalizeAudioSource(audio);
const reader = audioStream.getReader();
try {
while (true) {
const { done, value } = await reader.read();
if (done)
break;
if (value && ws && isConnected) {
const audioEvent = {
type: 'input_audio_buffer.append',
audio: Buffer.from(value).toString('base64'),
};
ws.send(JSON.stringify(audioEvent));
}
if (transcriptEvents.length > 0) {
const events = transcriptEvents.splice(0, transcriptEvents.length);
for (const event of events) {
yield event;
}
}
if (connectionError) {
throw connectionError;
}
}
if (opts?.vad === false && ws && isConnected) {
ws.send(JSON.stringify({ type: 'input_audio_buffer.commit' }));
}
await new Promise(resolve => setTimeout(resolve, 1000));
if (transcriptEvents.length > 0) {
const events = transcriptEvents.splice(0, transcriptEvents.length);
for (const event of events) {
yield event;
}
}
const completeEvent = {
type: 'transcription_complete',
timestamp: new Date().toISOString(),
};
yield completeEvent;
}
finally {
reader.releaseLock();
if (ws && ws.readyState === ws.OPEN) {
ws.close();
}
}
}
catch (error) {
console.error('[OpenAI] Transcription error:', error);
const errorEvent = {
type: 'error',
timestamp: new Date().toISOString(),
error: error instanceof Error ? error.message : 'Transcription failed',
};
yield errorEvent;
}
}
}