/*
 * @maximai/maxim-js
 * Version: (not captured in this listing)
 * Maxim AI JS SDK. Visit https://getmaxim.ai for more info.
 * 1,073 lines • 47 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MaximOpenAIRealtimeWrapper = void 0;
exports.wrapOpenAIRealtime = wrapOpenAIRealtime;
const uuid_1 = require("uuid");
const containers_1 = require("../../models/containers");
const utils_1 = require("./realtime/utils");
const queue_1 = require("./realtime/queue");
class MaximOpenAIRealtimeWrapper {
constructor(realtimeClient, logger, headers) {
this.realtimeClient = realtimeClient;
this.logger = logger;
this.boundHandlers = new Map();
this.containerManager = new containers_1.ContainerManager();
this.eventQueue = new queue_1.AsyncQueue();
this.originalSend = null;
this.modelParametersToIgnore = [];
// Parse headers
const sessionId = (headers === null || headers === void 0 ? void 0 : headers["maxim-session-id"]) || (0, uuid_1.v4)();
const generationName = headers === null || headers === void 0 ? void 0 : headers["maxim-generation-name"];
const sessionName = headers === null || headers === void 0 ? void 0 : headers["maxim-session-name"];
let sessionTags;
if (headers === null || headers === void 0 ? void 0 : headers["maxim-session-tags"]) {
const tagsRaw = headers["maxim-session-tags"];
if (typeof tagsRaw === "object") {
sessionTags = tagsRaw;
}
else if (typeof tagsRaw === "string") {
try {
sessionTags = JSON.parse(tagsRaw);
}
catch {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Failed to parse maxim-session-tags as JSON: ${tagsRaw}`);
}
}
}
// Initialize state
this.state = {
sessionId,
sessionName,
sessionTags,
generationName,
isLocalSession: !(headers === null || headers === void 0 ? void 0 : headers["maxim-session-id"]),
session: null,
currentTraceId: null,
currentGenerationId: null,
sttGenerationId: null,
llmGenerationId: null,
currentGenerationType: null,
sessionModel: null,
sessionConfig: null,
systemInstructions: null,
transcriptionModel: null,
transcriptionLanguage: null,
lastUserMessage: null,
outputAudio: null,
currentModelParameters: null,
toolsConfig: new Map(),
functionCallArguments: new Map(),
toolCallIds: new Set(),
toolCallOutputs: new Map(),
pendingToolCallOutputs: new Map(),
hasPendingToolCalls: false,
isContinuingTrace: false,
userAudioBuffer: new Map(),
pendingUserAudio: Buffer.alloc(0),
currentItemId: null,
// Audio input mode flag
isAudioInput: false,
// Flag for deferred trace finalization
pendingTraceFinalization: false,
};
this.modelParametersToIgnore = ["model", "instructions", "expires_at", "include", "object", "type", "id", "audio"];
// Attach event listeners
this.attachEventListeners();
// Wrap send method to intercept client events
this.wrapSendMethod();
}
/**
* Attach event listeners to the realtime client.
*/
attachEventListeners() {
const events = [
"session.created",
"session.updated",
"conversation.item.added", // Server sends this when audio buffer is committed (user audio input)
"conversation.item.created", // Server sends this in response to client's conversation.item.create
"conversation.item.deleted",
"response.created",
"response.function_call_arguments.delta",
"response.function_call_arguments.done",
"response.output_audio.delta",
"response.output_audio.done",
"response.done",
"conversation.item.input_audio_transcription.completed",
"error",
];
for (const eventType of events) {
const handler = (event) => this.handleEvent(eventType, event);
this.boundHandlers.set(eventType, handler);
this.realtimeClient.on(eventType, handler);
}
}
/**
* Wrap the send method to intercept client events like input_audio_buffer.append.
*/
wrapSendMethod() {
const originalSend = this.realtimeClient.send.bind(this.realtimeClient);
if (!originalSend)
return;
this.originalSend = originalSend;
this.realtimeClient.send = (event) => {
this.handleClientEvent(event);
return originalSend(event);
};
}
/**
* Handle client events being sent to the server.
* Events are queued and processed sequentially to prevent race conditions.
*/
handleClientEvent(event) {
// Queue the client event handler to ensure sequential processing
this.eventQueue.enqueue(async () => {
try {
switch (event.type) {
case "input_audio_buffer.append":
this.handleInputAudioBufferAppend(event);
break;
default:
break;
// Add other client events as needed
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling client event ${event.type}: ${e}`);
}
});
}
/**
* Handle input_audio_buffer.append events to capture user audio.
*/
handleInputAudioBufferAppend(event) {
try {
const audioBytes = Buffer.from(event.audio, "base64");
this.state.pendingUserAudio = Buffer.concat([this.state.pendingUserAudio, audioBytes]);
// Mark that this is an audio input conversation
if (!this.state.isAudioInput) {
this.state.isAudioInput = true;
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error capturing user audio: ${e}`);
}
}
/**
* Handle realtime events and log to Maxim.
* Events are queued and processed sequentially to prevent race conditions.
*/
handleEvent(eventType, event) {
// Queue the event handler to ensure sequential processing
this.eventQueue.enqueue(async () => {
try {
switch (eventType) {
case "session.created":
this.handleSessionCreated(event);
break;
case "session.updated":
this.handleSessionUpdated(event);
break;
case "conversation.item.added":
this.handleConversationItemAdded(event);
break;
case "conversation.item.deleted":
this.handleConversationItemDeleted();
break;
case "response.created":
this.handleResponseCreated(event);
break;
case "response.function_call_arguments.delta":
this.handleFunctionCallArgumentsDelta(event);
break;
case "response.function_call_arguments.done":
this.handleFunctionCallArgumentsDone(event);
break;
case "response.output_audio.delta":
this.handleResponseOutputAudioDelta(event);
break;
case "response.done":
this.handleResponseDone(event);
break;
case "conversation.item.input_audio_transcription.completed":
this.handleInputAudioTranscriptionCompleted(event);
break;
case "error":
this.handleError(event);
break;
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling event ${eventType}: ${e}`);
}
});
}
/**
* Get or create the session container.
*/
getOrCreateSession() {
var _a;
if (!this.state.session) {
const tags = { ...((_a = this.state.sessionTags) !== null && _a !== void 0 ? _a : {}) };
this.state.session = this.logger.session({
id: this.state.sessionId,
name: this.state.sessionName || "OpenAI Realtime Session",
tags,
});
}
return this.state.session;
}
/**
* Get the current trace container.
*/
getCurrentTraceContainer() {
if (!this.state.currentTraceId)
return undefined;
const container = this.containerManager.getContainer(this.state.currentTraceId);
return container instanceof containers_1.TraceContainer ? container : undefined;
}
/**
* Create a new trace for an interaction using ContainerManager.
*/
createTrace(traceId) {
const session = this.getOrCreateSession();
const traceContainer = new containers_1.TraceContainer(this.containerManager, this.logger, traceId, "Realtime Interaction", undefined, false);
traceContainer.create({}, session.id);
return traceContainer;
}
/**
* Finalize the trace and clean up state.
* This handles ending the trace, session, and resetting all relevant state.
*/
finalizeTrace(traceContainer) {
// End trace
if (traceContainer && this.state.currentTraceId) {
try {
traceContainer.end();
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error ending trace: ${e}`);
}
}
// End session (update timestamp)
if (this.state.session) {
try {
this.state.session.end();
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error ending session: ${e}`);
}
}
// Cleanup state
this.state.currentGenerationId = null;
this.state.llmGenerationId = null;
this.state.currentGenerationType = null;
this.state.currentTraceId = null;
this.state.hasPendingToolCalls = false;
this.state.isContinuingTrace = false;
this.state.toolCallIds.clear();
this.state.toolCallOutputs.clear();
this.state.lastUserMessage = null;
// Clear audio state
this.state.isAudioInput = false;
this.state.pendingUserAudio = Buffer.alloc(0);
this.state.userAudioBuffer.clear();
this.state.currentItemId = null;
this.state.outputAudio = null;
// Clear finalization flag
this.state.pendingTraceFinalization = false;
}
/**
* Handle session.created event.
*/
handleSessionCreated(event) {
try {
const session = event.session;
this.state.systemInstructions = (session === null || session === void 0 ? void 0 : session.instructions) || null;
this.state.sessionModel = (session === null || session === void 0 ? void 0 : session.model) || null;
if (session) {
this.state.sessionConfig = { ...session };
}
// Create session container
this.getOrCreateSession();
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling session.created: ${e}`);
}
}
/**
* Handle session.updated event.
*/
handleSessionUpdated(event) {
var _a, _b, _c;
try {
const session = event.session;
if (session) {
// Update session config
if (!this.state.sessionConfig) {
this.state.sessionConfig = { ...session };
}
else {
for (const [key, value] of Object.entries(session)) {
if (key === "id") {
(_a = this.state.session) === null || _a === void 0 ? void 0 : _a.addTag("sess_id", value);
}
if (key !== "id" && value !== null && value !== undefined) {
this.state.sessionConfig[key] = value;
}
}
}
const tools = session === null || session === void 0 ? void 0 : session.tools;
if (tools && Array.isArray(tools)) {
for (const tool of tools) {
if (tool.type === "function") {
if (tool.name) {
this.state.toolsConfig.set(tool.name, {
name: tool.name,
description: tool.description || `Function: ${tool.name}`,
});
}
}
}
}
const transcription = (_c = (_b = session.audio) === null || _b === void 0 ? void 0 : _b.input) === null || _c === void 0 ? void 0 : _c.transcription;
// Extract transcription settings
if (transcription) {
this.state.transcriptionModel = transcription.model || null;
this.state.transcriptionLanguage = transcription.language || null;
}
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling session.updated: ${e}`);
}
}
/**
* Handle conversation.item.added event.
* - conversation.item.added: Server sends when audio buffer is committed (user speaks)
* // Using this event instead of conversation.item.created because this event gets called once after the message is committed,
*/
handleConversationItemAdded(event) {
var _a, _b;
try {
const item = event.item;
if (!item)
return;
if (item.type === "message") {
const role = item.role;
if (role !== "user")
return;
// Check if it's audio input
if (((_a = item.content) === null || _a === void 0 ? void 0 : _a.length) > 0 && ((_b = item.content[0]) === null || _b === void 0 ? void 0 : _b.type) === "input_audio") {
const itemId = item.id;
this.state.currentItemId = itemId !== null && itemId !== void 0 ? itemId : null;
// Mark that this is audio input
this.state.isAudioInput = true;
if (this.state.pendingUserAudio.length > 0 && itemId) {
this.state.userAudioBuffer.set(itemId, this.state.pendingUserAudio);
this.state.pendingUserAudio = Buffer.alloc(0);
}
return;
}
// Extract text message
this.state.lastUserMessage = (0, utils_1.extractMessageContent)(item);
}
else if (item.type === "function_call_output") {
const callId = item.call_id;
const output = item.output;
if (callId && output !== undefined) {
const outputStr = typeof output === "string" ? output : String(output);
this.state.toolCallOutputs.set(callId, outputStr);
// Use logger directly with tool call ID (container pattern)
if (this.state.toolCallIds.has(callId)) {
try {
this.logger.toolCallResult(callId, outputStr);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error setting tool call result: ${e}`);
}
}
else {
try {
this.logger.toolCallResult(callId, outputStr);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error setting tool call result via logger: ${e}`);
}
}
}
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling conversation.item.created: ${e}`);
}
}
/**
* Handle conversation.item.deleted event.
*/
handleConversationItemDeleted() {
// Clear stale user message reference
this.state.lastUserMessage = null;
}
/**
* Handle response.created event.
*/
handleResponseCreated(event) {
var _a, _b;
try {
// Ensure session exists
this.getOrCreateSession();
// Reset output audio buffer for the new response
this.state.outputAudio = null;
// Get current trace container
let traceContainer = this.getCurrentTraceContainer();
// Check if we should continue an existing trace (after tool calls)
if (traceContainer && this.state.hasPendingToolCalls) {
this.state.hasPendingToolCalls = false;
this.state.isContinuingTrace = true;
}
else {
// End previous trace if exists
if (traceContainer) {
try {
traceContainer.end();
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error ending previous trace: ${e}`);
}
}
// Create new trace using ContainerManager pattern
const traceId = (0, uuid_1.v4)();
traceContainer = this.createTrace(traceId);
this.state.currentTraceId = traceId;
this.state.isContinuingTrace = false;
// Clear generation IDs for new trace
this.state.sttGenerationId = null;
this.state.llmGenerationId = null;
}
// Extract model parameters from session config
const modelParameters = {};
if (this.state.sessionConfig) {
for (const [key, value] of Object.entries(this.state.sessionConfig)) {
if (!this.modelParametersToIgnore.includes(key) && value !== undefined && value !== null) {
modelParameters[key] = value;
}
if (key === "output_modalities" && value !== undefined && value !== null) {
if (Array.isArray(value) && value.length > 0) {
modelParameters["output_modalities"] = value[0];
}
}
if (key === "audio") {
modelParameters["maxim-audio-model-parameters"] = value;
}
}
}
this.state.currentModelParameters = modelParameters;
// For audio input on a NEW trace (not continuation), create 2 generations:
// 1. STT generation (empty, will be updated when transcription arrives)
// 2. LLM generation (with system message, model params)
if (this.state.isAudioInput && !this.state.isContinuingTrace) {
// Create STT generation (empty - will be updated by transcription handler)
const sttGenerationId = (0, uuid_1.v4)();
try {
traceContainer.addGeneration({
id: sttGenerationId,
model: this.state.transcriptionModel || "whisper-1",
provider: "openai",
name: "User Speech Transcription",
modelParameters: this.state.transcriptionLanguage ? { language: this.state.transcriptionLanguage } : {},
messages: [], // Empty - will be updated when transcription arrives
});
this.state.sttGenerationId = sttGenerationId;
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error creating STT generation: ${e}`);
}
// Create LLM generation (with system message)
const llmGenerationId = (0, uuid_1.v4)();
const llmMessages = [];
// Add system message if instructions exist
if ((_a = this.state.sessionConfig) === null || _a === void 0 ? void 0 : _a["instructions"]) {
llmMessages.push({
role: "system",
content: this.state.sessionConfig["instructions"],
});
}
try {
traceContainer.addGeneration({
id: llmGenerationId,
model: this.state.sessionModel || "unknown",
provider: "openai",
name: this.state.generationName,
modelParameters,
messages: llmMessages,
});
this.state.currentGenerationId = llmGenerationId;
this.state.llmGenerationId = llmGenerationId; // Store for transcription handler
this.state.currentGenerationType = "llm";
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error creating LLM generation: ${e}`);
}
}
else {
// Non-audio input or continuation - create single generation
const generationId = (0, uuid_1.v4)();
const messages = [];
// Add system message if instructions exist
if ((_b = this.state.sessionConfig) === null || _b === void 0 ? void 0 : _b["instructions"]) {
messages.push({
role: "system",
content: this.state.sessionConfig["instructions"],
});
}
// Handle continuation (after tool calls)
if (this.state.isContinuingTrace) {
// Add tool outputs
if (this.state.toolCallOutputs.size > 0) {
for (const [callId, output] of this.state.toolCallOutputs.entries()) {
messages.push({
role: "tool",
content: output,
tool_call_id: callId,
});
}
this.state.toolCallOutputs.clear();
}
// Clear lastUserMessage to prevent it from appearing in continuation
this.state.lastUserMessage = null;
}
else if (this.state.lastUserMessage) {
// Text input - add user message
messages.push({
role: "user",
content: this.state.lastUserMessage,
});
// Set trace input for new trace
if (traceContainer) {
try {
traceContainer.setInput(this.state.lastUserMessage);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error setting trace input: ${e}`);
}
}
this.state.lastUserMessage = null;
}
try {
traceContainer.addGeneration({
id: generationId,
model: this.state.sessionModel || "unknown",
provider: "openai",
name: this.state.generationName,
modelParameters,
messages,
});
this.state.currentGenerationId = generationId;
this.state.currentGenerationType = "llm";
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error creating generation: ${e}`);
}
}
this.state.isContinuingTrace = false;
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling response.created: ${e}`);
}
}
/**
* Handle response.output_audio.delta event.
*/
handleResponseOutputAudioDelta(event) {
try {
if (event.delta) {
const audioChunk = Buffer.from(event.delta, "base64");
if (!this.state.outputAudio) {
this.state.outputAudio = audioChunk;
}
else {
this.state.outputAudio = Buffer.concat([this.state.outputAudio, audioChunk]);
}
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling response.output_audio.delta: ${e}`);
}
}
/**
* Handle response.function_call_arguments.delta event.
*/
handleFunctionCallArgumentsDelta(event) {
try {
const callId = event.item_id;
const delta = event.delta;
if (callId && delta) {
const existing = this.state.functionCallArguments.get(callId) || "";
this.state.functionCallArguments.set(callId, existing + delta);
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling function call arguments delta: ${e}`);
}
}
/**
* Handle response.function_call_arguments.done event.
*/
handleFunctionCallArgumentsDone(event) {
try {
// Use call_id to match with function_call_output (not item_id)
const callId = event.call_id;
const itemId = event.item_id;
// Note: The realtime event type doesn't include function name in its type definition,
// but at runtime it may be present. We'll try to access it and fall back to other methods.
let functionName = event.name;
const finalArguments = event.arguments || this.state.functionCallArguments.get(itemId) || "";
// Try to get function name from arguments if not provided
if (!functionName) {
try {
const argsDict = JSON.parse(finalArguments);
if (argsDict.name) {
functionName = argsDict.name;
}
}
catch { }
}
// Fallback name
if (!functionName) {
functionName = callId ? `function_${callId.slice(0, 8)}` : "unknown";
}
// Get the tool description from config
const toolConfig = this.state.toolsConfig.get(functionName);
const description = (toolConfig === null || toolConfig === void 0 ? void 0 : toolConfig.description) || `Function: ${functionName}`;
// Create tool call using container pattern
const traceContainer = this.getCurrentTraceContainer();
if (traceContainer && callId) {
try {
traceContainer.addToolCall({
id: callId,
name: functionName,
description,
args: finalArguments,
});
this.state.toolCallIds.add(callId);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error creating tool call: ${e}`);
}
}
// Cleanup - use itemId for the arguments map (which was keyed by item_id in delta handler)
if (itemId) {
this.state.functionCallArguments.delete(itemId);
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling function call arguments done: ${e}`);
}
}
/**
* Handle conversation.item.input_audio_transcription.completed event.
* This runs asynchronously - it updates existing generations without blocking the main flow.
*/
handleInputAudioTranscriptionCompleted(event) {
var _a;
try {
const transcript = event.transcript || "";
const itemId = event.item_id;
// Get user audio for this item
let userAudio = null;
if (itemId && this.state.userAudioBuffer.has(itemId)) {
userAudio = this.state.userAudioBuffer.get(itemId);
}
// Update the STT generation if it exists
if (this.state.sttGenerationId) {
const sttGenerationId = this.state.sttGenerationId;
// Add user transcript to the STT generation
this.logger.generationAddMessage(sttGenerationId, [{ role: "user", content: transcript }]);
// Attach user audio to STT generation
if (userAudio && userAudio.length > 0) {
try {
const wavBuffer = (0, utils_1.pcm16ToWav)(userAudio);
const attachment = {
type: "fileData",
id: (0, uuid_1.v4)(),
name: "User Audio Input",
data: wavBuffer,
mimeType: "audio/wav",
tags: { "attach-to": "input" },
};
this.logger.generationAddAttachment(sttGenerationId, attachment);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error adding user audio to STT generation: ${e}`);
}
}
// Build usage from event
let usage;
if (((_a = event.usage) === null || _a === void 0 ? void 0 : _a.type) === "tokens") {
usage = {
prompt_tokens: event.usage.input_tokens,
completion_tokens: event.usage.output_tokens,
total_tokens: event.usage.total_tokens,
};
}
else {
usage = {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0,
};
}
// End the STT generation with result
this.logger.generationResult(sttGenerationId, {
id: event.event_id || (0, uuid_1.v4)(),
object: "stt.response",
created: Math.floor(Date.now() / 1000),
model: this.state.transcriptionModel || "whisper-1",
choices: [], // STT has no assistant response
usage: usage,
});
// Explicitly end the STT generation
this.logger.generationEnd(sttGenerationId);
}
// Update the LLM generation if it exists (add user message)
// Use llmGenerationId (the original LLM generation) instead of currentGenerationId
// because currentGenerationId may have changed during continuations
if (this.state.llmGenerationId) {
// Add user transcript to the LLM generation
this.logger.generationAddMessage(this.state.llmGenerationId, [{ role: "user", content: transcript }]);
// Also attach user audio to the LLM generation
if (userAudio && userAudio.length > 0) {
try {
const wavBuffer = (0, utils_1.pcm16ToWav)(userAudio);
const attachment = {
type: "fileData",
id: (0, uuid_1.v4)(),
name: "User Audio Input",
data: wavBuffer,
mimeType: "audio/wav",
tags: { "attach-to": "input" },
};
this.logger.generationAddAttachment(this.state.llmGenerationId, attachment);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error adding user audio to LLM generation: ${e}`);
}
}
}
// Set trace input with the transcript
if (this.state.currentTraceId) {
try {
this.logger.traceInput(this.state.currentTraceId, transcript);
// Also attach user audio to the trace
if (userAudio && userAudio.length > 0) {
try {
const wavBuffer = (0, utils_1.pcm16ToWav)(userAudio);
const attachment = {
type: "fileData",
id: (0, uuid_1.v4)(),
name: "User Audio Input",
data: wavBuffer,
mimeType: "audio/wav",
tags: { "attach-to": "input" },
};
this.logger.traceAddAttachment(this.state.currentTraceId, attachment);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error adding user audio to trace: ${e}`);
}
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error setting trace input: ${e}`);
}
}
// Cleanup audio buffer for this specific item (transcription-related state only)
if (itemId) {
this.state.userAudioBuffer.delete(itemId);
if (this.state.currentItemId === itemId) {
this.state.currentItemId = null;
}
}
// Clear sttGenerationId now that we've processed the transcription
this.state.sttGenerationId = null;
// If response.done was waiting for us, finalize the trace now
if (this.state.pendingTraceFinalization) {
const traceContainer = this.getCurrentTraceContainer();
this.finalizeTrace(traceContainer);
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling input audio transcription completed: ${e}`);
}
}
/**
* Handle response.done event.
* No longer waits for transcription - processes immediately.
* Transcription updates are handled asynchronously in handleInputAudioTranscriptionCompleted.
*/
handleResponseDone(event) {
try {
const response = event.response;
const responseId = (response === null || response === void 0 ? void 0 : response.id) || (0, uuid_1.v4)();
// Extract text and tool calls from output
let responseText = null;
const toolCalls = [];
const outputItems = (response === null || response === void 0 ? void 0 : response.output) || [];
for (const outputItem of outputItems) {
if ((outputItem === null || outputItem === void 0 ? void 0 : outputItem.type) === "message") {
// Extract text from message content
if (!responseText) {
responseText = (0, utils_1.extractOutputText)(outputItem);
}
}
else if ((outputItem === null || outputItem === void 0 ? void 0 : outputItem.type) === "function_call") {
const callId = outputItem.call_id || outputItem.id || (0, uuid_1.v4)();
const funcName = outputItem.name || "unknown";
let args = outputItem.arguments || "";
if (typeof args !== "string") {
args = JSON.stringify(args);
}
toolCalls.push({
id: callId,
type: "function",
function: {
name: funcName,
arguments: args,
},
});
}
}
// Extract usage
let usage;
if (response === null || response === void 0 ? void 0 : response.usage) {
usage = {
prompt_tokens: response.usage.input_tokens || 0,
completion_tokens: response.usage.output_tokens || 0,
total_tokens: response.usage.total_tokens || 0,
};
}
const choices = [];
choices.push({
index: 0,
message: {
role: "assistant",
content: responseText,
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
},
logprobs: null,
finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
});
// Build result
const result = {
id: responseId,
object: "realtime.response",
created: Math.floor(Date.now() / 1000),
model: this.state.sessionModel || "unknown",
choices,
usage: usage !== undefined ? usage : { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
};
// Log result using logger methods (container pattern)
if (this.state.currentGenerationId) {
try {
this.logger.generationResult(this.state.currentGenerationId, result);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error logging generation result: ${e}`);
}
// Attach output audio to generation only (trace inherits from generation)
if (this.state.outputAudio && this.state.outputAudio.length > 0) {
try {
const wavBuffer = (0, utils_1.pcm16ToWav)(this.state.outputAudio);
const attachment = {
type: "fileData",
name: "Assistant Audio Response",
data: wavBuffer,
mimeType: "audio/wav",
tags: { "attach-to": "output" },
};
if (this.state.currentTraceId) {
this.logger.traceAddAttachment(this.state.currentTraceId, {
id: (0, uuid_1.v4)(),
...attachment,
});
}
this.logger.generationAddAttachment(this.state.currentGenerationId, {
id: (0, uuid_1.v4)(),
...attachment,
});
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error adding output audio attachment: ${e}`);
}
}
else {
}
// Always clear output audio after attempting attachment
this.state.outputAudio = null;
}
// Handle trace ending
const hasToolCalls = toolCalls.length > 0;
const traceContainer = this.getCurrentTraceContainer();
if (hasToolCalls) {
// Keep trace open for next response
this.state.hasPendingToolCalls = true;
// Clear lastUserMessage to prevent it from appearing in continuation generation
this.state.lastUserMessage = null;
}
else {
// Set trace output
if (traceContainer && this.state.currentTraceId && responseText) {
try {
this.logger.traceOutput(this.state.currentTraceId, responseText);
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error setting trace output: ${e}`);
}
}
// Check if we're still waiting for audio transcription
const waitingForTranscription = this.state.sttGenerationId !== null;
if (waitingForTranscription) {
// Mark that we need to finalize the trace after transcription completes
this.state.pendingTraceFinalization = true;
}
else {
// End trace immediately
this.finalizeTrace(traceContainer);
}
}
// Always clear function call tracking
this.state.functionCallArguments.clear();
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error processing response.done: ${e}`);
}
}
/**
* Handle error event.
*/
handleError(event) {
try {
const errorObj = event.error || event;
let errorMessage = String(errorObj);
if (errorObj === null || errorObj === void 0 ? void 0 : errorObj.message) {
errorMessage = String(errorObj.message);
}
// Use logger methods for error handling (container pattern)
if (this.state.currentGenerationId) {
try {
this.logger.generationError(this.state.currentGenerationId, {
message: errorMessage,
type: (errorObj === null || errorObj === void 0 ? void 0 : errorObj.type) || "RealtimeError",
});
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error logging generation error: ${e}`);
}
}
}
catch (e) {
console.warn(`[MaximSDK][MaximOpenAIRealtimeWrapper] Error handling error event: ${e}`);
}
}
/**
* Cleanup and detach event listeners.
* Call this when you're done with the wrapper.
* Returns a promise that resolves when the queue is drained.
*/
async cleanup() {
// Wait for any pending events in the queue to finish processing
await this.waitForQueueDrain();
// End any open trace using container pattern
const traceContainer = this.getCurrentTraceContainer();
if (traceContainer) {
try {
traceContainer.end();
}
catch {
// Ignore
}
}
if (this.state.isLocalSession && this.state.session) {
try {
this.state.session.end();
}
catch {
// Ignore
}
}
// Remove event listeners
for (const [eventType, handler] of this.boundHandlers) {
try {
this.realtimeClient.off(eventType, handler);
}
catch {
// Ignore - some clients may not support off()
}
}
this.boundHandlers.clear();
// Restore original send method
if (this.originalSend) {
this.realtimeClient.send = this.originalSend;
this.originalSend = null;
}
}
/**
* Wait for the event queue to drain.
* Useful for ensuring all pending events are processed before cleanup.
*/
async waitForQueueDrain() {
// If queue is idle, return immediately
if (this.eventQueue.isIdle) {
return;
}
// Otherwise, enqueue a no-op task and wait for it to complete
// This ensures all prior tasks have finished
return new Promise((resolve) => {
this.eventQueue.enqueue(async () => {
resolve();
});
});
}
/**
* Get the current session ID.
*/
get sessionId() {
return this.state.sessionId;
}
/**
* Get the underlying realtime client.
*/
get client() {
return this.realtimeClient;
}
}
exports.MaximOpenAIRealtimeWrapper = MaximOpenAIRealtimeWrapper;
/**
 * Convenience factory: attach Maxim logging to an OpenAI Realtime client.
 *
 * @param realtimeClient - The OpenAI Realtime client (OpenAIRealtimeWS or OpenAIRealtimeWebSocket)
 * @param logger - The MaximLogger instance
 * @param headers - Optional headers for session/generation metadata
 * @returns The MaximOpenAIRealtimeWrapper created for the client; the
 *          client itself remains available through `wrapper.client`
 *
 * @example
 * ```typescript
 * import { OpenAIRealtimeWS } from 'openai/realtime/ws';
 * import { Maxim } from '@maximai/maxim-js';
 * import { wrapOpenAIRealtime } from '@maximai/maxim-js/openai';
 *
 * const maxim = new Maxim({ apiKey: process.env.MAXIM_API_KEY });
 * const logger = await maxim.logger({ id: 'my-app' });
 *
 * const rt = new OpenAIRealtimeWS({ model: 'gpt-4o-realtime-preview' });
 * const wrapper = wrapOpenAIRealtime(rt, logger, {
 *   'maxim-session-name': 'Voice Assistant Session'
 * });
 *
 * // Use rt normally - all events are automatically logged
 * ```
 */
function wrapOpenAIRealtime(realtimeClient, logger, headers) {
  const wrapper = new MaximOpenAIRealtimeWrapper(realtimeClient, logger, headers);
  return wrapper;
}
//# sourceMappingURL=realtime.js.map