UNPKG

@openai/agents-realtime

Version:

The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows. This package contains the logic for building realtime voice agents on the server or in the browser.

265 lines 11 kB
"use strict";
/// <reference lib="dom" />
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.OpenAIRealtimeWebRTC = void 0;
const _shims_1 = require("@openai/agents-core/_shims");
const agents_core_1 = require("@openai/agents-core");
const logger_1 = __importDefault(require("./logger.js"));
const openaiRealtimeBase_1 = require("./openaiRealtimeBase.js");
const openaiRealtimeEvents_1 = require("./openaiRealtimeEvents.js");
const utils_1 = require("./utils.js");
/**
 * Transport layer that's handling the connection between the client and OpenAI's Realtime API
 * via WebRTC. While this transport layer is designed to be used within a RealtimeSession, it can
 * also be used standalone if you want to have a direct connection to the Realtime API.
 *
 * Unless you specify a `mediaStream` or `audioElement` option, the transport layer will
 * automatically configure the microphone and audio output to be used by the session.
 */
class OpenAIRealtimeWebRTC extends openaiRealtimeBase_1.OpenAIRealtimeBase {
    /** The options object passed to the constructor. */
    options;
    /** Default endpoint for the SDP exchange; `connect({ url })` overrides it per call. */
    #url;
    /** Connection snapshot: status plus the live peer connection, data channel and call ID. */
    #state = {
        status: 'disconnected',
        peerConnection: undefined,
        dataChannel: undefined,
        callId: undefined,
    };
    /** When true, `connect()` accepts a non-ephemeral API key in a browser environment. */
    #useInsecureApiKey;
    /** True between a `response.created` and the matching `response.done` server event. */
    #ongoingResponse = false;
    /** Last value passed to `mute()`. It is not reset by `close()`. */
    #muted = false;
    /**
     * @param options - Transport options. This class reads `baseUrl`, `useInsecureApiKey`,
     *   `audioElement`, `mediaStream` and `changePeerConnection`; the whole object is also
     *   forwarded to the base class constructor.
     * @throws {Error} If `RTCPeerConnection` is not defined in the current environment.
     */
    constructor(options = {}) {
        if (typeof RTCPeerConnection === 'undefined') {
            throw new Error('WebRTC is not supported in this environment');
        }
        super(options);
        this.options = options;
        this.#url = options.baseUrl ?? `https://api.openai.com/v1/realtime/calls`;
        this.#useInsecureApiKey = options.useInsecureApiKey ?? false;
    }
    /**
     * The current call ID of the WebRTC connection.
     */
    get callId() {
        return this.#state.callId;
    }
    /**
     * The current status of the WebRTC connection.
     */
    get status() {
        return this.#state.status;
    }
    /**
     * The current connection state of the WebRTC connection including the peer connection and data
     * channel.
     */
    get connectionState() {
        return this.#state;
    }
    /**
     * Whether the session is muted.
     */
    get muted() {
        return this.#muted;
    }
    /**
     * Connect to the Realtime API. This will establish the connection to the OpenAI Realtime API
     * via WebRTC.
     *
     * If you are using a browser, the transport layer will also automatically configure the
     * microphone and audio output to be used by the session.
     *
     * The returned promise resolves once the data channel reports `open`. It rejects if the data
     * channel emits an error or any setup step fails; in both cases the connection is closed
     * first. Calling this while already connected is a no-op.
     *
     * @param options - The options for the connection. `model`, `url` and `initialSessionConfig`
     *   are read here; the object is also passed to `_getApiKey`.
     * @returns A promise that resolves with no value once the connection is open.
     * @throws {UserError} (as a rejection) In a browser environment when the API key is not an
     *   ephemeral client key (`ek_` prefix) and `useInsecureApiKey` is not enabled.
     */
    async connect(options) {
        if (this.#state.status === 'connected') {
            return;
        }
        if (this.#state.status === 'connecting') {
            // Only a warning: the call still proceeds and starts a new connection attempt.
            logger_1.default.warn('Realtime connection already in progress. Please await original promise');
        }
        const model = options.model ?? this.currentModel;
        this.currentModel = model;
        const baseUrl = options.url ?? this.#url;
        const apiKey = await this._getApiKey(options);
        const isClientKey = typeof apiKey === 'string' && apiKey.startsWith('ek_');
        if ((0, _shims_1.isBrowserEnvironment)() && !this.#useInsecureApiKey && !isClientKey) {
            throw new agents_core_1.UserError('Using the WebRTC connection in a browser environment requires an ephemeral client key. If you need to use a regular API key, use the WebSocket transport or set the `useInsecureApiKey` option to true.');
        }
        // The executor is async because setup awaits several steps, while resolution is driven
        // by the data channel's `open` event. Every await sits inside the try/catch so a
        // failure always reaches `reject`.
        // eslint-disable-next-line no-async-promise-executor
        return new Promise(async (resolve, reject) => {
            try {
                const userSessionConfig = {
                    ...(options.initialSessionConfig || {}),
                    model: this.currentModel,
                };
                const connectionUrl = new URL(baseUrl);
                let peerConnection = new RTCPeerConnection();
                const dataChannel = peerConnection.createDataChannel('oai-events');
                // Assigned after the SDP exchange; the `open` handler below reads it by closure.
                let callId = undefined;
                this.#state = {
                    status: 'connecting',
                    peerConnection,
                    dataChannel,
                    callId,
                };
                this.emit('connection_change', this.#state.status);
                dataChannel.addEventListener('open', () => {
                    this.#state = {
                        status: 'connected',
                        peerConnection,
                        dataChannel,
                        callId,
                    };
                    // Sending the session config again here once the channel is connected to ensure
                    // that the session config is sent to the server before the first response is received
                    // Setting it on connection should work but the config is not being validated on the
                    // server. This triggers a validation error if the config is not valid.
                    this.updateSessionConfig(userSessionConfig);
                    this.emit('connection_change', this.#state.status);
                    this._onOpen();
                    resolve();
                });
                dataChannel.addEventListener('error', (event) => {
                    this.close();
                    this._onError(event);
                    reject(event);
                });
                dataChannel.addEventListener('message', (event) => {
                    this._onMessage(event);
                    const { data: parsed, isGeneric } = (0, openaiRealtimeEvents_1.parseRealtimeEvent)(event);
                    if (!parsed || isGeneric) {
                        return;
                    }
                    // Track whether a response is in flight so `interrupt()` knows whether a
                    // `response.cancel` is needed.
                    if (parsed.type === 'response.created') {
                        this.#ongoingResponse = true;
                    }
                    else if (parsed.type === 'response.done') {
                        this.#ongoingResponse = false;
                    }
                    if (parsed.type === 'session.created') {
                        this._tracingConfig = parsed.session.tracing;
                        // Trying to turn on tracing after the session is created
                        this._updateTracingConfig(userSessionConfig.tracing ?? 'auto');
                    }
                });
                // set up audio playback
                const audioElement = this.options.audioElement ?? document.createElement('audio');
                audioElement.autoplay = true;
                peerConnection.ontrack = (event) => {
                    audioElement.srcObject = event.streams[0];
                };
                // get microphone stream
                const stream = this.options.mediaStream ??
                    (await navigator.mediaDevices.getUserMedia({
                        audio: true,
                    }));
                peerConnection.addTrack(stream.getAudioTracks()[0]);
                if (this.options.changePeerConnection) {
                    peerConnection = await this.options.changePeerConnection(peerConnection);
                    this.#state = { ...this.#state, peerConnection };
                }
                // `#muted` can already be true here (mute() called before connect(), or left
                // over from an earlier connection since close() does not reset it). Apply it to
                // the new senders so the `muted` getter never reports true while audio is sent.
                if (this.#muted) {
                    this.#setSendersEnabled(peerConnection, false);
                }
                const offer = await peerConnection.createOffer();
                await peerConnection.setLocalDescription(offer);
                if (!offer.sdp) {
                    throw new Error('Failed to create offer');
                }
                const sdpResponse = await fetch(connectionUrl, {
                    method: 'POST',
                    body: offer.sdp,
                    headers: {
                        'Content-Type': 'application/sdp',
                        Authorization: `Bearer ${apiKey}`,
                        'X-OpenAI-Agents-SDK': utils_1.HEADERS['X-OpenAI-Agents-SDK'],
                    },
                });
                // fetch() resolves on HTTP error statuses, so without this check an error body
                // would be handed to setRemoteDescription as if it were SDP. The comparison is
                // strict so response objects that do not expose `ok` behave as before.
                if (sdpResponse.ok === false) {
                    let detail = '';
                    try {
                        detail = await sdpResponse.text();
                    }
                    catch {
                        // Body could not be read; the status code alone is reported.
                    }
                    throw new Error(`Failed to establish the WebRTC call: HTTP ${sdpResponse.status}${detail ? ` - ${detail}` : ''}`);
                }
                // The call ID is the last path segment of the `Location` response header.
                callId = sdpResponse.headers?.get('Location')?.split('/').pop();
                this.#state = { ...this.#state, callId };
                const answer = {
                    type: 'answer',
                    sdp: await sdpResponse.text(),
                };
                await peerConnection.setRemoteDescription(answer);
            }
            catch (error) {
                this.close();
                this._onError(error);
                reject(error);
            }
        });
    }
    /**
     * Send an event to the Realtime API. This will stringify the event and send it directly to the
     * API. This can be used if you want to take control over the connection and send events manually.
     *
     * @param event - The event to send.
     * @throws {Error} If there is no data channel or its `readyState` is not `open`.
     */
    sendEvent(event) {
        if (!this.#state.dataChannel ||
            this.#state.dataChannel.readyState !== 'open') {
            throw new Error('WebRTC data channel is not connected. Make sure you call `connect()` before sending events.');
        }
        this.#state.dataChannel.send(JSON.stringify(event));
    }
    /**
     * Mute or unmute the session. The value is remembered and, when a peer connection exists,
     * applied immediately by toggling `enabled` on every sender track.
     *
     * @param muted - Whether to mute the session.
     */
    mute(muted) {
        this.#muted = muted;
        if (this.#state.peerConnection) {
            this.#setSendersEnabled(this.#state.peerConnection, !muted);
        }
    }
    /**
     * Close the connection to the Realtime API and disconnects the underlying WebRTC connection.
     *
     * Closes the data channel, stops every sender track and closes the peer connection. The
     * `connection_change` event and `_onClose()` only fire when the status was not already
     * `disconnected`, so repeated calls do not emit twice.
     */
    close() {
        if (this.#state.dataChannel) {
            this.#state.dataChannel.close();
        }
        if (this.#state.peerConnection) {
            const peerConnection = this.#state.peerConnection;
            peerConnection.getSenders().forEach((sender) => {
                sender.track?.stop();
            });
            peerConnection.close();
        }
        if (this.#state.status !== 'disconnected') {
            this.#state = {
                status: 'disconnected',
                peerConnection: undefined,
                dataChannel: undefined,
                callId: undefined,
            };
            this.emit('connection_change', this.#state.status);
            this._onClose();
        }
    }
    /**
     * Interrupt the current response if one is ongoing and clear the audio buffer so that the agent
     * stops talking.
     *
     * @throws {Error} If the data channel is not open (propagated from `sendEvent`).
     */
    interrupt() {
        if (this.#ongoingResponse) {
            this.sendEvent({
                type: 'response.cancel',
            });
            this.#ongoingResponse = false;
        }
        this.sendEvent({
            type: 'output_audio_buffer.clear',
        });
    }
    /**
     * Set `enabled` on the track of every sender of the given peer connection. Senders without
     * a track are skipped.
     *
     * @param peerConnection - The peer connection whose senders are updated.
     * @param enabled - Value assigned to each sender track's `enabled` property.
     */
    #setSendersEnabled(peerConnection, enabled) {
        peerConnection.getSenders().forEach((sender) => {
            if (sender.track) {
                sender.track.enabled = enabled;
            }
        });
    }
}
exports.OpenAIRealtimeWebRTC = OpenAIRealtimeWebRTC;
//# sourceMappingURL=openaiRealtimeWebRtc.js.map