@openai/agents-realtime
Version:
The OpenAI Agents SDK is a lightweight yet powerful framework for building multi-agent workflows. This package contains the logic for building realtime voice agents on the server or in the browser.
265 lines • 11 kB
JavaScript
"use strict";
/// <reference lib="dom" />
// TypeScript CJS interop helper: wrap non-ESM values so `.default` access works.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Already an ES-module-flagged object: pass it through unchanged.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Otherwise expose the raw value under a synthetic `default` key.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.OpenAIRealtimeWebRTC = void 0;
const _shims_1 = require("@openai/agents-core/_shims");
const agents_core_1 = require("@openai/agents-core");
const logger_1 = __importDefault(require("./logger.js"));
const openaiRealtimeBase_1 = require("./openaiRealtimeBase.js");
const openaiRealtimeEvents_1 = require("./openaiRealtimeEvents.js");
const utils_1 = require("./utils.js");
/**
* Transport layer that's handling the connection between the client and OpenAI's Realtime API
* via WebRTC. While this transport layer is designed to be used within a RealtimeSession, it can
* also be used standalone if you want to have a direct connection to the Realtime API.
*
* Unless you specify a `mediaStream` or `audioElement` option, the transport layer will
* automatically configure the microphone and audio output to be used by the session.
*/
class OpenAIRealtimeWebRTC extends openaiRealtimeBase_1.OpenAIRealtimeBase {
    options;
    // URL the SDP offer is POSTed to (defaults to the OpenAI Realtime calls endpoint).
    #url;
    // Snapshot of the connection: status plus the live peer connection, data channel and call ID.
    #state = {
        status: 'disconnected',
        peerConnection: undefined,
        dataChannel: undefined,
        callId: undefined,
    };
    // Whether a regular (non-ephemeral) API key is allowed from a browser environment.
    #useInsecureApiKey;
    // True while the server is generating a response; consulted by `interrupt()`.
    #ongoingResponse = false;
    // Local mute flag mirrored onto the outgoing audio track(s).
    #muted = false;
    constructor(options = {}) {
        // Fail fast in environments without WebRTC support (e.g. plain Node.js).
        if (typeof RTCPeerConnection === 'undefined') {
            throw new Error('WebRTC is not supported in this environment');
        }
        super(options);
        this.options = options;
        this.#url = options.baseUrl ?? `https://api.openai.com/v1/realtime/calls`;
        this.#useInsecureApiKey = options.useInsecureApiKey ?? false;
    }
    /**
     * The current call ID of the WebRTC connection.
     */
    get callId() {
        return this.#state.callId;
    }
    /**
     * The current status of the WebRTC connection.
     */
    get status() {
        return this.#state.status;
    }
    /**
     * The current connection state of the WebRTC connection including the peer connection and data
     * channel.
     */
    get connectionState() {
        return this.#state;
    }
    /**
     * Whether the session is muted.
     */
    get muted() {
        return this.#muted;
    }
    /**
     * Connect to the Realtime API. This will establish the connection to the OpenAI Realtime API
     * via WebRTC.
     *
     * If you are using a browser, the transport layer will also automatically configure the
     * microphone and audio output to be used by the session.
     *
     * @param options - The options for the connection.
     */
    async connect(options) {
        if (this.#state.status === 'connected') {
            return;
        }
        // NOTE(review): a concurrent `connect()` call is only warned about; the second call
        // still proceeds and replaces the in-flight connection state.
        if (this.#state.status === 'connecting') {
            logger_1.default.warn('Realtime connection already in progress. Please await original promise');
        }
        const model = options.model ?? this.currentModel;
        this.currentModel = model;
        const baseUrl = options.url ?? this.#url;
        const apiKey = await this._getApiKey(options);
        // Ephemeral client keys start with `ek_`; only those are safe to expose in a browser.
        const isClientKey = typeof apiKey === 'string' && apiKey.startsWith('ek_');
        if ((0, _shims_1.isBrowserEnvironment)() && !this.#useInsecureApiKey && !isClientKey) {
            throw new agents_core_1.UserError('Using the WebRTC connection in a browser environment requires an ephemeral client key. If you need to use a regular API key, use the WebSocket transport or set the `useInsecureApiKey` option to true.');
        }
        // eslint-disable-next-line no-async-promise-executor
        return new Promise(async (resolve, reject) => {
            try {
                const userSessionConfig = {
                    ...(options.initialSessionConfig || {}),
                    model: this.currentModel,
                };
                const connectionUrl = new URL(baseUrl);
                let peerConnection = new RTCPeerConnection();
                const dataChannel = peerConnection.createDataChannel('oai-events');
                let callId = undefined;
                this.#state = {
                    status: 'connecting',
                    peerConnection,
                    dataChannel,
                    callId,
                };
                this.emit('connection_change', this.#state.status);
                dataChannel.addEventListener('open', () => {
                    // By the time the channel opens, `callId` has been populated from the
                    // `Location` header of the SDP answer below.
                    this.#state = {
                        status: 'connected',
                        peerConnection,
                        dataChannel,
                        callId,
                    };
                    // Sending the session config again here once the channel is connected to ensure
                    // that the session config is sent to the server before the first response is received
                    // Setting it on connection should work but the config is not being validated on the
                    // server. This triggers a validation error if the config is not valid.
                    this.updateSessionConfig(userSessionConfig);
                    this.emit('connection_change', this.#state.status);
                    this._onOpen();
                    resolve();
                });
                dataChannel.addEventListener('error', (event) => {
                    this.close();
                    this._onError(event);
                    reject(event);
                });
                dataChannel.addEventListener('message', (event) => {
                    this._onMessage(event);
                    const { data: parsed, isGeneric } = (0, openaiRealtimeEvents_1.parseRealtimeEvent)(event);
                    if (!parsed || isGeneric) {
                        return;
                    }
                    // Track whether a model response is in flight so `interrupt()` knows
                    // whether it has to cancel one.
                    if (parsed.type === 'response.created') {
                        this.#ongoingResponse = true;
                    }
                    else if (parsed.type === 'response.done') {
                        this.#ongoingResponse = false;
                    }
                    if (parsed.type === 'session.created') {
                        this._tracingConfig = parsed.session.tracing;
                        // Trying to turn on tracing after the session is created
                        this._updateTracingConfig(userSessionConfig.tracing ?? 'auto');
                    }
                });
                // set up audio playback
                const audioElement = this.options.audioElement ?? document.createElement('audio');
                audioElement.autoplay = true;
                peerConnection.ontrack = (event) => {
                    audioElement.srcObject = event.streams[0];
                };
                // get microphone stream
                const stream = this.options.mediaStream ??
                    (await navigator.mediaDevices.getUserMedia({
                        audio: true,
                    }));
                peerConnection.addTrack(stream.getAudioTracks()[0]);
                // Give callers a chance to wrap or replace the peer connection before negotiation.
                if (this.options.changePeerConnection) {
                    peerConnection =
                        await this.options.changePeerConnection(peerConnection);
                    this.#state = { ...this.#state, peerConnection };
                }
                const offer = await peerConnection.createOffer();
                await peerConnection.setLocalDescription(offer);
                if (!offer.sdp) {
                    throw new Error('Failed to create offer');
                }
                const sdpResponse = await fetch(connectionUrl, {
                    method: 'POST',
                    body: offer.sdp,
                    headers: {
                        'Content-Type': 'application/sdp',
                        Authorization: `Bearer ${apiKey}`,
                        'X-OpenAI-Agents-SDK': utils_1.HEADERS['X-OpenAI-Agents-SDK'],
                    },
                });
                // Surface HTTP errors directly instead of feeding the error body into
                // `setRemoteDescription`, which would otherwise fail with a confusing
                // SDP parse error rather than the actual API error.
                if (!sdpResponse.ok) {
                    const errorBody = await sdpResponse.text();
                    throw new Error(`Failed to connect to the Realtime API (status ${sdpResponse.status}): ${errorBody}`);
                }
                // The call ID is the last path segment of the `Location` response header.
                callId = sdpResponse.headers?.get('Location')?.split('/').pop();
                this.#state = { ...this.#state, callId };
                const answer = {
                    type: 'answer',
                    sdp: await sdpResponse.text(),
                };
                await peerConnection.setRemoteDescription(answer);
            }
            catch (error) {
                // Tear down any partially-established connection before propagating the error.
                this.close();
                this._onError(error);
                reject(error);
            }
        });
    }
    /**
     * Send an event to the Realtime API. This will stringify the event and send it directly to the
     * API. This can be used if you want to take control over the connection and send events manually.
     *
     * @param event - The event to send.
     * @throws If the data channel is missing or not yet open.
     */
    sendEvent(event) {
        if (!this.#state.dataChannel ||
            this.#state.dataChannel.readyState !== 'open') {
            throw new Error('WebRTC data channel is not connected. Make sure you call `connect()` before sending events.');
        }
        this.#state.dataChannel.send(JSON.stringify(event));
    }
    /**
     * Mute or unmute the session.
     * @param muted - Whether to mute the session.
     */
    mute(muted) {
        this.#muted = muted;
        if (this.#state.peerConnection) {
            const peerConnection = this.#state.peerConnection;
            // Toggle every outgoing track rather than removing it, so unmuting is instant.
            peerConnection.getSenders().forEach((sender) => {
                if (sender.track) {
                    sender.track.enabled = !muted;
                }
            });
        }
    }
    /**
     * Close the connection to the Realtime API and disconnects the underlying WebRTC connection.
     */
    close() {
        if (this.#state.dataChannel) {
            this.#state.dataChannel.close();
        }
        if (this.#state.peerConnection) {
            const peerConnection = this.#state.peerConnection;
            // Stop all outgoing tracks (including the microphone) before closing the connection.
            // NOTE(review): this also stops a caller-supplied `mediaStream` track — confirm
            // callers expect their stream to be unusable after `close()`.
            peerConnection.getSenders().forEach((sender) => {
                sender.track?.stop();
            });
            peerConnection.close();
        }
        // Only emit the disconnect transition once; `close()` may be called repeatedly.
        if (this.#state.status !== 'disconnected') {
            this.#state = {
                status: 'disconnected',
                peerConnection: undefined,
                dataChannel: undefined,
                callId: undefined,
            };
            this.emit('connection_change', this.#state.status);
            this._onClose();
        }
    }
    /**
     * Interrupt the current response if one is ongoing and clear the audio buffer so that the agent
     * stops talking.
     */
    interrupt() {
        if (this.#ongoingResponse) {
            this.sendEvent({
                type: 'response.cancel',
            });
            this.#ongoingResponse = false;
        }
        // Always flush buffered output audio, even if no response is in flight.
        this.sendEvent({
            type: 'output_audio_buffer.clear',
        });
    }
}
exports.OpenAIRealtimeWebRTC = OpenAIRealtimeWebRTC;
//# sourceMappingURL=openaiRealtimeWebRtc.js.map