// speech-provider
// Version:
// A unified interface for browser speech synthesis and Eleven Labs voices
// 198 lines • 7.67 kB
// JavaScript
import { ElevenLabsVoiceDataSchema, } from "./ElevenLabsTypes";
import { cachedFetch } from "./utils/cachedFetch";
import { checkObjectsAgainstSchema, printDistinctPropertyValues, } from "./utils/debugging";
/**
 * The base URL for the Eleven Labs API.
 *
 * Used as the default `baseUrl` argument of both the `ElevenLabsVoiceProvider`
 * constructor and the `createElevenLabsVoiceProvider` factory. Endpoint paths
 * are appended to it with a leading slash, so it carries no trailing slash.
 */
export const ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1";
/**
 * A voice provider that uses the ElevenLabs API for high-quality text-to-speech.
 * This provider requires an API key from ElevenLabs.
 *
 * @example
 * ```typescript
 * const provider = createElevenLabsVoiceProvider("your-api-key");
 * const voices = await provider.getVoices({ lang: "en-US", minVoices: 1 });
 * const voice = voices[0];
 * const utterance = voice.createUtterance("Hello, world!");
 * utterance.start();
 * ```
 */
export class ElevenLabsVoiceProvider {
    name = "ElevenLabs";
    apiKey;
    baseUrl;
    validateResponses;
    printVoiceProperties;
    cacheMaxAge;
    /**
     * Create a new ElevenLabs voice provider.
     * @param apiKey - Your ElevenLabs API key
     * @param baseUrl - The base URL for the ElevenLabs API (defaults to the official API)
     * @param options - Additional options for the provider
     * @param options.validateResponses - Whether to validate API responses against the schema
     * @param options.printVoiceProperties - Whether to print voice properties for debugging
     * @param options.cacheMaxAge - Maximum age of cached responses in seconds (default: 1 hour).
     *   An explicit `null` is stored as-is and handed to every voice this provider creates;
     *   it is documented as disabling caching (the handling itself lives in `cachedFetch`).
     */
    constructor(apiKey, baseUrl = ELEVEN_LABS_BASE_URL, options = {}) {
        this.apiKey = apiKey;
        this.baseUrl = baseUrl;
        this.validateResponses = options.validateResponses || false;
        this.printVoiceProperties = options.printVoiceProperties || false;
        // Only an omitted option falls back to one hour. `??` would also swallow an
        // explicit `null`, making the documented "null disables caching" unreachable.
        this.cacheMaxAge = options.cacheMaxAge === undefined ? 3600 : options.cacheMaxAge;
    }
    /**
     * Get available voices for a given language code.
     *
     * Voices are matched on the primary language subtag of `lang` (the part before
     * the first `-` or `_`). When fewer than `minVoices` voices match (or `minVoices`
     * is omitted), every voice returned by the API is used instead.
     *
     * @param options - The options for getting voices
     * @param options.lang - The language code to match (e.g., "en-US")
     * @param options.minVoices - The minimum number of voices to return
     * @returns A promise that resolves to an array of ElevenLabs voices
     * @throws {Error} If the request fails or the response carries no `voices` array
     */
    async getVoices({ lang, minVoices, }) {
        // Take the whole primary subtag: slicing two characters would turn
        // three-letter codes such as "fil-PH" into an unrelated language ("fi").
        const [langCode] = lang.split(/[-_]/);
        const response = await fetch(`${this.baseUrl}/voices?language=${encodeURIComponent(langCode)}`, {
            headers: { "xi-api-key": this.apiKey },
        });
        if (!response.ok) {
            // Fail with the HTTP status instead of a TypeError on the missing list below.
            throw new Error(`ElevenLabs voices request failed: ${response.status} ${response.statusText}`);
        }
        const data = await response.json();
        if (!Array.isArray(data?.voices)) {
            throw new Error("ElevenLabs voices response did not contain a `voices` array");
        }
        const allVoices = data.voices;
        if (this.validateResponses) {
            checkObjectsAgainstSchema(allVoices, ElevenLabsVoiceDataSchema);
        }
        if (this.printVoiceProperties) {
            printDistinctPropertyValues(allVoices, {
                omit: [
                    "name",
                    "voice_id",
                    "sharing",
                    "voice_verification",
                    "fine_tuning",
                ],
            });
        }
        // A voice without labels simply does not match; it must not abort the lookup.
        const matchingVoices = allVoices.filter((voice) => voice.labels?.language === langCode);
        const selectedVoices = matchingVoices.length >= minVoices ? matchingVoices : allVoices;
        return selectedVoices.map((voice) => new ElevenLabsVoice(this.apiKey, voice, this, this.cacheMaxAge));
    }
    /**
     * Get the default voice for a given language code.
     * @param options - The options for getting the default voice
     * @param options.lang - The language code to match (e.g., "en-US")
     * @returns A promise that resolves to the first available voice or null if none is available
     */
    async getDefaultVoice({ lang }) {
        const voices = await this.getVoices({ lang, minVoices: 1 });
        return voices[0] ?? null;
    }
}
/**
 * A voice implementation that wraps an ElevenLabs voice.
 */
export class ElevenLabsVoice {
    apiKey;
    voiceData;
    provider;
    cacheMaxAge;
    /**
     * @param apiKey - The ElevenLabs API key used for synthesis requests
     * @param voiceData - The voice record as returned by the ElevenLabs voices endpoint
     * @param provider - The provider that produced this voice; its `baseUrl`, when
     *   present, is reused for synthesis requests
     * @param cacheMaxAge - Maximum age of cached audio in seconds (default: 1 hour);
     *   forwarded unchanged to every utterance
     */
    constructor(apiKey, voiceData, provider, cacheMaxAge = 3600) {
        this.apiKey = apiKey;
        this.voiceData = voiceData;
        this.provider = provider;
        this.cacheMaxAge = cacheMaxAge;
    }
    /** The language code for the voice (undefined when the voice carries no labels) */
    get lang() {
        return this.voiceData.labels?.language;
    }
    /** The display name of the voice, falling back to its id */
    get name() {
        return this.voiceData.name ?? this.voiceData.voice_id;
    }
    /** The unique identifier for the voice */
    get id() {
        return this.voiceData.voice_id;
    }
    /** A short description of the voice: the text after the first " - " of the full description, or null */
    get description() {
        const match = / - (.+)/.exec(this.voiceData.description ?? "");
        return match?.[1] ?? null;
    }
    /** The full description of the voice */
    get longDescription() {
        return this.voiceData.description;
    }
    /**
     * Create a new utterance with this voice.
     * @param text - The text to speak
     * @returns A new utterance that can be started and stopped
     */
    createUtterance(text) {
        // Synthesize through the same API root the provider was configured with, so a
        // custom or proxy base URL is honoured. `undefined` selects the default root.
        const baseUrl = this.provider?.baseUrl ?? undefined;
        return new ElevenLabsUtterance(this.apiKey, this.voiceData.voice_id, this.voiceData.labels?.language, text, this.cacheMaxAge, baseUrl);
    }
}
/**
 * An utterance implementation that uses the ElevenLabs API to synthesize speech.
 */
export class ElevenLabsUtterance {
    apiKey;
    voiceId;
    languageCode;
    text;
    audio = null;
    onStartCallback = null;
    onEndCallback = null;
    cacheMaxAge;
    baseUrl;
    /** Object URL backing `audio`, kept so it can be revoked once no longer needed. */
    #audioUrl = null;
    /** Bumped by every stop(); lets an in-flight start() notice it was cancelled. */
    #generation = 0;
    /**
     * @param apiKey - The ElevenLabs API key
     * @param voiceId - The id of the voice to speak with
     * @param languageCode - The language code sent with the synthesis request
     * @param text - The text to speak
     * @param cacheMaxAge - Maximum age of cached audio in seconds (default: 1 hour)
     * @param baseUrl - The base URL for the ElevenLabs API (defaults to the official API)
     */
    constructor(apiKey, voiceId, languageCode, text, cacheMaxAge = 3600, baseUrl = ELEVEN_LABS_BASE_URL) {
        this.apiKey = apiKey;
        this.voiceId = voiceId;
        this.languageCode = languageCode;
        this.text = text;
        this.cacheMaxAge = cacheMaxAge;
        this.baseUrl = baseUrl;
    }
    /**
     * Start speaking the utterance by fetching audio from ElevenLabs and playing it.
     *
     * Any playback or request started earlier on this utterance is cancelled first.
     * If stop() is called while the audio is still being fetched, nothing is played
     * and the returned promise resolves normally.
     *
     * @throws {Error} If the synthesis request reports a failed HTTP status
     */
    async start() {
        // Never let two audio elements from the same utterance overlap.
        this.stop();
        const generation = this.#generation;
        const response = await cachedFetch(`${this.baseUrl}/text-to-speech/${encodeURIComponent(this.voiceId)}`, {
            method: "POST",
            headers: {
                "xi-api-key": this.apiKey,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                model_id: "eleven_turbo_v2_5",
                language_code: this.languageCode,
                text: this.text,
            }),
            cacheOptions: {
                maxAge: this.cacheMaxAge,
            },
        });
        // Compared against `false` so a Response-like object without an `ok` flag
        // is still accepted; a real error body must not be played as audio.
        if (response.ok === false) {
            throw new Error(`ElevenLabs text-to-speech request failed: ${response.status} ${response.statusText}`);
        }
        const audioBlob = await response.blob();
        if (generation !== this.#generation) {
            // stop() (or a newer start()) ran while the audio was downloading.
            return;
        }
        const audioUrl = URL.createObjectURL(audioBlob);
        const audio = new Audio(audioUrl);
        this.audio = audio;
        this.#audioUrl = audioUrl;
        audio.onplay = () => this.onStartCallback?.();
        audio.onended = () => {
            this.#releaseAudioUrl(audioUrl);
            this.onEndCallback?.();
        };
        try {
            await audio.play();
        }
        catch (error) {
            // Playback never got going; do not keep the blob alive.
            this.#releaseAudioUrl(audioUrl);
            throw error;
        }
    }
    /** Stop speaking the utterance and cancel any start() that is still loading audio */
    stop() {
        this.#generation += 1;
        this.audio?.pause();
        this.audio = null;
        this.#releaseAudioUrl(this.#audioUrl);
    }
    /** The callback invoked when the utterance starts speaking */
    get onstart() {
        return this.onStartCallback;
    }
    /** Set the callback for when the utterance starts speaking */
    set onstart(callback) {
        this.onStartCallback = callback;
    }
    /** The callback invoked when the utterance finishes speaking */
    get onend() {
        return this.onEndCallback;
    }
    /** Set the callback for when the utterance finishes speaking */
    set onend(callback) {
        this.onEndCallback = callback;
    }
    /** Revoke `url` if it is the object URL currently held; safe to call repeatedly. */
    #releaseAudioUrl(url) {
        if (url !== null && this.#audioUrl === url) {
            URL.revokeObjectURL(url);
            this.#audioUrl = null;
        }
    }
}
/**
 * Factory for an Eleven Labs voice provider.
 *
 * Convenience wrapper that forwards its arguments, unchanged, to the
 * `ElevenLabsVoiceProvider` constructor.
 *
 * @param apiKey - Your Eleven Labs API key
 * @param baseUrl - Root URL of the Eleven Labs API; the official endpoint is used when omitted
 * @param options - Extra provider settings, handed to the constructor as-is
 * @param options.validateResponses - Check API responses against the schema
 * @param options.printVoiceProperties - Print voice properties as a debugging aid
 * @param options.cacheMaxAge - Maximum age of cached responses, in seconds
 * @returns The newly constructed provider
 */
export function createElevenLabsVoiceProvider(apiKey, baseUrl = ELEVEN_LABS_BASE_URL, options = {}) {
    const provider = new ElevenLabsVoiceProvider(apiKey, baseUrl, options);
    return provider;
}
//# sourceMappingURL=ElevenLabsVoiceProvider.js.map