UNPKG

speech-provider

Version:

A unified interface for browser speech synthesis and Eleven Labs voices

198 lines 7.67 kB
import { ElevenLabsVoiceDataSchema, } from "./ElevenLabsTypes";
import { cachedFetch } from "./utils/cachedFetch";
import { checkObjectsAgainstSchema, printDistinctPropertyValues, } from "./utils/debugging";

/** The base URL for the Eleven Labs API */
export const ELEVEN_LABS_BASE_URL = "https://api.elevenlabs.io/v1";

/** Default maximum age, in seconds, of cached text-to-speech responses (1 hour). */
const DEFAULT_CACHE_MAX_AGE = 3600;

/**
 * Throw a descriptive error when an HTTP response reports failure.
 *
 * Only an explicit `ok === false` is treated as a failure, so a response-like
 * object that does not expose `ok` at all is passed through unchanged.
 *
 * @param response - The response (or response-like object) to check
 * @param action - Short description of the request, used in the error message
 * @throws {Error} If the response reports a non-success status
 */
function assertResponseOk(response, action) {
  if (response.ok === false) {
    throw new Error(`ElevenLabs ${action} failed: ${response.status} ${response.statusText}`);
  }
}

/**
 * A voice provider that uses the ElevenLabs API for high-quality text-to-speech.
 * This provider requires an API key from ElevenLabs.
 *
 * @example
 * ```typescript
 * const provider = createElevenLabsVoiceProvider("your-api-key");
 * const voices = await provider.getVoices({ lang: "en-US", minVoices: 1 });
 * const voice = voices[0];
 * const utterance = voice.createUtterance("Hello, world!");
 * utterance.start();
 * ```
 */
export class ElevenLabsVoiceProvider {
  name = "ElevenLabs";
  apiKey;
  baseUrl;
  validateResponses;
  printVoiceProperties;
  cacheMaxAge;

  /**
   * Create a new ElevenLabs voice provider.
   * @param apiKey - Your ElevenLabs API key
   * @param baseUrl - The base URL for the ElevenLabs API (defaults to the official API)
   * @param options - Additional options for the provider
   * @param options.validateResponses - Whether to validate API responses against the schema
   * @param options.printVoiceProperties - Whether to print voice properties for debugging
   * @param options.cacheMaxAge - Maximum age of cached responses in seconds (default: 1 hour). Set to null to disable caching.
   */
  constructor(apiKey, baseUrl = ELEVEN_LABS_BASE_URL, options = {}) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.validateResponses = Boolean(options.validateResponses);
    this.printVoiceProperties = Boolean(options.printVoiceProperties);
    // Only `undefined` selects the default. `null` is a meaningful value
    // ("disable caching") that `??` would wrongly replace with the default.
    this.cacheMaxAge =
      options.cacheMaxAge === undefined ? DEFAULT_CACHE_MAX_AGE : options.cacheMaxAge;
  }

  /**
   * Get available voices for a given language code.
   *
   * Voices whose `labels.language` equals the first two characters of `lang`
   * are preferred. If fewer than `minVoices` of them exist, every voice in the
   * response is returned instead.
   *
   * @param options - The options for getting voices
   * @param options.lang - The language code to match (e.g., "en-US")
   * @param options.minVoices - The minimum number of voices to return (default: 1)
   * @returns A promise that resolves to an array of ElevenLabs voices
   * @throws {Error} If the request fails or the response has no `voices` array
   */
  async getVoices({ lang, minVoices = 1, }) {
    const langCode = lang.slice(0, 2);
    const response = await fetch(`${this.baseUrl}/voices?language=${encodeURIComponent(langCode)}`, {
      headers: { "xi-api-key": this.apiKey },
    });
    assertResponseOk(response, "voice list request");
    const data = await response.json();
    if (!Array.isArray(data?.voices)) {
      throw new Error("ElevenLabs voice list response did not contain a `voices` array");
    }
    const allVoices = data.voices;
    if (this.validateResponses) {
      checkObjectsAgainstSchema(allVoices, ElevenLabsVoiceDataSchema);
    }
    if (this.printVoiceProperties) {
      printDistinctPropertyValues(allVoices, {
        omit: [
          "name",
          "voice_id",
          "sharing",
          "voice_verification",
          "fine_tuning",
        ],
      });
    }
    // `labels` comes from the API payload; tolerate voices that lack it.
    const matchingVoices = allVoices.filter((voice) => voice.labels?.language === langCode);
    const selectedVoices = matchingVoices.length >= minVoices ? matchingVoices : allVoices;
    return selectedVoices.map((voice) => new ElevenLabsVoice(this.apiKey, voice, this, this.cacheMaxAge));
  }

  /**
   * Get the default voice for a given language code.
   * @param options - The options for getting the default voice
   * @param options.lang - The language code to match (e.g., "en-US")
   * @returns A promise that resolves to the first available voice or null if none is available
   */
  async getDefaultVoice({ lang }) {
    const voices = await this.getVoices({ lang, minVoices: 1 });
    return voices[0] ?? null;
  }
}

/**
 * A voice implementation that wraps an ElevenLabs voice.
 */
export class ElevenLabsVoice {
  apiKey;
  voiceData;
  provider;
  cacheMaxAge;

  /**
   * @param apiKey - The ElevenLabs API key used for speech requests
   * @param voiceData - The voice record as returned by the voices endpoint
   * @param provider - The provider that created this voice
   * @param cacheMaxAge - Maximum age of cached responses in seconds, or null to disable caching
   */
  constructor(apiKey, voiceData, provider, cacheMaxAge = DEFAULT_CACHE_MAX_AGE) {
    this.apiKey = apiKey;
    this.voiceData = voiceData;
    this.provider = provider;
    this.cacheMaxAge = cacheMaxAge;
  }

  /** The language code for the voice (undefined when the voice has no language label) */
  get lang() {
    return this.voiceData.labels?.language;
  }

  /** The display name of the voice, falling back to its id */
  get name() {
    return this.voiceData.name ?? this.voiceData.voice_id;
  }

  /** The unique identifier for the voice */
  get id() {
    return this.voiceData.voice_id;
  }

  /** A short description of the voice: the text after the first " - " in the full description, or null */
  get description() {
    const match = / - (.+)/.exec(this.voiceData.description ?? "");
    return match?.[1] ?? null;
  }

  /** The full description of the voice */
  get longDescription() {
    return this.voiceData.description;
  }

  /**
   * Create a new utterance with this voice.
   * @param text - The text to speak
   * @returns A new utterance that can be started and stopped
   */
  createUtterance(text) {
    // Send speech requests to the same API host the provider was configured with.
    const baseUrl = this.provider?.baseUrl ?? ELEVEN_LABS_BASE_URL;
    return new ElevenLabsUtterance(this.apiKey, this.voiceData.voice_id, this.voiceData.labels?.language, text, this.cacheMaxAge, baseUrl);
  }
}

/**
 * An utterance implementation that uses the ElevenLabs API to synthesize speech.
 */
export class ElevenLabsUtterance {
  apiKey;
  voiceId;
  languageCode;
  text;
  audio = null;
  onStartCallback = null;
  onEndCallback = null;
  cacheMaxAge;
  baseUrl;
  /** Object URL backing the current audio element, kept so stop() can revoke it. */
  #objectUrl = null;
  /** Incremented by stop(); lets an in-flight start() notice it was cancelled. */
  #stopCount = 0;

  /**
   * @param apiKey - The ElevenLabs API key
   * @param voiceId - The id of the voice to speak with
   * @param languageCode - The language code sent with the speech request
   * @param text - The text to speak
   * @param cacheMaxAge - Maximum age of cached responses in seconds, or null to disable caching
   * @param baseUrl - The base URL for the ElevenLabs API (defaults to the official API)
   */
  constructor(apiKey, voiceId, languageCode, text, cacheMaxAge = DEFAULT_CACHE_MAX_AGE, baseUrl = ELEVEN_LABS_BASE_URL) {
    this.apiKey = apiKey;
    this.voiceId = voiceId;
    this.languageCode = languageCode;
    this.text = text;
    this.cacheMaxAge = cacheMaxAge;
    this.baseUrl = baseUrl;
  }

  /**
   * Start speaking the utterance by fetching audio from ElevenLabs and playing it.
   *
   * If stop() is called while the audio is still being fetched, playback is
   * skipped and the returned promise resolves without playing anything.
   *
   * @throws {Error} If the text-to-speech request reports a failure status
   */
  async start() {
    const stopCountAtStart = this.#stopCount;
    const url = `${this.baseUrl}/text-to-speech/${this.voiceId}`;
    const request = {
      method: "POST",
      headers: {
        "xi-api-key": this.apiKey,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model_id: "eleven_turbo_v2_5",
        language_code: this.languageCode,
        text: this.text,
      }),
    };
    // A null max age means "no caching": bypass the caching wrapper entirely.
    const response = this.cacheMaxAge === null
      ? await fetch(url, request)
      : await cachedFetch(url, {
        ...request,
        cacheOptions: {
          maxAge: this.cacheMaxAge,
        },
      });
    assertResponseOk(response, "text-to-speech request");
    const audioBlob = await response.blob();
    if (stopCountAtStart !== this.#stopCount) {
      // stop() ran while the audio was being fetched; do not start playback.
      return;
    }
    const objectUrl = URL.createObjectURL(audioBlob);
    const audio = new Audio(objectUrl);
    const releaseObjectUrl = () => {
      URL.revokeObjectURL(objectUrl);
      if (this.#objectUrl === objectUrl) {
        this.#objectUrl = null;
      }
    };
    this.#objectUrl = objectUrl;
    this.audio = audio;
    audio.onplay = () => this.onStartCallback?.();
    audio.onended = () => {
      // Playback is finished, so the blob URL is no longer needed.
      releaseObjectUrl();
      this.onEndCallback?.();
    };
    try {
      await audio.play();
    }
    catch (error) {
      // Playback never started (or was interrupted); don't leak the blob URL.
      releaseObjectUrl();
      throw error;
    }
  }

  /** Stop speaking the utterance, cancelling a start() that is still fetching audio */
  stop() {
    this.#stopCount += 1;
    this.audio?.pause();
    this.audio = null;
    if (this.#objectUrl !== null) {
      URL.revokeObjectURL(this.#objectUrl);
      this.#objectUrl = null;
    }
  }

  /** Set the callback for when the utterance starts speaking */
  set onstart(callback) {
    this.onStartCallback = callback;
  }

  /** Set the callback for when the utterance finishes speaking */
  set onend(callback) {
    this.onEndCallback = callback;
  }
}

/**
 * Create a new Eleven Labs voice provider.
 * @param apiKey - Your Eleven Labs API key
 * @param baseUrl - The base URL for the Eleven Labs API (defaults to the official API)
 * @param options - Additional options for the provider
 * @param options.validateResponses - Whether to validate API responses against the schema
 * @param options.printVoiceProperties - Whether to print voice properties for debugging
 * @param options.cacheMaxAge - Maximum age of cached responses in seconds (default: 1 hour). Set to null to disable caching.
 */
export function createElevenLabsVoiceProvider(apiKey, baseUrl = ELEVEN_LABS_BASE_URL, options = {}) {
  return new ElevenLabsVoiceProvider(apiKey, baseUrl, options);
}
//# sourceMappingURL=ElevenLabsVoiceProvider.js.map