UNPKG

@ai-sdk/elevenlabs

Version:

The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs) contains transcription model support for the ElevenLabs speech-to-text API and speech model support for the ElevenLabs text-to-speech API. It does not provide language, embedding, or image models.

423 lines (417 loc) 14 kB
// src/elevenlabs-provider.ts
import { NoSuchModelError } from "@ai-sdk/provider";
import { loadApiKey, withUserAgentSuffix } from "@ai-sdk/provider-utils";

// src/elevenlabs-transcription-model.ts
import {
  combineHeaders,
  convertBase64ToUint8Array,
  createJsonResponseHandler,
  mediaTypeToExtension,
  parseProviderOptions,
  postFormDataToApi
} from "@ai-sdk/provider-utils";
import { z as z2 } from "zod/v4";

// src/elevenlabs-error.ts
import { z } from "zod/v4";
import { createJsonErrorResponseHandler } from "@ai-sdk/provider-utils";

/** Schema used to parse the JSON body of a failed ElevenLabs response. */
const elevenlabsErrorDataSchema = z.object({
  error: z.object({
    message: z.string(),
    code: z.number()
  })
});

/** Failed-response handler shared by both models; the error message is taken from `error.message`. */
const elevenlabsFailedResponseHandler = createJsonErrorResponseHandler({
  errorSchema: elevenlabsErrorDataSchema,
  errorToMessage: (data) => data.error.message
});

// src/elevenlabs-transcription-model.ts

/** Provider options accepted under `providerOptions.elevenlabs` for transcription calls. */
const elevenLabsTranscriptionModelOptionsSchema = z2.object({
  languageCode: z2.string().nullish(),
  tagAudioEvents: z2.boolean().nullish().default(true),
  numSpeakers: z2.number().int().min(1).max(32).nullish(),
  timestampsGranularity: z2.enum(["none", "word", "character"]).nullish().default("word"),
  diarize: z2.boolean().nullish().default(false),
  fileFormat: z2.enum(["pcm_s16le_16", "other"]).nullish().default("other")
});

/**
 * Transcription model that posts audio as multipart form data to
 * `/v1/speech-to-text`.
 */
const ElevenLabsTranscriptionModel = class {
  /**
   * @param modelId Model identifier sent as the `model_id` form field.
   * @param config Object providing `provider`, `url({ path, modelId })`,
   *   `headers()`, and optionally `fetch` and `_internal.currentDate`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Builds the multipart form for a transcription request.
   *
   * @param audio Either a `Uint8Array` or a base64 string (decoded with
   *   `convertBase64ToUint8Array`).
   * @param mediaType Media type used for the uploaded file and to derive the
   *   file extension.
   * @param providerOptions Options parsed against
   *   `elevenLabsTranscriptionModelOptionsSchema` under the `elevenlabs` key.
   * @returns `{ formData, warnings }`; `warnings` is always an empty array here.
   */
  async getArgs({ audio, mediaType, providerOptions }) {
    const warnings = [];
    const elevenlabsOptions = await parseProviderOptions({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsTranscriptionModelOptionsSchema
    });

    const formData = new FormData();
    const blob =
      audio instanceof Uint8Array
        ? new Blob([audio])
        : new Blob([convertBase64ToUint8Array(audio)]);

    formData.append("model_id", this.modelId);

    const fileExtension = mediaTypeToExtension(mediaType);
    formData.append(
      "file",
      new File([blob], "audio", { type: mediaType }),
      `audio.${fileExtension}`
    );

    // Default sent when no explicit boolean is supplied via provider options.
    formData.append("diarize", "true");

    if (elevenlabsOptions) {
      const transcriptionModelOptions = {
        language_code: elevenlabsOptions.languageCode ?? undefined,
        tag_audio_events: elevenlabsOptions.tagAudioEvents ?? undefined,
        num_speakers: elevenlabsOptions.numSpeakers ?? undefined,
        timestamps_granularity: elevenlabsOptions.timestampsGranularity ?? undefined,
        file_format: elevenlabsOptions.fileFormat ?? undefined
      };

      if (typeof elevenlabsOptions.diarize === "boolean") {
        // `set` replaces the default in place. The previous `append` emitted a
        // second `diarize` field, leaving the request with two (possibly
        // conflicting) values for the same parameter.
        // NOTE(review): the schema defaults `diarize` to `false`, so passing any
        // `elevenlabs` options without `diarize` presumably yields "false" here
        // while passing no options at all yields "true" — confirm this is intended.
        formData.set("diarize", String(elevenlabsOptions.diarize));
      }

      // Only forward options that resolved to a value; null/undefined are skipped.
      for (const [key, value] of Object.entries(transcriptionModelOptions)) {
        if (value !== undefined) {
          formData.append(key, String(value));
        }
      }
    }

    return { formData, warnings };
  }

  /**
   * Sends the transcription request and maps the response.
   *
   * @param options Call options; `headers` and `abortSignal` are forwarded to
   *   the request, the rest is consumed by `getArgs`.
   * @returns The transcript text, one segment per entry of `response.words`
   *   (missing start/end become 0), the language code, the `end` of the last
   *   word as duration (or `undefined`), warnings, and response metadata.
   */
  async doGenerate(options) {
    // `_internal.currentDate` lets the caller override the response timestamp.
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { formData, warnings } = await this.getArgs(options);

    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await postFormDataToApi({
      url: this.config.url({
        path: "/v1/speech-to-text",
        modelId: this.modelId
      }),
      headers: combineHeaders(this.config.headers(), options.headers),
      formData,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        elevenlabsTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    const segments =
      response.words?.map((word) => ({
        text: word.text,
        startSecond: word.start ?? 0,
        endSecond: word.end ?? 0
      })) ?? [];

    return {
      text: response.text,
      segments,
      language: response.language_code,
      // A null `end` on the last word is normalised to undefined.
      durationInSeconds: response.words?.at(-1)?.end ?? undefined,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

/** Schema used to validate a successful speech-to-text response body. */
const elevenlabsTranscriptionResponseSchema = z2.object({
  language_code: z2.string(),
  language_probability: z2.number(),
  text: z2.string(),
  words: z2.array(
    z2.object({
      text: z2.string(),
      type: z2.enum(["word", "spacing", "audio_event"]),
      start: z2.number().nullish(),
      end: z2.number().nullish(),
      speaker_id: z2.string().nullish(),
      characters: z2.array(
        z2.object({
          text: z2.string(),
          start: z2.number().nullish(),
          end: z2.number().nullish()
        })
      ).nullish()
    })
  ).nullish()
});

// src/elevenlabs-speech-model.ts
import {
  combineHeaders as combineHeaders2,
  createBinaryResponseHandler,
  parseProviderOptions as parseProviderOptions2,
  postJsonToApi
} from "@ai-sdk/provider-utils";
import { z as z3 } from "zod/v4";

/** Provider options accepted under `providerOptions.elevenlabs` for speech calls. */
const elevenLabsSpeechModelOptionsSchema = z3.object({
  languageCode: z3.string().optional(),
  voiceSettings: z3.object({
    stability: z3.number().min(0).max(1).optional(),
    similarityBoost: z3.number().min(0).max(1).optional(),
    style: z3.number().min(0).max(1).optional(),
    useSpeakerBoost: z3.boolean().optional()
  }).optional(),
  pronunciationDictionaryLocators: z3.array(
    z3.object({
      pronunciationDictionaryId: z3.string(),
      versionId: z3.string().optional()
    })
  ).max(3).optional(),
  seed: z3.number().min(0).max(4294967295).optional(),
  previousText: z3.string().optional(),
  nextText: z3.string().optional(),
  previousRequestIds: z3.array(z3.string()).max(3).optional(),
  nextRequestIds: z3.array(z3.string()).max(3).optional(),
  applyTextNormalization: z3.enum(["auto", "on", "off"]).optional(),
  applyLanguageTextNormalization: z3.boolean().optional(),
  enableLogging: z3.boolean().optional()
});

/**
 * Speech model that posts JSON to `/v1/text-to-speech/{voiceId}` and returns
 * the binary response as audio.
 */
const ElevenLabsSpeechModel = class {
  /**
   * @param modelId Model identifier sent as `model_id` in the request body.
   * @param config Object providing `provider`, `url({ path, modelId })`,
   *   `headers()`, and optionally `fetch` and `_internal.currentDate`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Translates call options into the request body, query parameters and
   * voice id for a text-to-speech request.
   *
   * @param text Text to synthesise.
   * @param voice Voice id used in the URL path (defaults to "21m00Tcm4TlvDq8ikWAM").
   * @param outputFormat Short alias or raw format string; aliases are expanded
   *   via the map below, unknown values are passed through unchanged.
   * @param instructions Not supported; produces a warning and is ignored.
   * @param language Language code; takes precedence over the `languageCode`
   *   provider option.
   * @param speed Forwarded as `voice_settings.speed` when not null/undefined.
   * @param providerOptions Options parsed against
   *   `elevenLabsSpeechModelOptionsSchema` under the `elevenlabs` key.
   * @returns `{ requestBody, queryParams, warnings, voiceId }`.
   */
  async getArgs({
    text,
    voice = "21m00Tcm4TlvDq8ikWAM",
    outputFormat = "mp3_44100_128",
    instructions,
    language,
    speed,
    providerOptions
  }) {
    const warnings = [];
    const elevenLabsOptions = await parseProviderOptions2({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsSpeechModelOptionsSchema
    });

    const requestBody = {
      text,
      model_id: this.modelId
    };
    const queryParams = {};

    if (outputFormat) {
      const formatMap = {
        mp3: "mp3_44100_128",
        mp3_32: "mp3_44100_32",
        mp3_64: "mp3_44100_64",
        mp3_96: "mp3_44100_96",
        mp3_128: "mp3_44100_128",
        mp3_192: "mp3_44100_192",
        pcm: "pcm_44100",
        pcm_16000: "pcm_16000",
        pcm_22050: "pcm_22050",
        pcm_24000: "pcm_24000",
        pcm_44100: "pcm_44100",
        ulaw: "ulaw_8000"
      };
      const mappedFormat = formatMap[outputFormat] || outputFormat;
      queryParams.output_format = mappedFormat;
    }

    if (language) {
      requestBody.language_code = language;
    }

    const voiceSettings = {};
    if (speed != null) {
      voiceSettings.speed = speed;
    }

    if (elevenLabsOptions) {
      const settings = elevenLabsOptions.voiceSettings;
      if (settings) {
        if (settings.stability != null) {
          voiceSettings.stability = settings.stability;
        }
        if (settings.similarityBoost != null) {
          voiceSettings.similarity_boost = settings.similarityBoost;
        }
        if (settings.style != null) {
          voiceSettings.style = settings.style;
        }
        if (settings.useSpeakerBoost != null) {
          voiceSettings.use_speaker_boost = settings.useSpeakerBoost;
        }
      }

      // The top-level `language` call option wins over the provider option.
      if (elevenLabsOptions.languageCode && !requestBody.language_code) {
        requestBody.language_code = elevenLabsOptions.languageCode;
      }

      if (elevenLabsOptions.pronunciationDictionaryLocators) {
        requestBody.pronunciation_dictionary_locators =
          elevenLabsOptions.pronunciationDictionaryLocators.map((locator) => ({
            pronunciation_dictionary_id: locator.pronunciationDictionaryId,
            // `version_id` is only included when a version was given.
            ...locator.versionId && { version_id: locator.versionId }
          }));
      }

      if (elevenLabsOptions.seed != null) {
        requestBody.seed = elevenLabsOptions.seed;
      }
      if (elevenLabsOptions.previousText) {
        requestBody.previous_text = elevenLabsOptions.previousText;
      }
      if (elevenLabsOptions.nextText) {
        requestBody.next_text = elevenLabsOptions.nextText;
      }
      if (elevenLabsOptions.previousRequestIds) {
        requestBody.previous_request_ids = elevenLabsOptions.previousRequestIds;
      }
      if (elevenLabsOptions.nextRequestIds) {
        requestBody.next_request_ids = elevenLabsOptions.nextRequestIds;
      }
      if (elevenLabsOptions.applyTextNormalization) {
        requestBody.apply_text_normalization = elevenLabsOptions.applyTextNormalization;
      }
      if (elevenLabsOptions.applyLanguageTextNormalization != null) {
        requestBody.apply_language_text_normalization =
          elevenLabsOptions.applyLanguageTextNormalization;
      }
      // `enable_logging` travels as a query parameter, not in the JSON body.
      if (elevenLabsOptions.enableLogging != null) {
        queryParams.enable_logging = String(elevenLabsOptions.enableLogging);
      }
    }

    // Omit `voice_settings` entirely when nothing was set.
    if (Object.keys(voiceSettings).length > 0) {
      requestBody.voice_settings = voiceSettings;
    }

    if (instructions) {
      warnings.push({
        type: "unsupported",
        feature: "instructions",
        details: `ElevenLabs speech models do not support instructions. Instructions parameter was ignored.`
      });
    }

    return {
      requestBody,
      queryParams,
      warnings,
      voiceId: voice
    };
  }

  /**
   * Sends the text-to-speech request.
   *
   * @param options Call options; `headers` and `abortSignal` are forwarded to
   *   the request, the rest is consumed by `getArgs`.
   * @returns The binary audio, warnings, the serialised request body, and
   *   response metadata.
   */
  async doGenerate(options) {
    // `_internal.currentDate` lets the caller override the response timestamp.
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { requestBody, queryParams, warnings, voiceId } = await this.getArgs(options);

    const baseUrl = this.config.url({
      path: `/v1/text-to-speech/${voiceId}`,
      modelId: this.modelId
    });
    const queryString = new URLSearchParams(queryParams).toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse
    } = await postJsonToApi({
      url,
      headers: combineHeaders2(this.config.headers(), options.headers),
      body: requestBody,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createBinaryResponseHandler(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

// src/version.ts
/** Package version; also used in the user-agent suffix. */
const VERSION = "2.0.33";

// src/elevenlabs-provider.ts
/**
 * Creates an ElevenLabs provider instance.
 *
 * @param options Optional settings: `apiKey` (otherwise loaded from the
 *   `ELEVENLABS_API_KEY` environment variable), extra `headers`, and a custom
 *   `fetch` implementation.
 * @returns A callable provider exposing `transcription` / `transcriptionModel`
 *   and `speech` / `speechModel` factories. `languageModel`, `embeddingModel`,
 *   `textEmbeddingModel` and `imageModel` throw `NoSuchModelError`.
 */
function createElevenLabs(options = {}) {
  // Headers are built lazily per request, so the API key is only loaded when
  // a request is actually made.
  const getHeaders = () =>
    withUserAgentSuffix(
      {
        "xi-api-key": loadApiKey({
          apiKey: options.apiKey,
          environmentVariableName: "ELEVENLABS_API_KEY",
          description: "ElevenLabs"
        }),
        ...options.headers
      },
      `ai-sdk/elevenlabs/${VERSION}`
    );

  const createTranscriptionModel = (modelId) =>
    new ElevenLabsTranscriptionModel(modelId, {
      provider: `elevenlabs.transcription`,
      url: ({ path }) => `https://api.elevenlabs.io${path}`,
      headers: getHeaders,
      fetch: options.fetch
    });

  const createSpeechModel = (modelId) =>
    new ElevenLabsSpeechModel(modelId, {
      provider: `elevenlabs.speech`,
      url: ({ path }) => `https://api.elevenlabs.io${path}`,
      headers: getHeaders,
      fetch: options.fetch
    });

  // Calling the provider directly returns an object holding a transcription model.
  const provider = function(modelId) {
    return {
      transcription: createTranscriptionModel(modelId)
    };
  };

  provider.specificationVersion = "v3";
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
  provider.speech = createSpeechModel;
  provider.speechModel = createSpeechModel;

  provider.languageModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "languageModel",
      message: "ElevenLabs does not provide language models"
    });
  };
  provider.embeddingModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "embeddingModel",
      message: "ElevenLabs does not provide embedding models"
    });
  };
  provider.textEmbeddingModel = provider.embeddingModel;
  provider.imageModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "imageModel",
      message: "ElevenLabs does not provide image models"
    });
  };

  return provider;
}

/** Default provider instance created with no explicit options. */
const elevenlabs = createElevenLabs();

export { VERSION, createElevenLabs, elevenlabs };
//# sourceMappingURL=index.mjs.map