UNPKG

@ai-sdk/elevenlabs

Version:

The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs) contains transcription model support for the ElevenLabs speech-to-text API and speech model support for the ElevenLabs text-to-speech API. It does not provide language, embedding, or image models.

455 lines (446 loc) 15.1 kB
// src/elevenlabs-provider.ts
import { NoSuchModelError } from "@ai-sdk/provider";
import { loadApiKey, withUserAgentSuffix } from "@ai-sdk/provider-utils";

// src/elevenlabs-transcription-model.ts
import {
  combineHeaders,
  convertBase64ToUint8Array,
  createJsonResponseHandler,
  mediaTypeToExtension,
  parseProviderOptions,
  postFormDataToApi,
  serializeModelOptions,
  WORKFLOW_SERIALIZE,
  WORKFLOW_DESERIALIZE
} from "@ai-sdk/provider-utils";
import { z as z3 } from "zod/v4";

// src/elevenlabs-error.ts
import { z } from "zod/v4";
import { createJsonErrorResponseHandler } from "@ai-sdk/provider-utils";

/** Shape expected for an error response body: `{ error: { message, code } }`. */
var elevenlabsErrorDataSchema = z.object({
  error: z.object({
    message: z.string(),
    code: z.number()
  })
});

/** Failed-response handler shared by both models; surfaces `error.message`. */
var elevenlabsFailedResponseHandler = createJsonErrorResponseHandler({
  errorSchema: elevenlabsErrorDataSchema,
  errorToMessage: (data) => data.error.message
});

// src/elevenlabs-transcription-model-options.ts
import { z as z2 } from "zod/v4";

/**
 * Provider options accepted under `providerOptions.elevenlabs` for
 * transcription. Note the schema-level defaults: `tagAudioEvents` -> true,
 * `timestampsGranularity` -> "word", `diarize` -> false, `fileFormat` -> "other".
 */
var elevenLabsTranscriptionModelOptionsSchema = z2.object({
  languageCode: z2.string().nullish(),
  tagAudioEvents: z2.boolean().nullish().default(true),
  numSpeakers: z2.number().int().min(1).max(32).nullish(),
  timestampsGranularity: z2.enum(["none", "word", "character"]).nullish().default("word"),
  diarize: z2.boolean().nullish().default(false),
  fileFormat: z2.enum(["pcm_s16le_16", "other"]).nullish().default("other")
});

// src/elevenlabs-transcription-model.ts

/**
 * Transcription model that posts audio as multipart form data to
 * `/v1/speech-to-text`.
 */
var ElevenLabsTranscriptionModel = class _ElevenLabsTranscriptionModel {
  /**
   * @param modelId Model identifier sent as the `model_id` form field.
   * @param config Object providing `provider`, `url({ path, modelId })`,
   *   and optionally `headers()`, `fetch` and `_internal.currentDate()`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v4";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /** Serializes the model as its `modelId` and `config`. */
  static [WORKFLOW_SERIALIZE](model) {
    return serializeModelOptions({
      modelId: model.modelId,
      config: model.config
    });
  }

  /** Rebuilds a model from serialized `{ modelId, config }` options. */
  static [WORKFLOW_DESERIALIZE](options) {
    return new _ElevenLabsTranscriptionModel(options.modelId, options.config);
  }

  /**
   * Builds the multipart form body for a transcription request.
   *
   * @param audio Either a `Uint8Array` or a base64 string (decoded via
   *   `convertBase64ToUint8Array`).
   * @param mediaType Media type of the audio; used as the file type and to
   *   derive the file name extension.
   * @param providerOptions Call-level provider options; the `elevenlabs`
   *   entry is validated against the transcription options schema.
   * @returns `{ formData, warnings }`; `warnings` is always an empty array here.
   */
  async getArgs({ audio, mediaType, providerOptions }) {
    var _a, _b, _c, _d, _e;
    const warnings = [];
    const elevenlabsOptions = await parseProviderOptions({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsTranscriptionModelOptionsSchema
    });
    const formData = new FormData();
    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
    formData.append("model_id", this.modelId);
    const fileExtension = mediaTypeToExtension(mediaType);
    formData.append(
      "file",
      new File([blob], "audio", { type: mediaType }),
      `audio.${fileExtension}`
    );
    // Diarization is requested by default when no provider options are given.
    formData.append("diarize", "true");
    if (elevenlabsOptions) {
      const transcriptionModelOptions = {
        language_code: (_a = elevenlabsOptions.languageCode) != null ? _a : void 0,
        tag_audio_events: (_b = elevenlabsOptions.tagAudioEvents) != null ? _b : void 0,
        num_speakers: (_c = elevenlabsOptions.numSpeakers) != null ? _c : void 0,
        timestamps_granularity: (_d = elevenlabsOptions.timestampsGranularity) != null ? _d : void 0,
        file_format: (_e = elevenlabsOptions.fileFormat) != null ? _e : void 0
      };
      if (typeof elevenlabsOptions.diarize === "boolean") {
        // Replace (not append to) the default above so the request carries
        // exactly one `diarize` field. Appending produced both "true" and the
        // caller's value in the same form body.
        formData.set("diarize", String(elevenlabsOptions.diarize));
      }
      for (const key in transcriptionModelOptions) {
        const value = transcriptionModelOptions[key];
        // Nullish options were mapped to undefined above and are omitted.
        if (value !== void 0) {
          formData.append(key, String(value));
        }
      }
    }
    return {
      formData,
      warnings
    };
  }

  /**
   * Sends the transcription request and maps the response.
   *
   * @param options Call options; `audio`, `mediaType` and `providerOptions`
   *   are consumed by `getArgs`, `headers` and `abortSignal` are forwarded.
   * @returns An object with `text`, `segments` (one per entry in the
   *   response's `words`, missing start/end defaulting to 0), `language`,
   *   `durationInSeconds` (the `end` of the last word, or undefined),
   *   `warnings` and response metadata.
   */
  async doGenerate(options) {
    var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
    // `_internal.currentDate` lets a caller inject the timestamp; falls back to now.
    const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
    const { formData, warnings } = await this.getArgs(options);
    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await postFormDataToApi({
      url: this.config.url({
        path: "/v1/speech-to-text",
        modelId: this.modelId
      }),
      headers: combineHeaders((_e = (_d = this.config).headers) == null ? void 0 : _e.call(_d), options.headers),
      formData,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        elevenlabsTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
    return {
      text: response.text,
      segments: (_g = (_f = response.words) == null ? void 0 : _f.map((word) => {
        var _a2, _b2;
        return {
          text: word.text,
          startSecond: (_a2 = word.start) != null ? _a2 : 0,
          endSecond: (_b2 = word.end) != null ? _b2 : 0
        };
      })) != null ? _g : [],
      language: response.language_code,
      durationInSeconds: (_j = (_i = (_h = response.words) == null ? void 0 : _h.at(-1)) == null ? void 0 : _i.end) != null ? _j : void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

/** Schema used to validate a successful speech-to-text response body. */
var elevenlabsTranscriptionResponseSchema = z3.object({
  language_code: z3.string(),
  language_probability: z3.number(),
  text: z3.string(),
  words: z3.array(
    z3.object({
      text: z3.string(),
      type: z3.enum(["word", "spacing", "audio_event"]),
      start: z3.number().nullish(),
      end: z3.number().nullish(),
      speaker_id: z3.string().nullish(),
      characters: z3.array(
        z3.object({
          text: z3.string(),
          start: z3.number().nullish(),
          end: z3.number().nullish()
        })
      ).nullish()
    })
  ).nullish()
});

// src/elevenlabs-speech-model.ts
import {
  combineHeaders as combineHeaders2,
  createBinaryResponseHandler,
  parseProviderOptions as parseProviderOptions2,
  postJsonToApi,
  serializeModelOptions as serializeModelOptions2,
  WORKFLOW_SERIALIZE as WORKFLOW_SERIALIZE2,
  WORKFLOW_DESERIALIZE as WORKFLOW_DESERIALIZE2
} from "@ai-sdk/provider-utils";

// src/elevenlabs-speech-model-options.ts
import { z as z4 } from "zod/v4";

/** Provider options accepted under `providerOptions.elevenlabs` for speech. */
var elevenLabsSpeechModelOptionsSchema = z4.object({
  languageCode: z4.string().optional(),
  voiceSettings: z4.object({
    stability: z4.number().min(0).max(1).optional(),
    similarityBoost: z4.number().min(0).max(1).optional(),
    style: z4.number().min(0).max(1).optional(),
    useSpeakerBoost: z4.boolean().optional()
  }).optional(),
  pronunciationDictionaryLocators: z4.array(
    z4.object({
      pronunciationDictionaryId: z4.string(),
      versionId: z4.string().optional()
    })
  ).max(3).optional(),
  seed: z4.number().min(0).max(4294967295).optional(),
  previousText: z4.string().optional(),
  nextText: z4.string().optional(),
  previousRequestIds: z4.array(z4.string()).max(3).optional(),
  nextRequestIds: z4.array(z4.string()).max(3).optional(),
  applyTextNormalization: z4.enum(["auto", "on", "off"]).optional(),
  applyLanguageTextNormalization: z4.boolean().optional(),
  enableLogging: z4.boolean().optional()
});

// src/elevenlabs-speech-model.ts

/**
 * Speech model that posts a JSON body to `/v1/text-to-speech/{voiceId}` and
 * returns the binary audio response.
 */
var ElevenLabsSpeechModel = class _ElevenLabsSpeechModel {
  /**
   * @param modelId Model identifier sent as `model_id` in the request body.
   * @param config Object providing `provider`, `url({ path, modelId })`,
   *   and optionally `headers()`, `fetch` and `_internal.currentDate()`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v4";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /** Serializes the model as its `modelId` and `config`. */
  static [WORKFLOW_SERIALIZE2](model) {
    return serializeModelOptions2({
      modelId: model.modelId,
      config: model.config
    });
  }

  /** Rebuilds a model from serialized `{ modelId, config }` options. */
  static [WORKFLOW_DESERIALIZE2](options) {
    return new _ElevenLabsSpeechModel(options.modelId, options.config);
  }

  /**
   * Translates call options into the JSON request body, query parameters and
   * voice id for a text-to-speech request.
   *
   * @param text Text to synthesize.
   * @param voice Voice id used in the URL path; defaults to "21m00Tcm4TlvDq8ikWAM".
   * @param outputFormat Output format; short aliases (e.g. "mp3", "pcm",
   *   "ulaw") are mapped to full format names, anything else is passed through.
   * @param instructions Not supported; produces an "unsupported" warning.
   * @param language Sent as `language_code`; takes precedence over the
   *   provider option `languageCode`.
   * @param speed Sent as `voice_settings.speed` when not nullish.
   * @param providerOptions Call-level provider options; the `elevenlabs`
   *   entry is validated against the speech options schema.
   * @returns `{ requestBody, queryParams, warnings, voiceId }`.
   */
  async getArgs({
    text,
    voice = "21m00Tcm4TlvDq8ikWAM",
    outputFormat = "mp3_44100_128",
    instructions,
    language,
    speed,
    providerOptions
  }) {
    const warnings = [];
    const elevenLabsOptions = await parseProviderOptions2({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsSpeechModelOptionsSchema
    });
    const requestBody = {
      text,
      model_id: this.modelId
    };
    const queryParams = {};
    if (outputFormat) {
      const formatMap = {
        mp3: "mp3_44100_128",
        mp3_32: "mp3_44100_32",
        mp3_64: "mp3_44100_64",
        mp3_96: "mp3_44100_96",
        mp3_128: "mp3_44100_128",
        mp3_192: "mp3_44100_192",
        pcm: "pcm_44100",
        pcm_16000: "pcm_16000",
        pcm_22050: "pcm_22050",
        pcm_24000: "pcm_24000",
        pcm_44100: "pcm_44100",
        ulaw: "ulaw_8000"
      };
      // Unknown formats are forwarded unchanged.
      const mappedFormat = formatMap[outputFormat] || outputFormat;
      queryParams.output_format = mappedFormat;
    }
    if (language) {
      requestBody.language_code = language;
    }
    const voiceSettings = {};
    if (speed != null) {
      voiceSettings.speed = speed;
    }
    if (elevenLabsOptions) {
      if (elevenLabsOptions.voiceSettings) {
        if (elevenLabsOptions.voiceSettings.stability != null) {
          voiceSettings.stability = elevenLabsOptions.voiceSettings.stability;
        }
        if (elevenLabsOptions.voiceSettings.similarityBoost != null) {
          voiceSettings.similarity_boost = elevenLabsOptions.voiceSettings.similarityBoost;
        }
        if (elevenLabsOptions.voiceSettings.style != null) {
          voiceSettings.style = elevenLabsOptions.voiceSettings.style;
        }
        if (elevenLabsOptions.voiceSettings.useSpeakerBoost != null) {
          voiceSettings.use_speaker_boost = elevenLabsOptions.voiceSettings.useSpeakerBoost;
        }
      }
      // The top-level `language` call option wins over the provider option.
      if (elevenLabsOptions.languageCode && !requestBody.language_code) {
        requestBody.language_code = elevenLabsOptions.languageCode;
      }
      if (elevenLabsOptions.pronunciationDictionaryLocators) {
        requestBody.pronunciation_dictionary_locators = elevenLabsOptions.pronunciationDictionaryLocators.map((locator) => ({
          pronunciation_dictionary_id: locator.pronunciationDictionaryId,
          ...locator.versionId && { version_id: locator.versionId }
        }));
      }
      if (elevenLabsOptions.seed != null) {
        requestBody.seed = elevenLabsOptions.seed;
      }
      if (elevenLabsOptions.previousText) {
        requestBody.previous_text = elevenLabsOptions.previousText;
      }
      if (elevenLabsOptions.nextText) {
        requestBody.next_text = elevenLabsOptions.nextText;
      }
      if (elevenLabsOptions.previousRequestIds) {
        requestBody.previous_request_ids = elevenLabsOptions.previousRequestIds;
      }
      if (elevenLabsOptions.nextRequestIds) {
        requestBody.next_request_ids = elevenLabsOptions.nextRequestIds;
      }
      if (elevenLabsOptions.applyTextNormalization) {
        requestBody.apply_text_normalization = elevenLabsOptions.applyTextNormalization;
      }
      if (elevenLabsOptions.applyLanguageTextNormalization != null) {
        requestBody.apply_language_text_normalization = elevenLabsOptions.applyLanguageTextNormalization;
      }
      // `enable_logging` travels as a query parameter, not in the JSON body.
      if (elevenLabsOptions.enableLogging != null) {
        queryParams.enable_logging = String(elevenLabsOptions.enableLogging);
      }
    }
    // Only send `voice_settings` when at least one setting was provided.
    if (Object.keys(voiceSettings).length > 0) {
      requestBody.voice_settings = voiceSettings;
    }
    if (instructions) {
      warnings.push({
        type: "unsupported",
        feature: "instructions",
        details: `ElevenLabs speech models do not support instructions. Instructions parameter was ignored.`
      });
    }
    return {
      requestBody,
      queryParams,
      warnings,
      voiceId: voice
    };
  }

  /**
   * Sends the text-to-speech request and returns the audio.
   *
   * @param options Call options; consumed by `getArgs`, with `headers` and
   *   `abortSignal` forwarded to the request.
   * @returns An object with `audio`, `warnings`, the serialized request body
   *   and response metadata.
   */
  async doGenerate(options) {
    var _a, _b, _c, _d, _e;
    // `_internal.currentDate` lets a caller inject the timestamp; falls back to now.
    const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
    const { requestBody, queryParams, warnings, voiceId } = await this.getArgs(options);
    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse
    } = await postJsonToApi({
      url: (() => {
        const baseUrl = this.config.url({
          path: `/v1/text-to-speech/${voiceId}`,
          modelId: this.modelId
        });
        const queryString = new URLSearchParams(queryParams).toString();
        return queryString ? `${baseUrl}?${queryString}` : baseUrl;
      })(),
      headers: combineHeaders2((_e = (_d = this.config).headers) == null ? void 0 : _e.call(_d), options.headers),
      body: requestBody,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createBinaryResponseHandler(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

// src/version.ts
var VERSION = true ? "3.0.3" : "0.0.0-test";

// src/elevenlabs-provider.ts

/**
 * Creates an ElevenLabs provider.
 *
 * @param options Optional settings: `apiKey` (otherwise read via the
 *   `ELEVENLABS_API_KEY` environment variable name passed to `loadApiKey`),
 *   extra `headers`, and a custom `fetch`.
 * @returns A callable provider exposing `transcription`/`transcriptionModel`
 *   and `speech`/`speechModel` factories. `languageModel`, `embeddingModel`,
 *   `textEmbeddingModel` and `imageModel` throw `NoSuchModelError`.
 */
function createElevenLabs(options = {}) {
  // Headers are computed per call; `options.headers` entries are spread after
  // `xi-api-key`, so a caller-supplied `xi-api-key` would override it.
  const getHeaders = () => withUserAgentSuffix(
    {
      "xi-api-key": loadApiKey({
        apiKey: options.apiKey,
        environmentVariableName: "ELEVENLABS_API_KEY",
        description: "ElevenLabs"
      }),
      ...options.headers
    },
    `ai-sdk/elevenlabs/${VERSION}`
  );
  const createTranscriptionModel = (modelId) => new ElevenLabsTranscriptionModel(modelId, {
    provider: `elevenlabs.transcription`,
    url: ({ path }) => `https://api.elevenlabs.io${path}`,
    headers: getHeaders,
    fetch: options.fetch
  });
  const createSpeechModel = (modelId) => new ElevenLabsSpeechModel(modelId, {
    provider: `elevenlabs.speech`,
    url: ({ path }) => `https://api.elevenlabs.io${path}`,
    headers: getHeaders,
    fetch: options.fetch
  });
  // Calling the provider directly yields an object holding a transcription model.
  const provider = function(modelId) {
    return {
      transcription: createTranscriptionModel(modelId)
    };
  };
  provider.specificationVersion = "v4";
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
  provider.speech = createSpeechModel;
  provider.speechModel = createSpeechModel;
  provider.languageModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "languageModel",
      message: "ElevenLabs does not provide language models"
    });
  };
  provider.embeddingModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "embeddingModel",
      message: "ElevenLabs does not provide embedding models"
    });
  };
  provider.textEmbeddingModel = provider.embeddingModel;
  provider.imageModel = (modelId) => {
    throw new NoSuchModelError({
      modelId,
      modelType: "imageModel",
      message: "ElevenLabs does not provide image models"
    });
  };
  return provider;
}

/** Default provider instance created with no explicit options. */
var elevenLabs = createElevenLabs();
export {
  VERSION,
  createElevenLabs,
  elevenLabs,
  elevenLabs as elevenlabs
};
//# sourceMappingURL=index.js.map