UNPKG

@ai-sdk/elevenlabs

Version:

The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs) contains transcription model support for the ElevenLabs speech-to-text API and speech model support for the ElevenLabs text-to-speech API.

435 lines (427 loc) 15.8 kB
"use strict";

// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers.
//
// Top-level bindings in this file intentionally stay `var`: the export getters
// registered below are lazy, and `var` hoisting guarantees they never hit a
// temporal-dead-zone error if the module is observed while still initialising.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines every entry of `all` as an enumerable getter on `target`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// Copies the own properties of `from` onto `to` as live getters, skipping
// `except` and anything `to` already owns.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
  }
  return to;
};

// Wraps an export table in an object flagged with `__esModule: true`.
var __toCommonJS = (mod) =>
  __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  VERSION: () => VERSION,
  createElevenLabs: () => createElevenLabs,
  elevenlabs: () => elevenlabs
});
module.exports = __toCommonJS(index_exports);

// src/elevenlabs-provider.ts
var import_provider = require("@ai-sdk/provider");
var import_provider_utils4 = require("@ai-sdk/provider-utils");

// src/elevenlabs-transcription-model.ts
var import_provider_utils2 = require("@ai-sdk/provider-utils");
var import_v42 = require("zod/v4");

// src/elevenlabs-error.ts
var import_v4 = require("zod/v4");
var import_provider_utils = require("@ai-sdk/provider-utils");

// Shape expected of an ElevenLabs error response body.
var elevenlabsErrorDataSchema = import_v4.z.object({
  error: import_v4.z.object({
    message: import_v4.z.string(),
    code: import_v4.z.number()
  })
});

// Failed-response handler shared by both models; the error message is taken
// from `error.message` of the parsed body.
var elevenlabsFailedResponseHandler = (0, import_provider_utils.createJsonErrorResponseHandler)({
  errorSchema: elevenlabsErrorDataSchema,
  errorToMessage: (data) => data.error.message
});

// src/elevenlabs-transcription-model.ts

// Provider options accepted under `providerOptions.elevenlabs` for
// transcription calls. Several fields carry schema defaults, so they are
// populated whenever an options object is supplied at all.
var elevenLabsTranscriptionModelOptionsSchema = import_v42.z.object({
  languageCode: import_v42.z.string().nullish(),
  tagAudioEvents: import_v42.z.boolean().nullish().default(true),
  numSpeakers: import_v42.z.number().int().min(1).max(32).nullish(),
  timestampsGranularity: import_v42.z.enum(["none", "word", "character"]).nullish().default("word"),
  diarize: import_v42.z.boolean().nullish().default(false),
  fileFormat: import_v42.z.enum(["pcm_s16le_16", "other"]).nullish().default("other")
});

/**
 * Transcription model that posts audio as multipart form data to the
 * `/v1/speech-to-text` endpoint.
 */
var ElevenLabsTranscriptionModel = class {
  /**
   * @param {string} modelId - Sent as the `model_id` form field.
   * @param {object} config - Supplies `provider`, `url({ path, modelId })`,
   *   `headers()`, and optionally `fetch` and `_internal.currentDate`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider identifier taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Builds the multipart form for a transcription request.
   *
   * @param {object} args
   * @param {Uint8Array|string} args.audio - Raw bytes, or a base64 string that
   *   is decoded to bytes.
   * @param {string} args.mediaType - Used as the file's MIME type and to derive
   *   the uploaded file name's extension.
   * @param {object} [args.providerOptions] - Options; the `elevenlabs` entry is
   *   validated against the transcription options schema.
   * @returns {Promise<{formData: FormData, warnings: Array}>}
   */
  async getArgs({ audio, mediaType, providerOptions }) {
    const warnings = [];
    const elevenlabsOptions = await (0, import_provider_utils2.parseProviderOptions)({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsTranscriptionModelOptionsSchema
    });

    const formData = new FormData();
    const blob = audio instanceof Uint8Array
      ? new Blob([audio])
      : new Blob([(0, import_provider_utils2.convertBase64ToUint8Array)(audio)]);
    formData.append("model_id", this.modelId);

    const fileExtension = (0, import_provider_utils2.mediaTypeToExtension)(mediaType);
    formData.append(
      "file",
      new File([blob], "audio", { type: mediaType }),
      `audio.${fileExtension}`
    );

    // Emit exactly one `diarize` field. Previously "true" was appended
    // unconditionally and the caller's value was appended a second time,
    // producing two conflicting `diarize` entries in the same form.
    // Diarization stays on unless the parsed options carry an explicit boolean.
    const diarize = typeof elevenlabsOptions?.diarize === "boolean"
      ? elevenlabsOptions.diarize
      : true;
    formData.append("diarize", String(diarize));

    if (elevenlabsOptions) {
      const transcriptionModelOptions = {
        language_code: elevenlabsOptions.languageCode ?? void 0,
        tag_audio_events: elevenlabsOptions.tagAudioEvents ?? void 0,
        num_speakers: elevenlabsOptions.numSpeakers ?? void 0,
        timestamps_granularity: elevenlabsOptions.timestampsGranularity ?? void 0,
        file_format: elevenlabsOptions.fileFormat ?? void 0
      };
      // Only forward options that actually have a value.
      for (const [key, value] of Object.entries(transcriptionModelOptions)) {
        if (value !== void 0) {
          formData.append(key, String(value));
        }
      }
    }

    return { formData, warnings };
  }

  /**
   * Sends the transcription request and maps the response.
   *
   * @param {object} options - Same fields as `getArgs`, plus optional `headers`
   *   (merged over the configured headers) and `abortSignal`.
   * @returns {Promise<object>} `text`, per-word `segments`, `language`,
   *   `durationInSeconds`, `warnings`, and `response` metadata.
   */
  async doGenerate(options) {
    // The timestamp is captured before the request; `_internal.currentDate`
    // lets the config inject a clock.
    const currentDate = this.config._internal?.currentDate?.() ?? /* @__PURE__ */ new Date();
    const { formData, warnings } = await this.getArgs(options);

    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await (0, import_provider_utils2.postFormDataToApi)({
      url: this.config.url({
        path: "/v1/speech-to-text",
        modelId: this.modelId
      }),
      headers: (0, import_provider_utils2.combineHeaders)(this.config.headers(), options.headers),
      formData,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: (0, import_provider_utils2.createJsonResponseHandler)(
        elevenlabsTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    // Each entry of `words` becomes a segment; missing timings fall back to 0.
    const segments = response.words?.map((word) => ({
      text: word.text,
      startSecond: word.start ?? 0,
      endSecond: word.end ?? 0
    })) ?? [];

    return {
      text: response.text,
      segments,
      language: response.language_code,
      // Duration is the end time of the last entry in `words`, when present.
      durationInSeconds: response.words?.at(-1)?.end ?? void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

// Shape expected of a successful speech-to-text response body.
var elevenlabsTranscriptionResponseSchema = import_v42.z.object({
  language_code: import_v42.z.string(),
  language_probability: import_v42.z.number(),
  text: import_v42.z.string(),
  words: import_v42.z.array(
    import_v42.z.object({
      text: import_v42.z.string(),
      type: import_v42.z.enum(["word", "spacing", "audio_event"]),
      start: import_v42.z.number().nullish(),
      end: import_v42.z.number().nullish(),
      speaker_id: import_v42.z.string().nullish(),
      characters: import_v42.z.array(
        import_v42.z.object({
          text: import_v42.z.string(),
          start: import_v42.z.number().nullish(),
          end: import_v42.z.number().nullish()
        })
      ).nullish()
    })
  ).nullish()
});

// src/elevenlabs-speech-model.ts
var import_provider_utils3 = require("@ai-sdk/provider-utils");
var import_v43 = require("zod/v4");

// Provider options accepted under `providerOptions.elevenlabs` for speech
// calls.
var elevenLabsSpeechModelOptionsSchema = import_v43.z.object({
  languageCode: import_v43.z.string().optional(),
  voiceSettings: import_v43.z.object({
    stability: import_v43.z.number().min(0).max(1).optional(),
    similarityBoost: import_v43.z.number().min(0).max(1).optional(),
    style: import_v43.z.number().min(0).max(1).optional(),
    useSpeakerBoost: import_v43.z.boolean().optional()
  }).optional(),
  pronunciationDictionaryLocators: import_v43.z.array(
    import_v43.z.object({
      pronunciationDictionaryId: import_v43.z.string(),
      versionId: import_v43.z.string().optional()
    })
  ).max(3).optional(),
  seed: import_v43.z.number().min(0).max(4294967295).optional(),
  previousText: import_v43.z.string().optional(),
  nextText: import_v43.z.string().optional(),
  previousRequestIds: import_v43.z.array(import_v43.z.string()).max(3).optional(),
  nextRequestIds: import_v43.z.array(import_v43.z.string()).max(3).optional(),
  applyTextNormalization: import_v43.z.enum(["auto", "on", "off"]).optional(),
  applyLanguageTextNormalization: import_v43.z.boolean().optional(),
  enableLogging: import_v43.z.boolean().optional()
});

// Short output-format aliases and the `output_format` query value each one
// expands to. Values without an alias are passed through unchanged.
const OUTPUT_FORMAT_ALIASES = Object.freeze({
  mp3: "mp3_44100_128",
  mp3_32: "mp3_44100_32",
  mp3_64: "mp3_44100_64",
  mp3_96: "mp3_44100_96",
  mp3_128: "mp3_44100_128",
  mp3_192: "mp3_44100_192",
  pcm: "pcm_44100",
  pcm_16000: "pcm_16000",
  pcm_22050: "pcm_22050",
  pcm_24000: "pcm_24000",
  pcm_44100: "pcm_44100",
  ulaw: "ulaw_8000"
});

/**
 * Speech model that posts JSON to `/v1/text-to-speech/{voiceId}` and returns
 * the binary audio response.
 */
var ElevenLabsSpeechModel = class {
  /**
   * @param {string} modelId - Sent as `model_id` in the request body.
   * @param {object} config - Supplies `provider`, `url({ path, modelId })`,
   *   `headers()`, and optionally `fetch` and `_internal.currentDate`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider identifier taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Translates call options into the request body, query parameters and voice
   * ID for a text-to-speech request.
   *
   * @param {object} args
   * @param {string} args.text - Text to synthesise.
   * @param {string} [args.voice="21m00Tcm4TlvDq8ikWAM"] - Voice ID placed in
   *   the URL path.
   * @param {string} [args.outputFormat="mp3_44100_128"] - Alias or literal
   *   `output_format` value.
   * @param {string} [args.instructions] - Not supported; produces a warning.
   * @param {string} [args.language] - Takes precedence over the provider
   *   option `languageCode`.
   * @param {number} [args.speed] - Forwarded as `voice_settings.speed`.
   * @param {object} [args.providerOptions] - Options; the `elevenlabs` entry
   *   is validated against the speech options schema.
   * @returns {Promise<{requestBody: object, queryParams: object,
   *   warnings: Array, voiceId: string}>}
   */
  async getArgs({
    text,
    voice = "21m00Tcm4TlvDq8ikWAM",
    outputFormat = "mp3_44100_128",
    instructions,
    language,
    speed,
    providerOptions
  }) {
    const warnings = [];
    const elevenLabsOptions = await (0, import_provider_utils3.parseProviderOptions)({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsSpeechModelOptionsSchema
    });

    // Key insertion order below is deliberate: the body is later serialised
    // with JSON.stringify and surfaced to callers via `request.body`.
    const requestBody = {
      text,
      model_id: this.modelId
    };
    const queryParams = {};

    if (outputFormat) {
      // Own-property lookup so that names inherited from Object.prototype
      // (e.g. "constructor", "toString") are passed through as plain strings
      // rather than resolving to prototype members.
      queryParams.output_format = Object.hasOwn(OUTPUT_FORMAT_ALIASES, outputFormat)
        ? OUTPUT_FORMAT_ALIASES[outputFormat]
        : outputFormat;
    }

    if (language) {
      requestBody.language_code = language;
    }

    const voiceSettings = {};
    if (speed != null) {
      voiceSettings.speed = speed;
    }

    if (elevenLabsOptions) {
      const settings = elevenLabsOptions.voiceSettings;
      if (settings) {
        if (settings.stability != null) {
          voiceSettings.stability = settings.stability;
        }
        if (settings.similarityBoost != null) {
          voiceSettings.similarity_boost = settings.similarityBoost;
        }
        if (settings.style != null) {
          voiceSettings.style = settings.style;
        }
        if (settings.useSpeakerBoost != null) {
          voiceSettings.use_speaker_boost = settings.useSpeakerBoost;
        }
      }

      // The top-level `language` argument wins over the provider option.
      if (elevenLabsOptions.languageCode && !requestBody.language_code) {
        requestBody.language_code = elevenLabsOptions.languageCode;
      }

      if (elevenLabsOptions.pronunciationDictionaryLocators) {
        requestBody.pronunciation_dictionary_locators =
          elevenLabsOptions.pronunciationDictionaryLocators.map((locator) => ({
            pronunciation_dictionary_id: locator.pronunciationDictionaryId,
            // `version_id` is included only when a non-empty value is given.
            ...(locator.versionId ? { version_id: locator.versionId } : {})
          }));
      }

      if (elevenLabsOptions.seed != null) {
        requestBody.seed = elevenLabsOptions.seed;
      }
      if (elevenLabsOptions.previousText) {
        requestBody.previous_text = elevenLabsOptions.previousText;
      }
      if (elevenLabsOptions.nextText) {
        requestBody.next_text = elevenLabsOptions.nextText;
      }
      if (elevenLabsOptions.previousRequestIds) {
        requestBody.previous_request_ids = elevenLabsOptions.previousRequestIds;
      }
      if (elevenLabsOptions.nextRequestIds) {
        requestBody.next_request_ids = elevenLabsOptions.nextRequestIds;
      }
      if (elevenLabsOptions.applyTextNormalization) {
        requestBody.apply_text_normalization = elevenLabsOptions.applyTextNormalization;
      }
      if (elevenLabsOptions.applyLanguageTextNormalization != null) {
        requestBody.apply_language_text_normalization =
          elevenLabsOptions.applyLanguageTextNormalization;
      }
      // `enable_logging` travels in the query string, not the body.
      if (elevenLabsOptions.enableLogging != null) {
        queryParams.enable_logging = String(elevenLabsOptions.enableLogging);
      }
    }

    if (Object.keys(voiceSettings).length > 0) {
      requestBody.voice_settings = voiceSettings;
    }

    if (instructions) {
      warnings.push({
        type: "unsupported",
        feature: "instructions",
        details: `ElevenLabs speech models do not support instructions. Instructions parameter was ignored.`
      });
    }

    return {
      requestBody,
      queryParams,
      warnings,
      voiceId: voice
    };
  }

  /**
   * Sends the text-to-speech request and returns the generated audio.
   *
   * @param {object} options - Same fields as `getArgs`, plus optional `headers`
   *   (merged over the configured headers) and `abortSignal`.
   * @returns {Promise<object>} `audio`, `warnings`, the serialised `request`
   *   body, and `response` metadata.
   */
  async doGenerate(options) {
    // The timestamp is captured before the request; `_internal.currentDate`
    // lets the config inject a clock.
    const currentDate = this.config._internal?.currentDate?.() ?? /* @__PURE__ */ new Date();
    const { requestBody, queryParams, warnings, voiceId } = await this.getArgs(options);

    const baseUrl = this.config.url({
      path: `/v1/text-to-speech/${voiceId}`,
      modelId: this.modelId
    });
    const queryString = new URLSearchParams(queryParams).toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse
    } = await (0, import_provider_utils3.postJsonToApi)({
      url,
      headers: (0, import_provider_utils3.combineHeaders)(this.config.headers(), options.headers),
      body: requestBody,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: (0, import_provider_utils3.createBinaryResponseHandler)(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};

// src/version.ts
var VERSION = true ? "2.0.33" : "0.0.0-test";

// src/elevenlabs-provider.ts

/**
 * Creates an ElevenLabs provider instance.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - API key passed to `loadApiKey`, together
 *   with the environment variable name `ELEVENLABS_API_KEY`.
 * @param {object} [options.headers] - Extra headers; these are spread after
 *   `xi-api-key`, so they can override it.
 * @param {Function} [options.fetch] - Custom fetch handed to both models.
 * @returns {Function} A callable provider. Calling it with a model ID returns
 *   `{ transcription }`; it also exposes `transcription`/`transcriptionModel`
 *   and `speech`/`speechModel` factories. `languageModel`, `embeddingModel`,
 *   `textEmbeddingModel` and `imageModel` always throw `NoSuchModelError`.
 */
function createElevenLabs(options = {}) {
  // Headers are resolved on every call, so the API key is loaded at request
  // time rather than when the provider is created.
  const getHeaders = () => (0, import_provider_utils4.withUserAgentSuffix)(
    {
      "xi-api-key": (0, import_provider_utils4.loadApiKey)({
        apiKey: options.apiKey,
        environmentVariableName: "ELEVENLABS_API_KEY",
        description: "ElevenLabs"
      }),
      ...options.headers
    },
    `ai-sdk/elevenlabs/${VERSION}`
  );

  const createTranscriptionModel = (modelId) => new ElevenLabsTranscriptionModel(modelId, {
    provider: `elevenlabs.transcription`,
    url: ({ path }) => `https://api.elevenlabs.io${path}`,
    headers: getHeaders,
    fetch: options.fetch
  });

  const createSpeechModel = (modelId) => new ElevenLabsSpeechModel(modelId, {
    provider: `elevenlabs.speech`,
    url: ({ path }) => `https://api.elevenlabs.io${path}`,
    headers: getHeaders,
    fetch: options.fetch
  });

  const provider = function(modelId) {
    return {
      transcription: createTranscriptionModel(modelId)
    };
  };
  provider.specificationVersion = "v3";
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
  provider.speech = createSpeechModel;
  provider.speechModel = createSpeechModel;

  provider.languageModel = (modelId) => {
    throw new import_provider.NoSuchModelError({
      modelId,
      modelType: "languageModel",
      message: "ElevenLabs does not provide language models"
    });
  };
  provider.embeddingModel = (modelId) => {
    throw new import_provider.NoSuchModelError({
      modelId,
      modelType: "embeddingModel",
      message: "ElevenLabs does not provide embedding models"
    });
  };
  provider.textEmbeddingModel = provider.embeddingModel;
  provider.imageModel = (modelId) => {
    throw new import_provider.NoSuchModelError({
      modelId,
      modelType: "imageModel",
      message: "ElevenLabs does not provide image models"
    });
  };

  return provider;
}

// Default provider instance created with no explicit options.
var elevenlabs = createElevenLabs();

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  VERSION,
  createElevenLabs,
  elevenlabs
});
//# sourceMappingURL=index.js.map