@ai-sdk/elevenlabs
Version:
The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs) contains transcription model support for the ElevenLabs speech-to-text API and speech model support for the ElevenLabs text-to-speech API. It does not provide language, embedding, or image models.
435 lines (427 loc) • 15.8 kB
JavaScript
;
// Short aliases for the reflection primitives used by the interop helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs one enumerable getter on `target` for every key enumerated from
 * `all`; the value stored under each key in `all` becomes the getter itself.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each mirrored property keeps the enumerability of the source property.
 * Nothing is copied unless `from` is a non-null object or a function.
 * `desc` is only used as scratch space for the source descriptor.
 *
 * @returns `to`, for chaining.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    const enumerable = !desc || desc.enumerable;
    // The getter reads through to `from`, so later changes on the source stay visible.
    __defProp(to, key, { get: () => from[key], enumerable });
  }
  return to;
};

/**
 * Produces a fresh object carrying a non-enumerable `__esModule: true` marker
 * plus live getters for every own property of `mod`.
 */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
// src/index.ts
// Public export table for the package entry point. Every entry is a getter
// thunk rather than a direct value: VERSION and elevenlabs are `var`s that are
// only assigned further down in this file, so the lookup has to be deferred
// until a consumer actually reads the export.
var index_exports = {};
__export(index_exports, {
VERSION: () => VERSION,
createElevenLabs: () => createElevenLabs,
elevenlabs: () => elevenlabs
});
// Publish the table as the CommonJS module value; __toCommonJS wraps it in a
// new object that also carries the `__esModule` marker.
module.exports = __toCommonJS(index_exports);
// src/elevenlabs-provider.ts
var import_provider = require("@ai-sdk/provider");
var import_provider_utils4 = require("@ai-sdk/provider-utils");
// src/elevenlabs-transcription-model.ts
var import_provider_utils2 = require("@ai-sdk/provider-utils");
var import_v42 = require("zod/v4");
// src/elevenlabs-error.ts
var import_v4 = require("zod/v4");
var import_provider_utils = require("@ai-sdk/provider-utils");
// Expected shape of a JSON error body: `{ error: { message: string, code: number } }`.
// NOTE(review): confirm this matches the error payload the ElevenLabs API
// actually returns; SOURCE does not show a sample response.
var elevenlabsErrorDataSchema = (() => {
  const { z } = import_v4;
  const errorDetails = z.object({
    message: z.string(),
    code: z.number()
  });
  return z.object({ error: errorDetails });
})();

// Failed-response handler shared by the transcription and speech models. It
// parses the body with the schema above and surfaces `error.message` as the
// error text.
var elevenlabsFailedResponseHandler = (() => {
  const { createJsonErrorResponseHandler } = import_provider_utils;
  return createJsonErrorResponseHandler({
    errorSchema: elevenlabsErrorDataSchema,
    errorToMessage: ({ error }) => error.message
  });
})();
// src/elevenlabs-transcription-model.ts
// Validation schema for the transcription options read from
// `providerOptions.elevenlabs`. Every field accepts null/undefined; the
// transcription model's getArgs forwards the parsed values as snake_case
// multipart form fields.
var elevenLabsTranscriptionModelOptionsSchema = (() => {
  const { z } = import_v42;
  return z.object({
    languageCode: z.string().nullish(),
    tagAudioEvents: z.boolean().nullish().default(true),
    // Integer between 1 and 32 inclusive.
    numSpeakers: z.number().int().min(1).max(32).nullish(),
    timestampsGranularity: z.enum(["none", "word", "character"]).nullish().default("word"),
    diarize: z.boolean().nullish().default(false),
    fileFormat: z.enum(["pcm_s16le_16", "other"]).nullish().default("other")
  });
})();
var ElevenLabsTranscriptionModel = class {
constructor(modelId, config) {
this.modelId = modelId;
this.config = config;
this.specificationVersion = "v3";
}
get provider() {
return this.config.provider;
}
async getArgs({
audio,
mediaType,
providerOptions
}) {
var _a, _b, _c, _d, _e;
const warnings = [];
const elevenlabsOptions = await (0, import_provider_utils2.parseProviderOptions)({
provider: "elevenlabs",
providerOptions,
schema: elevenLabsTranscriptionModelOptionsSchema
});
const formData = new FormData();
const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([(0, import_provider_utils2.convertBase64ToUint8Array)(audio)]);
formData.append("model_id", this.modelId);
const fileExtension = (0, import_provider_utils2.mediaTypeToExtension)(mediaType);
formData.append(
"file",
new File([blob], "audio", { type: mediaType }),
`audio.${fileExtension}`
);
formData.append("diarize", "true");
if (elevenlabsOptions) {
const transcriptionModelOptions = {
language_code: (_a = elevenlabsOptions.languageCode) != null ? _a : void 0,
tag_audio_events: (_b = elevenlabsOptions.tagAudioEvents) != null ? _b : void 0,
num_speakers: (_c = elevenlabsOptions.numSpeakers) != null ? _c : void 0,
timestamps_granularity: (_d = elevenlabsOptions.timestampsGranularity) != null ? _d : void 0,
file_format: (_e = elevenlabsOptions.fileFormat) != null ? _e : void 0
};
if (typeof elevenlabsOptions.diarize === "boolean") {
formData.append("diarize", String(elevenlabsOptions.diarize));
}
for (const key in transcriptionModelOptions) {
const value = transcriptionModelOptions[key];
if (value !== void 0) {
formData.append(key, String(value));
}
}
}
return {
formData,
warnings
};
}
async doGenerate(options) {
var _a, _b, _c, _d, _e, _f, _g, _h;
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
const { formData, warnings } = await this.getArgs(options);
const {
value: response,
responseHeaders,
rawValue: rawResponse
} = await (0, import_provider_utils2.postFormDataToApi)({
url: this.config.url({
path: "/v1/speech-to-text",
modelId: this.modelId
}),
headers: (0, import_provider_utils2.combineHeaders)(this.config.headers(), options.headers),
formData,
failedResponseHandler: elevenlabsFailedResponseHandler,
successfulResponseHandler: (0, import_provider_utils2.createJsonResponseHandler)(
elevenlabsTranscriptionResponseSchema
),
abortSignal: options.abortSignal,
fetch: this.config.fetch
});
return {
text: response.text,
segments: (_e = (_d = response.words) == null ? void 0 : _d.map((word) => {
var _a2, _b2;
return {
text: word.text,
startSecond: (_a2 = word.start) != null ? _a2 : 0,
endSecond: (_b2 = word.end) != null ? _b2 : 0
};
})) != null ? _e : [],
language: response.language_code,
durationInSeconds: (_h = (_g = (_f = response.words) == null ? void 0 : _f.at(-1)) == null ? void 0 : _g.end) != null ? _h : void 0,
warnings,
response: {
timestamp: currentDate,
modelId: this.modelId,
headers: responseHeaders,
body: rawResponse
}
};
}
};
// Validation schema for a successful speech-to-text JSON response. The
// transcription model reads `text`, `language_code` and each word entry's
// `text` / `start` / `end`; the remaining fields are validated but not
// consumed in this file.
var elevenlabsTranscriptionResponseSchema = (() => {
  const { z } = import_v42;

  // One entry of a word's optional `characters` list.
  const characterEntry = () => z.object({
    text: z.string(),
    start: z.number().nullish(),
    end: z.number().nullish()
  });

  // One entry of the optional top-level `words` list.
  const wordEntry = () => z.object({
    text: z.string(),
    type: z.enum(["word", "spacing", "audio_event"]),
    start: z.number().nullish(),
    end: z.number().nullish(),
    speaker_id: z.string().nullish(),
    characters: z.array(characterEntry()).nullish()
  });

  return z.object({
    language_code: z.string(),
    language_probability: z.number(),
    text: z.string(),
    words: z.array(wordEntry()).nullish()
  });
})();
// src/elevenlabs-speech-model.ts
var import_provider_utils3 = require("@ai-sdk/provider-utils");
var import_v43 = require("zod/v4");
// Validation schema for the speech options read from
// `providerOptions.elevenlabs`. All fields are optional; the speech model's
// getArgs translates the camelCase keys into the snake_case request body and
// query parameters.
var elevenLabsSpeechModelOptionsSchema = (() => {
  const { z } = import_v43;

  // A number constrained to the closed interval [0, 1].
  const unitInterval = () => z.number().min(0).max(1).optional();
  // A list of at most three strings.
  const upToThreeIds = () => z.array(z.string()).max(3).optional();

  return z.object({
    languageCode: z.string().optional(),
    voiceSettings: z.object({
      stability: unitInterval(),
      similarityBoost: unitInterval(),
      style: unitInterval(),
      useSpeakerBoost: z.boolean().optional()
    }).optional(),
    // At most three locators.
    pronunciationDictionaryLocators: z.array(
      z.object({
        pronunciationDictionaryId: z.string(),
        versionId: z.string().optional()
      })
    ).max(3).optional(),
    seed: z.number().min(0).max(4294967295).optional(),
    previousText: z.string().optional(),
    nextText: z.string().optional(),
    previousRequestIds: upToThreeIds(),
    nextRequestIds: upToThreeIds(),
    applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
    applyLanguageTextNormalization: z.boolean().optional(),
    enableLogging: z.boolean().optional()
  });
})();
/**
 * Speech model that posts text to the ElevenLabs
 * `/v1/text-to-speech/<voice>` endpoint and returns the binary audio.
 */
var ElevenLabsSpeechModel = class {
  /**
   * @param {string} modelId - Sent to the API as `model_id` in the JSON body.
   * @param {object} config - Provider wiring. This class reads `provider`,
   *   `url({ path, modelId })`, `headers()`, `fetch` and the optional
   *   `_internal.currentDate()` clock override.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider identifier taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Translates the generic speech call options into the JSON body and query
   * parameters of a text-to-speech request.
   *
   * @param {object} args
   * @param {string} args.text - Text to synthesize.
   * @param {string} [args.voice] - Voice id placed in the request path.
   * @param {string} [args.outputFormat] - Short alias (e.g. `mp3`, `pcm`,
   *   `ulaw`) or a format string that is forwarded unchanged.
   * @param {string} [args.instructions] - Not supported; produces a warning.
   * @param {string} [args.language] - Takes precedence over the provider
   *   option `languageCode`.
   * @param {number} [args.speed] - Forwarded as `voice_settings.speed`.
   * @param {object} [args.providerOptions] - Options under the `elevenlabs`
   *   key are validated against the speech options schema.
   * @returns {Promise<{requestBody: object, queryParams: object, warnings: Array, voiceId: string}>}
   */
  async getArgs({
    text,
    voice = "21m00Tcm4TlvDq8ikWAM",
    outputFormat = "mp3_44100_128",
    instructions,
    language,
    speed,
    providerOptions
  }) {
    const warnings = [];
    const elevenLabsOptions = await (0, import_provider_utils3.parseProviderOptions)({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsSpeechModelOptionsSchema
    });
    const requestBody = {
      text,
      model_id: this.modelId
    };
    const queryParams = {};

    if (outputFormat) {
      // A Map (not a plain object) so that names such as "constructor" or
      // "toString" are never resolved through Object.prototype and leaked
      // into the query string as a function.
      const formatAliases = new Map([
        ["mp3", "mp3_44100_128"],
        ["mp3_32", "mp3_44100_32"],
        ["mp3_64", "mp3_44100_64"],
        ["mp3_96", "mp3_44100_96"],
        ["mp3_128", "mp3_44100_128"],
        ["mp3_192", "mp3_44100_192"],
        ["pcm", "pcm_44100"],
        ["pcm_16000", "pcm_16000"],
        ["pcm_22050", "pcm_22050"],
        ["pcm_24000", "pcm_24000"],
        ["pcm_44100", "pcm_44100"],
        ["ulaw", "ulaw_8000"]
      ]);
      // Unknown values are forwarded as-is.
      queryParams.output_format = formatAliases.get(outputFormat) ?? outputFormat;
    }

    if (language) {
      requestBody.language_code = language;
    }

    const voiceSettings = {};
    if (speed != null) {
      voiceSettings.speed = speed;
    }

    if (elevenLabsOptions) {
      const optionVoiceSettings = elevenLabsOptions.voiceSettings;
      if (optionVoiceSettings) {
        if (optionVoiceSettings.stability != null) {
          voiceSettings.stability = optionVoiceSettings.stability;
        }
        if (optionVoiceSettings.similarityBoost != null) {
          voiceSettings.similarity_boost = optionVoiceSettings.similarityBoost;
        }
        if (optionVoiceSettings.style != null) {
          voiceSettings.style = optionVoiceSettings.style;
        }
        if (optionVoiceSettings.useSpeakerBoost != null) {
          voiceSettings.use_speaker_boost = optionVoiceSettings.useSpeakerBoost;
        }
      }
      // The top-level `language` argument wins over the provider option.
      if (elevenLabsOptions.languageCode && !requestBody.language_code) {
        requestBody.language_code = elevenLabsOptions.languageCode;
      }
      if (elevenLabsOptions.pronunciationDictionaryLocators) {
        requestBody.pronunciation_dictionary_locators = elevenLabsOptions.pronunciationDictionaryLocators.map((locator) => ({
          pronunciation_dictionary_id: locator.pronunciationDictionaryId,
          ...(locator.versionId ? { version_id: locator.versionId } : {})
        }));
      }
      // `!= null` checks keep meaningful falsy values (seed 0, false flags).
      if (elevenLabsOptions.seed != null) {
        requestBody.seed = elevenLabsOptions.seed;
      }
      if (elevenLabsOptions.previousText) {
        requestBody.previous_text = elevenLabsOptions.previousText;
      }
      if (elevenLabsOptions.nextText) {
        requestBody.next_text = elevenLabsOptions.nextText;
      }
      if (elevenLabsOptions.previousRequestIds) {
        requestBody.previous_request_ids = elevenLabsOptions.previousRequestIds;
      }
      if (elevenLabsOptions.nextRequestIds) {
        requestBody.next_request_ids = elevenLabsOptions.nextRequestIds;
      }
      if (elevenLabsOptions.applyTextNormalization) {
        requestBody.apply_text_normalization = elevenLabsOptions.applyTextNormalization;
      }
      if (elevenLabsOptions.applyLanguageTextNormalization != null) {
        requestBody.apply_language_text_normalization = elevenLabsOptions.applyLanguageTextNormalization;
      }
      // Logging is controlled through the query string, not the JSON body.
      if (elevenLabsOptions.enableLogging != null) {
        queryParams.enable_logging = String(elevenLabsOptions.enableLogging);
      }
    }

    // Only send voice_settings when at least one setting was provided.
    if (Object.keys(voiceSettings).length > 0) {
      requestBody.voice_settings = voiceSettings;
    }

    if (instructions) {
      warnings.push({
        type: "unsupported",
        feature: "instructions",
        details: `ElevenLabs speech models do not support instructions. Instructions parameter was ignored.`
      });
    }

    return {
      requestBody,
      queryParams,
      warnings,
      voiceId: voice
    };
  }

  /**
   * Performs the text-to-speech request.
   *
   * @param {object} options - Same fields as `getArgs`, plus optional
   *   `headers` (merged over the provider headers) and `abortSignal`.
   * @returns {Promise<object>} `audio` (binary response value), `warnings`,
   *   the serialized request body and response metadata.
   */
  async doGenerate(options) {
    // The clock can be injected through config._internal; otherwise use "now".
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { requestBody, queryParams, warnings, voiceId } = await this.getArgs(options);

    const baseUrl = this.config.url({
      path: `/v1/text-to-speech/${voiceId}`,
      modelId: this.modelId
    });
    const queryString = new URLSearchParams(queryParams).toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse
    } = await (0, import_provider_utils3.postJsonToApi)({
      url,
      headers: (0, import_provider_utils3.combineHeaders)(this.config.headers(), options.headers),
      body: requestBody,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: (0, import_provider_utils3.createBinaryResponseHandler)(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};
// src/version.ts
// Package version; createElevenLabs embeds it in the `ai-sdk/elevenlabs/<version>`
// user-agent suffix. Written as a plain literal: the ternary on a constant
// `true` could never select its "0.0.0-test" branch.
var VERSION = "2.0.33";
// src/elevenlabs-provider.ts
/**
 * Creates an ElevenLabs provider.
 *
 * The returned value is a function with model factories attached:
 * `transcription` / `transcriptionModel` and `speech` / `speechModel` build
 * models, while `languageModel`, `embeddingModel` / `textEmbeddingModel` and
 * `imageModel` always throw NoSuchModelError. Calling the provider itself
 * with a model id returns `{ transcription: <transcription model> }`.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Passed to loadApiKey together with the
 *   `ELEVENLABS_API_KEY` environment variable name.
 * @param {object} [options.headers] - Extra headers, spread after the
 *   `xi-api-key` header.
 * @param {Function} [options.fetch] - Handed to every model as `config.fetch`.
 */
function createElevenLabs(options = {}) {
  // Evaluated on each request (not at provider creation), so the API key is
  // only looked up when a model actually asks for headers.
  const getHeaders = () => {
    const apiKey = (0, import_provider_utils4.loadApiKey)({
      apiKey: options.apiKey,
      environmentVariableName: "ELEVENLABS_API_KEY",
      description: "ElevenLabs"
    });
    const baseHeaders = { "xi-api-key": apiKey, ...options.headers };
    return (0, import_provider_utils4.withUserAgentSuffix)(baseHeaders, `ai-sdk/elevenlabs/${VERSION}`);
  };

  // Config handed to a model; only the provider label differs between models.
  const configFor = (providerLabel) => ({
    provider: providerLabel,
    url: ({ path }) => `https://api.elevenlabs.io${path}`,
    headers: getHeaders,
    fetch: options.fetch
  });

  const createTranscriptionModel = (modelId) => {
    return new ElevenLabsTranscriptionModel(modelId, configFor("elevenlabs.transcription"));
  };
  const createSpeechModel = (modelId) => {
    return new ElevenLabsSpeechModel(modelId, configFor("elevenlabs.speech"));
  };

  // Factory for model kinds this provider does not offer.
  const rejectModelType = (modelType, message) => (modelId) => {
    throw new import_provider.NoSuchModelError({ modelId, modelType, message });
  };
  const rejectEmbeddingModel = rejectModelType("embeddingModel", "ElevenLabs does not provide embedding models");

  const provider = function(modelId) {
    return { transcription: createTranscriptionModel(modelId) };
  };
  return Object.assign(provider, {
    specificationVersion: "v3",
    transcription: createTranscriptionModel,
    transcriptionModel: createTranscriptionModel,
    speech: createSpeechModel,
    speechModel: createSpeechModel,
    languageModel: rejectModelType("languageModel", "ElevenLabs does not provide language models"),
    embeddingModel: rejectEmbeddingModel,
    textEmbeddingModel: rejectEmbeddingModel,
    imageModel: rejectModelType("imageModel", "ElevenLabs does not provide image models")
  });
}
// Default provider instance, created with no explicit options (so `options`
// is the empty default). Header construction, including the API key lookup,
// is deferred until a model created from it requests headers.
var elevenlabs = createElevenLabs();
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so this assignment never executes.
// NOTE(review): presumably it exists only so static analysis of this file can
// discover the export names; keep the list in sync with the export table.
0 && (module.exports = {
VERSION,
createElevenLabs,
elevenlabs
});
//# sourceMappingURL=index.js.map