@ai-sdk/elevenlabs
Version:
The **[ElevenLabs provider](https://ai-sdk.dev/providers/ai-sdk-providers/elevenlabs)** for the [AI SDK](https://ai-sdk.dev/docs) contains transcription model support for the ElevenLabs speech-to-text API and speech model support for the ElevenLabs text-to-speech API. It does not provide language, embedding, or image models.
423 lines (417 loc) • 14 kB
JavaScript
// src/elevenlabs-provider.ts
import {
NoSuchModelError
} from "@ai-sdk/provider";
import {
loadApiKey,
withUserAgentSuffix
} from "@ai-sdk/provider-utils";
// src/elevenlabs-transcription-model.ts
import {
combineHeaders,
convertBase64ToUint8Array,
createJsonResponseHandler,
mediaTypeToExtension,
parseProviderOptions,
postFormDataToApi
} from "@ai-sdk/provider-utils";
import { z as z2 } from "zod/v4";
// src/elevenlabs-error.ts
import { z } from "zod/v4";
import { createJsonErrorResponseHandler } from "@ai-sdk/provider-utils";
// Schema used to validate the JSON body of a failed response: an `error`
// object carrying a string `message` and a numeric `code`.
// NOTE(review): confirm this matches the error payload the ElevenLabs API
// actually returns; a body that does not match this schema will not parse.
var elevenlabsErrorDataSchema = z.object({
error: z.object({
message: z.string(),
code: z.number()
})
});
// Failed-response handler shared by the transcription and speech models.
// The error message is taken from `error.message` of the parsed body.
var elevenlabsFailedResponseHandler = createJsonErrorResponseHandler({
errorSchema: elevenlabsErrorDataSchema,
errorToMessage: (data) => data.error.message
});
// src/elevenlabs-transcription-model.ts
// Provider options accepted for transcription calls (parsed in
// ElevenLabsTranscriptionModel.getArgs with provider "elevenlabs").
// Every field is nullish; fields declared with `.default(...)` carry the
// default shown on that line.
var elevenLabsTranscriptionModelOptionsSchema = z2.object({
// Forwarded as the `language_code` form field.
languageCode: z2.string().nullish(),
// Forwarded as the `tag_audio_events` form field.
tagAudioEvents: z2.boolean().nullish().default(true),
// Integer between 1 and 32 inclusive; forwarded as `num_speakers`.
numSpeakers: z2.number().int().min(1).max(32).nullish(),
// One of "none" | "word" | "character"; forwarded as `timestamps_granularity`.
timestampsGranularity: z2.enum(["none", "word", "character"]).nullish().default("word"),
// Forwarded as the `diarize` form field when it is a boolean.
diarize: z2.boolean().nullish().default(false),
// One of "pcm_s16le_16" | "other"; forwarded as `file_format`.
fileFormat: z2.enum(["pcm_s16le_16", "other"]).nullish().default("other")
});
/**
 * Transcription model that posts audio to the ElevenLabs
 * `/v1/speech-to-text` endpoint as multipart form data and maps the JSON
 * response onto the result object returned by `doGenerate`.
 */
var ElevenLabsTranscriptionModel = class {
  /**
   * @param modelId Identifier sent as the `model_id` form field.
   * @param config Provider configuration. This class reads `provider`,
   *   `url({ path, modelId })`, `headers()`, `fetch` and the optional
   *   `_internal.currentDate()` hook.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider name taken from the configuration. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Builds the multipart form body for a transcription request.
   *
   * @param audio Either a `Uint8Array` or a base64 string (decoded with
   *   `convertBase64ToUint8Array`).
   * @param mediaType Media type of the audio; used as the file part's type
   *   and to derive the filename extension.
   * @param providerOptions Call-level provider options, parsed for provider
   *   "elevenlabs" against `elevenLabsTranscriptionModelOptionsSchema`.
   * @returns `{ formData, warnings }`; no warnings are produced here.
   */
  async getArgs({
    audio,
    mediaType,
    providerOptions
  }) {
    const warnings = [];
    const elevenlabsOptions = await parseProviderOptions({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsTranscriptionModelOptionsSchema
    });

    const formData = new FormData();
    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
    formData.append("model_id", this.modelId);
    const fileExtension = mediaTypeToExtension(mediaType);
    formData.append(
      "file",
      new File([blob], "audio", { type: mediaType }),
      `audio.${fileExtension}`
    );
    // Diarization is requested by default when no provider options are given.
    formData.append("diarize", "true");

    if (elevenlabsOptions) {
      // Normalizes null to undefined so that null options are not sent.
      const orUndefined = (value) => value != null ? value : void 0;
      const transcriptionModelOptions = {
        language_code: orUndefined(elevenlabsOptions.languageCode),
        tag_audio_events: orUndefined(elevenlabsOptions.tagAudioEvents),
        num_speakers: orUndefined(elevenlabsOptions.numSpeakers),
        timestamps_granularity: orUndefined(elevenlabsOptions.timestampsGranularity),
        file_format: orUndefined(elevenlabsOptions.fileFormat)
      };
      if (typeof elevenlabsOptions.diarize === "boolean") {
        // `set` replaces the default appended above. Using `append` here
        // would send two conflicting `diarize` fields in the same body.
        formData.set("diarize", String(elevenlabsOptions.diarize));
      }
      for (const [key, value] of Object.entries(transcriptionModelOptions)) {
        if (value !== void 0) {
          formData.append(key, String(value));
        }
      }
    }

    return {
      formData,
      warnings
    };
  }

  /**
   * Sends the transcription request and converts the response.
   *
   * @param options Call options; in addition to what `getArgs` reads, this
   *   uses `options.headers` and `options.abortSignal`.
   * @returns An object with `text`, `segments` (one per entry of the
   *   response's `words`, with missing start/end mapped to 0), `language`,
   *   `durationInSeconds` (the `end` of the last word, or undefined),
   *   `warnings` and response metadata.
   */
  async doGenerate(options) {
    // The timestamp is captured before the request is built and sent.
    const internal = this.config._internal;
    const injectedDate = internal != null && internal.currentDate != null ? internal.currentDate() : void 0;
    const currentDate = injectedDate != null ? injectedDate : /* @__PURE__ */ new Date();

    const { formData, warnings } = await this.getArgs(options);
    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await postFormDataToApi({
      url: this.config.url({
        path: "/v1/speech-to-text",
        modelId: this.modelId
      }),
      headers: combineHeaders(this.config.headers(), options.headers),
      formData,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        elevenlabsTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    // `words` is nullish in the response schema; treat absence as empty.
    const words = response.words != null ? response.words : [];
    const lastWord = words[words.length - 1];

    return {
      text: response.text,
      segments: words.map((word) => ({
        text: word.text,
        startSecond: word.start != null ? word.start : 0,
        endSecond: word.end != null ? word.end : 0
      })),
      language: response.language_code,
      durationInSeconds: lastWord != null && lastWord.end != null ? lastWord.end : void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};
// Schema for a successful speech-to-text response, passed to
// createJsonResponseHandler in ElevenLabsTranscriptionModel.doGenerate.
// doGenerate reads `text`, `language_code`, and each word's `text`, `start`
// and `end`; the remaining fields are validated but not read in this file.
var elevenlabsTranscriptionResponseSchema = z2.object({
language_code: z2.string(),
language_probability: z2.number(),
text: z2.string(),
// Optional list of word-level entries; may be null or absent.
words: z2.array(
z2.object({
text: z2.string(),
type: z2.enum(["word", "spacing", "audio_event"]),
// Timing fields are nullish; doGenerate substitutes 0 when missing.
start: z2.number().nullish(),
end: z2.number().nullish(),
speaker_id: z2.string().nullish(),
// Optional per-character entries with their own nullish timing fields.
characters: z2.array(
z2.object({
text: z2.string(),
start: z2.number().nullish(),
end: z2.number().nullish()
})
).nullish()
})
).nullish()
});
// src/elevenlabs-speech-model.ts
import {
combineHeaders as combineHeaders2,
createBinaryResponseHandler,
parseProviderOptions as parseProviderOptions2,
postJsonToApi
} from "@ai-sdk/provider-utils";
import { z as z3 } from "zod/v4";
// Provider options accepted for speech calls (parsed in
// ElevenLabsSpeechModel.getArgs with provider "elevenlabs"). All fields are
// optional. getArgs maps them to snake_case request fields, except
// `enableLogging`, which becomes the `enable_logging` query parameter.
var elevenLabsSpeechModelOptionsSchema = z3.object({
// Used as `language_code` only when the call-level `language` is not set.
languageCode: z3.string().optional(),
// Numeric settings are constrained to the range 0..1 inclusive.
voiceSettings: z3.object({
stability: z3.number().min(0).max(1).optional(),
similarityBoost: z3.number().min(0).max(1).optional(),
style: z3.number().min(0).max(1).optional(),
useSpeakerBoost: z3.boolean().optional()
}).optional(),
// At most 3 locators; `versionId` is optional per locator.
pronunciationDictionaryLocators: z3.array(
z3.object({
pronunciationDictionaryId: z3.string(),
versionId: z3.string().optional()
})
).max(3).optional(),
// Constrained to 0..4294967295 inclusive.
seed: z3.number().min(0).max(4294967295).optional(),
previousText: z3.string().optional(),
nextText: z3.string().optional(),
// At most 3 ids each.
previousRequestIds: z3.array(z3.string()).max(3).optional(),
nextRequestIds: z3.array(z3.string()).max(3).optional(),
applyTextNormalization: z3.enum(["auto", "on", "off"]).optional(),
applyLanguageTextNormalization: z3.boolean().optional(),
enableLogging: z3.boolean().optional()
});
// Short output-format aliases accepted by ElevenLabsSpeechModel.getArgs and
// the ElevenLabs `output_format` value each one expands to. A Map is used so
// that only these exact keys match; a plain object would also "match"
// inherited names such as "constructor" and send a function as the format.
var ELEVENLABS_OUTPUT_FORMAT_ALIASES = new Map([
  ["mp3", "mp3_44100_128"],
  ["mp3_32", "mp3_44100_32"],
  ["mp3_64", "mp3_44100_64"],
  ["mp3_96", "mp3_44100_96"],
  ["mp3_128", "mp3_44100_128"],
  ["mp3_192", "mp3_44100_192"],
  ["pcm", "pcm_44100"],
  ["pcm_16000", "pcm_16000"],
  ["pcm_22050", "pcm_22050"],
  ["pcm_24000", "pcm_24000"],
  ["pcm_44100", "pcm_44100"],
  ["ulaw", "ulaw_8000"]
]);

/**
 * Speech model that posts JSON to the ElevenLabs
 * `/v1/text-to-speech/{voiceId}` endpoint and returns the binary audio.
 */
var ElevenLabsSpeechModel = class {
  /**
   * @param modelId Identifier sent as `model_id` in the request body.
   * @param config Provider configuration. This class reads `provider`,
   *   `url({ path, modelId })`, `headers()`, `fetch` and the optional
   *   `_internal.currentDate()` hook.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v3";
  }

  /** Provider name taken from the configuration. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Translates call options into the request body, query parameters and
   * voice id for a text-to-speech request.
   *
   * @param text Text to synthesize.
   * @param voice Voice id used in the URL path (defaults to
   *   "21m00Tcm4TlvDq8ikWAM").
   * @param outputFormat Alias from the table above or a raw format string,
   *   which is passed through unchanged (defaults to "mp3_44100_128").
   * @param instructions Not supported; produces a warning when set.
   * @param language Sent as `language_code`; takes precedence over the
   *   provider option `languageCode`.
   * @param speed Sent as `voice_settings.speed` when not null.
   * @param providerOptions Call-level provider options, parsed for provider
   *   "elevenlabs" against `elevenLabsSpeechModelOptionsSchema`.
   * @returns `{ requestBody, queryParams, warnings, voiceId }`.
   */
  async getArgs({
    text,
    voice = "21m00Tcm4TlvDq8ikWAM",
    outputFormat = "mp3_44100_128",
    instructions,
    language,
    speed,
    providerOptions
  }) {
    const warnings = [];
    const elevenLabsOptions = await parseProviderOptions2({
      provider: "elevenlabs",
      providerOptions,
      schema: elevenLabsSpeechModelOptionsSchema
    });
    const requestBody = {
      text,
      model_id: this.modelId
    };
    const queryParams = {};

    if (outputFormat) {
      queryParams.output_format = ELEVENLABS_OUTPUT_FORMAT_ALIASES.get(outputFormat) || outputFormat;
    }
    if (language) {
      requestBody.language_code = language;
    }

    const voiceSettings = {};
    if (speed != null) {
      voiceSettings.speed = speed;
    }

    if (elevenLabsOptions) {
      const settings = elevenLabsOptions.voiceSettings;
      if (settings) {
        if (settings.stability != null) {
          voiceSettings.stability = settings.stability;
        }
        if (settings.similarityBoost != null) {
          voiceSettings.similarity_boost = settings.similarityBoost;
        }
        if (settings.style != null) {
          voiceSettings.style = settings.style;
        }
        if (settings.useSpeakerBoost != null) {
          voiceSettings.use_speaker_boost = settings.useSpeakerBoost;
        }
      }
      // The call-level `language` wins over the provider option.
      if (elevenLabsOptions.languageCode && !requestBody.language_code) {
        requestBody.language_code = elevenLabsOptions.languageCode;
      }
      if (elevenLabsOptions.pronunciationDictionaryLocators) {
        requestBody.pronunciation_dictionary_locators = elevenLabsOptions.pronunciationDictionaryLocators.map((locator) => ({
          pronunciation_dictionary_id: locator.pronunciationDictionaryId,
          ...locator.versionId && { version_id: locator.versionId }
        }));
      }
      if (elevenLabsOptions.seed != null) {
        requestBody.seed = elevenLabsOptions.seed;
      }
      if (elevenLabsOptions.previousText) {
        requestBody.previous_text = elevenLabsOptions.previousText;
      }
      if (elevenLabsOptions.nextText) {
        requestBody.next_text = elevenLabsOptions.nextText;
      }
      if (elevenLabsOptions.previousRequestIds) {
        requestBody.previous_request_ids = elevenLabsOptions.previousRequestIds;
      }
      if (elevenLabsOptions.nextRequestIds) {
        requestBody.next_request_ids = elevenLabsOptions.nextRequestIds;
      }
      if (elevenLabsOptions.applyTextNormalization) {
        requestBody.apply_text_normalization = elevenLabsOptions.applyTextNormalization;
      }
      if (elevenLabsOptions.applyLanguageTextNormalization != null) {
        requestBody.apply_language_text_normalization = elevenLabsOptions.applyLanguageTextNormalization;
      }
      if (elevenLabsOptions.enableLogging != null) {
        // Sent as a query parameter, not in the JSON body.
        queryParams.enable_logging = String(elevenLabsOptions.enableLogging);
      }
    }

    // Only attach voice settings when at least one was provided.
    if (Object.keys(voiceSettings).length > 0) {
      requestBody.voice_settings = voiceSettings;
    }
    if (instructions) {
      warnings.push({
        type: "unsupported",
        feature: "instructions",
        details: `ElevenLabs speech models do not support instructions. Instructions parameter was ignored.`
      });
    }

    return {
      requestBody,
      queryParams,
      warnings,
      voiceId: voice
    };
  }

  /**
   * Sends the text-to-speech request and returns the audio.
   *
   * @param options Call options; in addition to what `getArgs` reads, this
   *   uses `options.headers` and `options.abortSignal`.
   * @returns An object with `audio`, `warnings`, the serialized request
   *   body and response metadata.
   */
  async doGenerate(options) {
    // The timestamp is captured before the request is built and sent.
    const internal = this.config._internal;
    const injectedDate = internal != null && internal.currentDate != null ? internal.currentDate() : void 0;
    const currentDate = injectedDate != null ? injectedDate : /* @__PURE__ */ new Date();

    const { requestBody, queryParams, warnings, voiceId } = await this.getArgs(options);

    // The voice id is caller-supplied, so it is percent-encoded to keep it a
    // single path segment (no injected "/" or "?").
    const baseUrl = this.config.url({
      path: `/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
      modelId: this.modelId
    });
    const queryString = new URLSearchParams(queryParams).toString();
    const url = queryString ? `${baseUrl}?${queryString}` : baseUrl;

    const {
      value: audio,
      responseHeaders,
      rawValue: rawResponse
    } = await postJsonToApi({
      url,
      headers: combineHeaders2(this.config.headers(), options.headers),
      body: requestBody,
      failedResponseHandler: elevenlabsFailedResponseHandler,
      successfulResponseHandler: createBinaryResponseHandler(),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });

    return {
      audio,
      warnings,
      request: {
        body: JSON.stringify(requestBody)
      },
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};
// src/version.ts
// Package version; interpolated into the user-agent suffix
// `ai-sdk/elevenlabs/<VERSION>` built in createElevenLabs.
var VERSION = "2.0.33";
// src/elevenlabs-provider.ts
/**
 * Creates an ElevenLabs provider.
 *
 * @param options Optional settings read by this function: `apiKey`,
 *   `headers` (merged after the `xi-api-key` header) and `fetch`.
 * @returns A callable provider. Calling it with a model id returns
 *   `{ transcription }`. It also exposes `transcription` /
 *   `transcriptionModel` and `speech` / `speechModel` factories, while
 *   `languageModel`, `embeddingModel`, `textEmbeddingModel` and `imageModel`
 *   always throw `NoSuchModelError`.
 */
function createElevenLabs(options = {}) {
  const apiOrigin = "https://api.elevenlabs.io";

  // Headers are built lazily, on each call, so the API key is only loaded
  // when a request is actually made.
  const buildHeaders = () => {
    const baseHeaders = {
      "xi-api-key": loadApiKey({
        apiKey: options.apiKey,
        environmentVariableName: "ELEVENLABS_API_KEY",
        description: "ElevenLabs"
      }),
      ...options.headers
    };
    return withUserAgentSuffix(baseHeaders, `ai-sdk/elevenlabs/${VERSION}`);
  };

  // Configuration shared by both model kinds; only the provider name differs.
  const configFor = (providerName) => ({
    provider: providerName,
    url: ({ path }) => `${apiOrigin}${path}`,
    headers: buildHeaders,
    fetch: options.fetch
  });

  const createTranscriptionModel = (modelId) => {
    return new ElevenLabsTranscriptionModel(modelId, configFor(`elevenlabs.transcription`));
  };
  const createSpeechModel = (modelId) => {
    return new ElevenLabsSpeechModel(modelId, configFor(`elevenlabs.speech`));
  };

  // Factory for the model kinds this provider does not offer.
  const unsupported = (modelType, message) => (modelId) => {
    throw new NoSuchModelError({ modelId, modelType, message });
  };
  const embeddingModel = unsupported("embeddingModel", "ElevenLabs does not provide embedding models");

  const provider = function(modelId) {
    return { transcription: createTranscriptionModel(modelId) };
  };
  return Object.assign(provider, {
    specificationVersion: "v3",
    transcription: createTranscriptionModel,
    transcriptionModel: createTranscriptionModel,
    speech: createSpeechModel,
    speechModel: createSpeechModel,
    languageModel: unsupported("languageModel", "ElevenLabs does not provide language models"),
    embeddingModel,
    textEmbeddingModel: embeddingModel,
    imageModel: unsupported("imageModel", "ElevenLabs does not provide image models")
  });
}
// Default provider instance created with no explicit options. With no
// `apiKey` option, loadApiKey is given only the environment variable name
// ELEVENLABS_API_KEY, and it is invoked when request headers are built
// rather than here at module load.
var elevenlabs = createElevenLabs();
export {
VERSION,
createElevenLabs,
elevenlabs
};
//# sourceMappingURL=index.mjs.map