/*
 * @jackdbd/eleventy-plugin-text-to-speech
 * Version:
 * Eleventy plugin that uses text-to-speech to generate audio assets for your website, then injects audio players in your HTML.
 * 154 lines • 5.13 kB
 * JavaScript
 */
import { Readable } from 'node:stream';
import defDebug from 'debug';
import { z } from 'zod';
import { elevenlabs } from '@jackdbd/zod-schemas';
import { DEBUG_PREFIX } from '../constants.js';
import { mediaType } from '../media-type.js';
import { validatedDataOrThrow } from '../validation.js';
// Module-scoped logger created with the `debug` package; its namespace is
// DEBUG_PREFIX followed by ':elevenlabs-text-to-speech'.
const debug = defDebug(`${DEBUG_PREFIX}:elevenlabs-text-to-speech`);
// Lookup table from ElevenLabs output format identifiers to the file extension
// (without the leading dot) used for the generated audio asset.
const AUDIO_EXTENSION_BY_OUTPUT_FORMAT = new Map([
  ['mp3_44100_64', 'mp3'],
  ['mp3_44100_96', 'mp3'],
  ['mp3_44100_128', 'mp3'],
  ['mp3_44100_192', 'mp3'],
  // https://en.wikipedia.org/wiki/Pulse-code_modulation
  ['pcm_16000', 'pcm'],
  ['pcm_22050', 'pcm'],
  ['pcm_24000', 'pcm'],
  ['pcm_44100', 'pcm'],
  // https://en.wikipedia.org/wiki/%CE%9C-law_algorithm
  // https://en.wikipedia.org/wiki/FLAC
  ['ulaw_8000', 'flac'] // or mulaw?
]);

/**
 * Returns the file extension (without the leading dot) for an ElevenLabs
 * output format.
 *
 * @param {string} outputFormat - Output format identifier, e.g. 'mp3_44100_128'.
 * @returns {string} 'mp3', 'pcm' or 'flac'. Any value that is not a known
 * output format (including `undefined`) yields 'mp3'.
 */
export const audioExtension = (outputFormat) => {
  const knownExtension = AUDIO_EXTENSION_BY_OUTPUT_FORMAT.get(outputFormat);
  return knownExtension ?? 'mp3';
};
/**
 * Voice settings used when the caller does not supply any.
 *
 * This object is the default value of `voiceSettings` in `synthesis_config`
 * and the fallback used by `defClient`; `synthesize` sends whatever voice
 * settings it receives as `voice_settings` in the request body.
 *
 * NOTE(review): the meaning and valid range of each field are defined by the
 * ElevenLabs API, not by this file. Confirm that `style: 1` is intended.
 */
export const DEFAULT_VOICE_SETTINGS = {
similarity_boost: 0.5,
stability: 0.5,
style: 1,
use_speaker_boost: true
};
/**
 * Zod schema for the synthesis settings: which model, output format and voice
 * to use, plus the voice settings.
 *
 * The `modelId`, `outputFormat` and `voiceId` validators come from
 * `@jackdbd/zod-schemas`.
 */
export const synthesis_config = z
.object({
/**
 * Models supported by the ElevenLabs Text-to-Speech API.
 *
 * @see https://elevenlabs.io/docs/api-reference/get-models
 */
modelId: elevenlabs.model_id,
// Output format identifier; `audioExtension` maps it to a file extension.
outputFormat: elevenlabs.output_format,
/**
 * Voices supported by the ElevenLabs Text-to-Speech API.
 *
 * @see https://elevenlabs.io/docs/api-reference/get-voices
 */
voiceId: elevenlabs.voice_id,
// Not validated (z.any()); defaults to DEFAULT_VOICE_SETTINGS when omitted.
voiceSettings: z.any().default(DEFAULT_VOICE_SETTINGS)
})
.describe('ElevenLabs synthesis config');
/**
 * Zod schema for the input of a single synthesis call: the text to convert
 * into speech. The `text` validator comes from `@jackdbd/zod-schemas`.
 */
export const synthesize_config = z
.object({
/**
 * Text to synthesize into speech using the ElevenLabs Text-to-Speech API.
 *
 * @remarks
 * Character limit for the ElevenLabs Text-to-Speech API:
 * - 2500 for unsubscribed users
 * - 5000 for subscribed users
 * @see [elevenlabs.io - Pricing](https://elevenlabs.io/pricing)
 */
text: elevenlabs.text
})
.describe('ElevenLabs synthesize config');
/**
 * Synthesizes text into speech using the ElevenLabs Text-to-Speech API.
 *
 * Request failures are reported through the returned object rather than by
 * throwing: the promise resolves to `{ error }` when the request could not be
 * sent, when the API answers with a status other than 200, or when the
 * response has no body. On success it resolves to `{ value }`, a Node.js
 * readable stream of the audio data.
 *
 * Errors that occur while the audio is being streamed (after this function has
 * returned) are emitted on the returned stream; the listener attached here
 * only logs them, so consumers must handle stream errors themselves.
 *
 * @param {string} apiKey - ElevenLabs API key, sent as the `xi-api-key` header.
 * @param {object} cfg - Synthesis configuration.
 * @param {string} cfg.modelId - Sent as `model_id` in the request body.
 * @param {string} cfg.outputFormat - Sent as the `output_format` query
 * parameter; also determines the `Accept` header.
 * @param {string} cfg.voiceId - Voice to use; becomes the last URL path segment.
 * @param {object} cfg.voiceSettings - Sent as `voice_settings` in the request body.
 * @param {string} text - Text to synthesize.
 * @returns {Promise<{ value: Readable } | { error: Error }>}
 *
 * @public
 */
export const synthesize = async (apiKey, cfg, text) => {
  const { modelId, outputFormat, voiceId, voiceSettings } = cfg;

  // `pronunciation_dictionary_locators` is supported by the API but is not
  // sent by this client.
  const reqBody = {
    model_id: modelId,
    text,
    voice_settings: voiceSettings
  };

  const ext = `.${audioExtension(outputFormat)}`;
  const reqInit = {
    method: 'POST',
    headers: {
      Accept: mediaType(ext).value,
      'Content-Type': 'application/json',
      'xi-api-key': apiKey
    },
    body: JSON.stringify(reqBody)
  };

  // The API reads the output format from the query string, not from the JSON
  // body. The voice ID is encoded so it cannot alter the request path.
  const apiEndpoint = 'https://api.elevenlabs.io/v1/text-to-speech';
  const url = new URL(`${apiEndpoint}/${encodeURIComponent(voiceId)}`);
  if (outputFormat != null) {
    url.searchParams.set('output_format', outputFormat);
  }

  let response;
  try {
    response = await fetch(url, reqInit);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return {
      error: new Error(
        `Could not synthesize text: failed to fetch: ${reason}`,
        { cause: err }
      )
    };
  }

  // Check the status first so that an HTTP failure is reported as such even
  // when the response carries no body.
  if (response.status !== 200) {
    if (response.body) {
      // The body will never be read: cancel it so the underlying connection
      // is released. A failure to cancel must not mask the HTTP error.
      try {
        await response.body.cancel();
      } catch (err) {
        debug(`could not cancel response body: %O`, err);
      }
    }
    // statusText can be empty, so always include the numeric status code.
    const status = `${response.status} ${response.statusText}`.trimEnd();
    return {
      error: new Error(`Could not synthesize text: ${status}`)
    };
  }

  if (!response.body) {
    return {
      error: new Error(`Could not synthesize text: no body`)
    };
  }

  const readable = Readable.from(response.body);
  // These listeners are for diagnostics only. A value returned from an event
  // listener is discarded, so stream errors cannot be returned from here.
  readable.on('error', () => {
    debug(`readable stream errored`);
  });
  readable.on('end', () => {
    debug(`readable stream ended, no more data to be consumed from`);
  });
  readable.on('close', () => {
    debug(`readable stream closed, all underlying resources freed`);
  });

  return { value: readable };
};
/**
 * Zod schema for authentication: an optional ElevenLabs API key.
 *
 * When the key is omitted, `defClient` falls back to the ELEVENLABS_API_KEY
 * environment variable.
 */
export const auth_options = z.object({
apiKey: elevenlabs.api_key.optional()
});
/**
 * Zod schema for the full client configuration: the authentication options
 * merged with the synthesis config.
 */
export const client_config = auth_options.merge(synthesis_config);
/**
 * Creates a client for the ElevenLabs Text-to-Speech API.
 *
 * The configuration is validated against `client_config` by
 * `validatedDataOrThrow` (which, as its name suggests, presumably throws on
 * invalid input). The API key is taken from the configuration, then from the
 * ELEVENLABS_API_KEY environment variable, and otherwise is an empty string.
 *
 * @param {object} config - Client configuration (see `client_config`).
 * @returns {{
 *   config: { modelId: string, outputFormat: string, voiceId: string },
 *   extension: string,
 *   synthesize: (text: string) => Promise<object>
 * }} The synthesis settings in use, the file extension of the generated audio
 * (without the leading dot), and `synthesize` pre-bound to the API key and the
 * synthesis settings so that callers only pass the text.
 */
export const defClient = (config) => {
  const validated = validatedDataOrThrow(config, client_config);

  // Everything `synthesize` needs besides the API key and the text.
  const synthesisConfig = {
    modelId: validated.modelId,
    outputFormat: validated.outputFormat,
    voiceId: validated.voiceId,
    voiceSettings: undefined
  };
  const apiKey = validated.apiKey || process.env.ELEVENLABS_API_KEY || '';
  synthesisConfig.voiceSettings =
    validated.voiceSettings || DEFAULT_VOICE_SETTINGS;

  const extension = audioExtension(validated.outputFormat);

  debug(
    `text will be synthesized into [hash].${extension} using model ${synthesisConfig.modelId}, voice ${synthesisConfig.voiceId}, voice settings %O`,
    synthesisConfig.voiceSettings
  );

  return {
    // Voice settings are deliberately left out of the exposed config.
    config: {
      modelId: synthesisConfig.modelId,
      outputFormat: synthesisConfig.outputFormat,
      voiceId: synthesisConfig.voiceId
    },
    extension,
    synthesize: synthesize.bind(null, apiKey, synthesisConfig)
  };
};
//# sourceMappingURL=elevenlabs-text-to-speech.js.map