js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
1,392 lines (1,380 loc) • 1.72 MB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.JSTTSWrapper = {}));
})(this, (function (exports) { 'use strict';
/**
 * Merge the enumerable own keys of several namespace-like objects into `n`.
 *
 * Entries of `m` that are falsy, strings or arrays are ignored. The key
 * "default" and keys already present in `n` are never copied. A source
 * accessor is reused as-is; a plain value is exposed through a live getter so
 * later changes on the source stay visible.
 *
 * @param n Target namespace object (mutated, then frozen)
 * @param m Array of candidate source objects
 * @returns The frozen target `n`
 */
function _mergeNamespaces(n, m) {
    for (const source of m) {
        const isNamespaceLike = source && typeof source !== 'string' && !Array.isArray(source);
        if (!isNamespaceLike) {
            continue;
        }
        for (const key of Object.keys(source)) {
            if (key === 'default' || key in n) {
                continue;
            }
            const descriptor = Object.getOwnPropertyDescriptor(source, key);
            const liveAccessor = {
                enumerable: true,
                get: function () { return source[key]; }
            };
            Object.defineProperty(n, key, descriptor.get ? descriptor : liveAccessor);
        }
    }
    return Object.freeze(n);
}
/**
* Environment detection and cross-platform utilities
*/
/**
 * True when a global `window` binding is defined.
 */
const isBrowser$1 = typeof window !== "undefined";
/**
 * True when no `window` is defined and `process.versions.node` is available.
 */
const isNode = (() => {
    if (isBrowser$1 || typeof process === "undefined") {
        return false;
    }
    const { versions } = process;
    return typeof versions !== "undefined" && typeof versions.node !== "undefined";
})();
/**
 * File helpers with one implementation per runtime, selected by `isNode`.
 *
 * In Node.js every call goes through the built-in fs modules. Otherwise the
 * async helpers fall back to `fetch` / a download link and the sync helpers
 * throw.
 */
const fileSystem = (() => {
    // NOTE(review): the built-ins are loaded through the Function constructor
    // instead of a literal import()/require() — presumably to keep bundlers from
    // resolving them in browser builds; confirm before changing.
    const importFsPromises = () => new Function("m", "return import(m)")("node:fs/promises");
    const requireFs = () => new Function("n", "return require(n)")("node" + ":fs");
    return {
        /**
         * Read a file as text.
         * @param path File path (Node.js) or URL to fetch (otherwise)
         * @returns Promise resolving to the contents as a string
         */
        readFile: async (path) => {
            if (!isNode) {
                const response = await fetch(path);
                if (!response.ok) {
                    throw new Error(`Failed to fetch ${path}: ${response.status} ${response.statusText}`);
                }
                return response.text();
            }
            const fs = await importFsPromises();
            return fs.readFile(path, "utf-8");
        },
        /**
         * Read a file as text, synchronously (Node.js only).
         * @param path Path to the file
         * @returns File contents as a string
         */
        readFileSync: (path) => {
            if (!isNode) {
                throw new Error("Synchronous file reading is not supported in browsers");
            }
            return requireFs().readFileSync(path, "utf-8");
        },
        /**
         * Write data to a file; outside Node.js the data is offered as a download
         * named after the last path segment.
         * @param path Path to the file
         * @param data String or byte sequence to write
         * @returns Promise resolving once the write (or download trigger) is done
         */
        writeFile: async (path, data) => {
            if (isNode) {
                const fs = await importFsPromises();
                return fs.writeFile(path, data);
            }
            const payload = typeof data === "string" ? data : Uint8Array.from(data);
            const objectUrl = URL.createObjectURL(new Blob([payload], { type: "application/octet-stream" }));
            const anchor = document.createElement("a");
            anchor.href = objectUrl;
            anchor.download = path.split("/").pop() || "download";
            document.body.appendChild(anchor);
            anchor.click();
            // Clean up shortly after the click so the download has been started
            setTimeout(() => {
                if (document !== null && document !== undefined && document.body) {
                    document.body.removeChild(anchor);
                }
                URL.revokeObjectURL(objectUrl);
            }, 100);
        },
        /**
         * Write data to a file, synchronously (Node.js only).
         * @param path Path to the file
         * @param data Data to write
         */
        writeFileSync: (path, data) => {
            if (!isNode) {
                throw new Error("Synchronous file writing is not supported in browsers");
            }
            requireFs().writeFileSync(path, data);
        },
        /**
         * Check whether a file exists (Node.js: fs.access; otherwise a HEAD request).
         * @param path Path or URL to probe
         * @returns Promise resolving to true if reachable, false otherwise
         */
        exists: async (path) => {
            if (!isNode) {
                try {
                    const response = await fetch(path, { method: "HEAD" });
                    return response.ok;
                }
                catch (_fetchError) {
                    return false;
                }
            }
            const fs = await importFsPromises();
            try {
                await fs.access(path);
                return true;
            }
            catch (_accessError) {
                return false;
            }
        },
        /**
         * Check whether a file exists, synchronously (Node.js only).
         * @param path Path to the file
         * @returns True if the file exists, false otherwise
         */
        existsSync: (path) => {
            if (!isNode) {
                throw new Error("Synchronous file existence check is not supported in browsers");
            }
            return requireFs().existsSync(path);
        },
    };
})();
/**
 * Path helpers with one implementation per runtime, selected by `isNode`.
 *
 * In Node.js each helper delegates to the built-in path module. Otherwise a
 * "/"-separated string implementation is used; its results for root-level
 * files, dotfiles and ".." follow the Node.js POSIX results.
 */
const pathUtils = {
    /**
     * Join path segments
     * @param paths Path segments to join
     * @returns Joined path
     */
    join: (...paths) => {
        if (isNode) {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const path = new Function("n", "return require(n)")("node" + ":path");
            return path.join(...paths);
        }
        // Fallback: glue with "/" and collapse repeated separators
        return paths.join("/").replace(/\/+/g, "/");
    },
    /**
     * Get the directory name of a path
     * @param path Path
     * @returns Directory name ("." when the path has no directory part, "/" for
     * entries directly under the root)
     */
    dirname: (path) => {
        if (isNode) {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = new Function("n", "return require(n)")("node" + ":path");
            return nodePath.dirname(path);
        }
        const lastSlash = path.lastIndexOf("/");
        if (lastSlash === -1) {
            return ".";
        }
        if (lastSlash === 0) {
            // "/file" lives in the root directory, not in "."
            return "/";
        }
        return path.slice(0, lastSlash);
    },
    /**
     * Get the base name of a path
     * @param path Path
     * @returns Base name (last "/"-separated segment)
     */
    basename: (path) => {
        if (isNode) {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = new Function("n", "return require(n)")("node" + ":path");
            return nodePath.basename(path);
        }
        return path.split("/").pop() || "";
    },
    /**
     * Get the extension of a path
     * @param path Path
     * @returns Extension including the leading dot, or "" when there is none
     */
    extname: (path) => {
        if (isNode) {
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = new Function("n", "return require(n)")("node" + ":path");
            return nodePath.extname(path);
        }
        const base = path.split("/").pop() || "";
        const dotIndex = base.lastIndexOf(".");
        // dotIndex 0 is a dotfile such as ".bashrc"; ".." is a directory reference.
        // Neither has an extension.
        if (dotIndex <= 0 || base === "..") {
            return "";
        }
        return base.slice(dotIndex);
    },
};
// Runtime overrides; an `enabled` boolean here takes precedence over the environment
const runtimeConfig$1 = {};
// Accepted spellings (case-insensitive) for a set / cleared flag
const TRUE_PATTERN = /^(1|true|yes|on)$/i;
const FALSE_PATTERN = /^(0|false|no|off)$/i;
/**
 * Interpret a loosely typed flag value.
 *
 * @param value Any value; it is stringified and trimmed before matching
 * @returns true / false for a recognised spelling, undefined for null,
 * undefined, blank or unrecognised input
 */
function parseBooleanFlag(value) {
    if (value === null || value === undefined) {
        return undefined;
    }
    const normalized = String(value).trim();
    if (normalized === "") {
        return undefined;
    }
    if (TRUE_PATTERN.test(normalized)) {
        return true;
    }
    return FALSE_PATTERN.test(normalized) ? false : undefined;
}
/**
 * Read the Speech Markdown on/off override from the process environment.
 *
 * SPEECHMARKDOWN_DISABLE is consulted first (a recognised value is inverted),
 * then SPEECHMARKDOWN_ENABLE. Outside Node.js, or when neither variable holds
 * a recognised flag, there is no override.
 *
 * @returns true / false when an override is set, otherwise undefined
 */
function getEnvEnabledOverride() {
    if (!isNode) {
        return undefined;
    }
    try {
        const processEnv = process === null || process === undefined ? undefined : process.env;
        const env = processEnv === null || processEnv === undefined ? {} : processEnv;
        const disabled = parseBooleanFlag(env.SPEECHMARKDOWN_DISABLE);
        if (typeof disabled === "boolean") {
            return !disabled;
        }
        const enabled = parseBooleanFlag(env.SPEECHMARKDOWN_ENABLE);
        if (enabled !== undefined) {
            return enabled;
        }
    }
    catch (_envError) {
        // Ignore env parsing errors and fall back to defaults
    }
    return undefined;
}
/**
 * Decide whether speechmarkdown-js should be used.
 *
 * Precedence: a boolean `runtimeConfig$1.enabled`, then the environment
 * override, then the default of enabled.
 *
 * @returns True when the full converter may be used
 */
function isSpeechMarkdownEnabled$1() {
    const { enabled } = runtimeConfig$1;
    if (typeof enabled === "boolean") {
        return enabled;
    }
    const fromEnv = getEnvEnabledOverride();
    // Default: enabled everywhere (Node + browser)
    return typeof fromEnv === "boolean" ? fromEnv : true;
}
/**
* Speech Markdown converter using the official speechmarkdown-js library
*
* This module provides functions to convert Speech Markdown to SSML
* using the speechmarkdown-js library (https://github.com/speechmarkdown/speechmarkdown-js)
*/
// Cached speechmarkdown-js class and a flag recording that a load succeeded
let SpeechMarkdown$1 = null;
let speechMarkdownLoaded = false;
/**
 * Resolve the SpeechMarkdown class, caching it after the first success.
 *
 * In Node.js a synchronous `require` is tried first; otherwise (or if that
 * fails) the bundled `index$4` module is used. The class is taken from the
 * named export `SpeechMarkdown`, then `default.SpeechMarkdown`, then `default`.
 *
 * @returns Promise resolving to the class, or null (after a console warning)
 * when the feature is disabled or no class could be resolved
 */
async function loadSpeechMarkdown() {
    if (speechMarkdownLoaded) {
        return SpeechMarkdown$1;
    }
    try {
        if (!isSpeechMarkdownEnabled$1()) {
            console.warn("speechmarkdown-js disabled (set SPEECHMARKDOWN_DISABLE=false or configureSpeechMarkdown({ enabled: true }) to re-enable). Using built-in fallback.");
            return null;
        }
        let loaded = null;
        if (isNode) {
            try {
                // Called through an alias rather than as a literal require(...) call
                const requireFn = typeof require !== "undefined" ? require : undefined;
                if (requireFn) {
                    loaded = requireFn("speechmarkdown-js");
                }
            }
            catch (_requireError) {
                // Fallback to dynamic import below
            }
        }
        if (!loaded) {
            try {
                loaded = await Promise.resolve().then(() => index$4);
            }
            catch (_importError) {
                // Dynamic import failed
            }
        }
        // Prefer named export, but tolerate default exports
        const hasModule = loaded !== null && loaded !== undefined;
        let resolved = hasModule ? loaded.SpeechMarkdown : undefined;
        if (resolved === null || resolved === undefined) {
            const defaultExport = hasModule ? loaded.default : undefined;
            const nested = defaultExport === null || defaultExport === undefined
                ? undefined
                : defaultExport.SpeechMarkdown;
            resolved = nested !== null && nested !== undefined ? nested : defaultExport;
        }
        SpeechMarkdown$1 = resolved;
        if (!SpeechMarkdown$1) {
            throw new Error("speechmarkdown-js module did not expose SpeechMarkdown class");
        }
        speechMarkdownLoaded = true;
        return SpeechMarkdown$1;
    }
    catch (_error) {
        console.warn("speechmarkdown-js not available. Using built-in fallback. To enable full Speech Markdown in browsers, add 'speechmarkdown-js' to your app and it will be loaded at runtime.");
        return null;
    }
}
/**
 * Lightweight fallback converter for a minimal Speech Markdown subset.
 *
 * Handles explicit and short-form breaks, `++strong emphasis++`, and the
 * `(text)[rate|pitch|volume:'value']` modifiers. Anything else is passed
 * through untouched. The result is NOT wrapped in <speak>.
 *
 * @param markdown Speech Markdown text
 * @returns The text with the supported constructs replaced by SSML elements
 */
function convertSpeechMarkdownFallback$1(markdown) {
    let out = markdown;
    // [break:"500ms"] -> <break time="500ms"/>
    out = out.replace(/\[break:"([^"]+)"\]/g, '<break time="$1"/>');
    // [500ms] -> <break time="500ms"/>, [2s] -> <break time="2s"/>
    // The unit is captured so that seconds are not turned into milliseconds.
    out = out.replace(/\[(\d+)(m?s)\]/g, '<break time="$1$2"/>');
    // ++text++ -> <emphasis level="strong">text</emphasis>
    out = out.replace(/\+\+([\s\S]+?)\+\+/g, '<emphasis level="strong">$1</emphasis>');
    // (text)[rate:'x-slow'] or (text)[rate:"x-slow"] -> prosody rate
    out = out.replace(/\(([\s\S]+?)\)\[rate:['"]([^'"]+)['"]\]/g, '<prosody rate="$2">$1</prosody>');
    // (text)[pitch:'high'] or (text)[pitch:"high"] -> prosody pitch
    out = out.replace(/\(([\s\S]+?)\)\[pitch:['"]([^'"]+)['"]\]/g, '<prosody pitch="$2">$1</prosody>');
    // (text)[volume:'loud'] or (text)[volume:"loud"] -> prosody volume
    out = out.replace(/\(([\s\S]+?)\)\[volume:['"]([^'"]+)['"]\]/g, '<prosody volume="$2">$1</prosody>');
    return out;
}
/**
 * Converter from Speech Markdown to SSML.
 *
 * Uses an instance of the lazily loaded SpeechMarkdown class when the feature
 * is enabled and the class can be resolved; otherwise falls back to the
 * built-in minimal converter wrapped in <speak>.
 */
let SpeechMarkdownConverter$1 = class SpeechMarkdownConverter {
    constructor() {
        // Created on demand by ensureInitialized()
        this.speechMarkdownInstance = null;
    }
    /**
     * Make sure a SpeechMarkdown instance exists when the feature is enabled.
     *
     * @returns The instance, or null when disabled or unavailable
     */
    async ensureInitialized() {
        if (!isSpeechMarkdownEnabled$1()) {
            this.speechMarkdownInstance = null;
            return null;
        }
        if (this.speechMarkdownInstance) {
            return this.speechMarkdownInstance;
        }
        const LoadedClass = await loadSpeechMarkdown();
        if (LoadedClass) {
            this.speechMarkdownInstance = new LoadedClass();
        }
        return this.speechMarkdownInstance;
    }
    /**
     * Convert Speech Markdown to SSML
     *
     * @param markdown Speech Markdown text
     * @param platform Target platform passed through to the library
     * @returns SSML text
     */
    async toSSML(markdown, platform = "amazon-alexa") {
        if (isSpeechMarkdownEnabled$1()) {
            // Attempt to initialize the full converter (leaves the instance null if unavailable)
            await this.ensureInitialized();
            if (this.speechMarkdownInstance) {
                return this.speechMarkdownInstance.toSSML(markdown, { platform });
            }
        }
        else {
            this.speechMarkdownInstance = null;
        }
        // Fallback: minimal conversion
        const fallbackBody = convertSpeechMarkdownFallback$1(markdown);
        return `<speak>${fallbackBody}</speak>`;
    }
    /**
     * Check if text is Speech Markdown
     *
     * @param text Text to check
     * @returns True if the text contains Speech Markdown syntax
     */
    isSpeechMarkdown(text) {
        const detected = isSpeechMarkdown$1(text);
        return detected;
    }
    /**
     * Get the platform names accepted as conversion targets
     *
     * @returns Array of platform names
     */
    getAvailablePlatforms() {
        const platforms = getAvailablePlatforms$1();
        return platforms;
    }
};
// Module-level converter instance, created once at load time; the standalone
// toSSML$1 helper delegates to it.
const defaultConverter$1 = new SpeechMarkdownConverter$1();
/**
 * Convert Speech Markdown to SSML
 *
 * Convenience wrapper that forwards both arguments to the module-level default
 * converter and resolves with whatever it produces.
 *
 * @param markdown Speech Markdown text
 * @param platform Target platform (defaults to "amazon-alexa")
 * @returns Promise resolving to the SSML text
 */
async function toSSML$1(markdown, platform = "amazon-alexa") {
    const ssml = await defaultConverter$1.toSSML(markdown, platform);
    return ssml;
}
/**
 * Check if text is Speech Markdown
 *
 * Heuristic: the text is tested against a fixed list of regular expressions
 * covering breaks, short-form emphasis markers and `(text)[modifier:"value"]`
 * constructs; the first match wins.
 *
 * NOTE(review): the short-form emphasis patterns are very permissive — e.g.
 * /-.*?-/ matches any text containing two hyphens — so ordinary prose can be
 * reported as Speech Markdown. Confirm whether callers rely on this.
 *
 * @param text Text to check
 * @returns True if any pattern matches
 */
function isSpeechMarkdown$1(text) {
    const detectors = [
        /\[\d+m?s\]/, // Breaks: [500ms]
        /\[break:"[^"\]]+"\]/, // Breaks with quotes: [break:"weak"] or [break:"500ms"]
        /\+\+.*?\+\+/, // Strong emphasis: ++text++
        /\+.*?\+/, // Moderate emphasis: +text+
        /~.*?~/, // No emphasis: ~text~
        /-.*?-/, // Reduced emphasis: -text-
        /\(.*?\)\[emphasis(:"(strong|moderate|reduced|none)")?\]/, // Standard emphasis: (text)[emphasis:"strong"]
        /\(.*?\)\[rate:"(x-slow|slow|medium|fast|x-fast)"\]/, // Rate: (text)[rate:"slow"]
        /\(.*?\)\[pitch:"(x-low|low|medium|high|x-high)"\]/, // Pitch: (text)[pitch:"high"]
        /\(.*?\)\[volume:"(silent|x-soft|soft|medium|loud|x-loud)"\]/, // Volume: (text)[volume:"loud"]
        /\(.*?\)\[voice:".*?"\]/, // Voice: (text)[voice:"Brian"]
        /\(.*?\)\[lang:".*?"\]/, // Language: (text)[lang:"en-US"]
        /\(.*?\)\[\w+:"?.*?"?\]/, // Any other modifier: (text)[modifier:"value"]
    ];
    for (const detector of detectors) {
        if (detector.test(text)) {
            return true;
        }
    }
    return false;
}
/**
 * Get the platform names accepted as conversion targets
 *
 * The list is hardcoded here rather than queried from the library; a fresh
 * array is returned on every call.
 *
 * @returns Array of platform names (amazon-alexa, google-assistant, microsoft-azure)
 */
function getAvailablePlatforms$1() {
    const platforms = [];
    platforms.push("amazon-alexa", "google-assistant", "microsoft-azure");
    return platforms;
}
/**
 * SSML Builder class for creating SSML markup.
 *
 * The builder keeps one SSML string in `this.ssml`. `add` replaces it,
 * `addBreak` inserts before the closing </speak>, and `addProsody` inserts
 * right after the opening <speak ...> tag.
 */
class SSMLBuilder {
    constructor() {
        this.ssml = "";
    }
    /**
     * Set the builder content from text or SSML
     * @param text Text or SSML; wrapped in <speak> unless it already starts with "<speak"
     * @returns The SSML string
     */
    add(text) {
        if (text.trim().startsWith("<speak")) {
            this.ssml = text;
        }
        else {
            this.ssml = `<speak>${text}</speak>`;
        }
        return this.ssml;
    }
    /**
     * Add a break at the end of the SSML
     *
     * If the current content has no closing </speak> (for example an empty
     * builder), the content is wrapped in <speak> so the break is not lost.
     *
     * @param time Break duration (e.g., '500ms')
     * @returns The SSML builder instance
     */
    addBreak(time = "500ms") {
        const breakElement = `<break time="${time}"/>`;
        const closeAt = this.ssml.lastIndexOf("</speak>");
        if (closeAt === -1) {
            this.ssml = `<speak>${this.ssml}${breakElement}</speak>`;
        }
        else {
            // Spliced with slice() rather than String.replace so "$" sequences in
            // the inserted text are never treated as replacement patterns
            this.ssml = this.ssml.slice(0, closeAt) + breakElement + this.ssml.slice(closeAt);
        }
        return this;
    }
    /**
     * Add a prosody element at the start of the SSML content
     *
     * The element is inserted directly after the opening speak tag, whether or
     * not that tag carries attributes. Without a speak tag, the element plus any
     * existing content is wrapped in <speak>.
     *
     * @param text Text to wrap with prosody
     * @param rate Speech rate
     * @param pitch Speech pitch
     * @param volume Speech volume
     * @returns The SSML builder instance
     */
    addProsody(text, rate, pitch, volume) {
        let prosodyAttrs = "";
        if (rate)
            prosodyAttrs += ` rate="${rate}"`;
        if (pitch)
            prosodyAttrs += ` pitch="${pitch}"`;
        if (volume)
            prosodyAttrs += ` volume="${volume}"`;
        const prosodyElement = `<prosody${prosodyAttrs}>${text}</prosody>`;
        // Matches both <speak> and <speak version="1.0" xmlns="...">
        const openTag = /<speak(?:\s[^>]*)?>/.exec(this.ssml);
        if (openTag) {
            const insertAt = openTag.index + openTag[0].length;
            this.ssml = this.ssml.slice(0, insertAt) + prosodyElement + this.ssml.slice(insertAt);
        }
        else {
            this.ssml = `<speak>${prosodyElement}${this.ssml}</speak>`;
        }
        return this;
    }
    /**
     * Wrap text with speak tags
     * @param text Text to wrap
     * @returns The text unchanged if it already starts with "<speak", otherwise wrapped
     */
    wrapWithSpeak(text) {
        if (!text.trim().startsWith("<speak")) {
            return `<speak>${text}</speak>`;
        }
        return text;
    }
    /**
     * Clear the SSML content
     */
    clearSSML() {
        this.ssml = "";
    }
    /**
     * Get the current SSML string
     * @returns The current SSML string
     */
    toString() {
        return this.ssml;
    }
}
/**
 * Drain a stream and return everything it produced as one contiguous chunk.
 *
 * Two stream shapes are recognised:
 * - objects with a `getReader()` method (Web ReadableStream): chunks are read
 *   until done; the result is a Buffer when `isNode` is true, else a Uint8Array;
 * - objects with an `on()` method (treated as a Node.js Readable): "data"
 *   chunks are collected as Buffers and concatenated on "end".
 *
 * @param stream The stream to read.
 * @returns A promise that resolves with the stream contents; it rejects on a
 * stream "error" event or when the stream shape is not recognised.
 */
async function streamToBuffer(stream) {
    const collected = [];
    let byteCount = 0;
    const isWebStream = "getReader" in stream && typeof stream.getReader === "function";
    if (!isWebStream) {
        if (typeof stream.on !== "function") {
            // Neither a Web stream nor an event-emitter style stream
            throw new Error("Unsupported stream type provided to streamToBuffer");
        }
        return new Promise((resolve, reject) => {
            stream.on("data", (chunk) => {
                const asBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                collected.push(asBuffer);
                byteCount += asBuffer.length;
            });
            stream.on("end", () => {
                resolve(Buffer.concat(collected, byteCount));
            });
            stream.on("error", (err) => {
                reject(err);
            });
        });
    }
    const reader = stream.getReader();
    try {
        let step = await reader.read();
        while (!step.done) {
            if (step.value) {
                collected.push(step.value);
                byteCount += step.value.length;
            }
            step = await reader.read();
        }
    }
    finally {
        // Always release the reader lock, even if a read rejected
        reader.releaseLock();
    }
    if (isNode) {
        // Convert each Uint8Array chunk to a Buffer before concatenating
        return Buffer.concat(collected.map((part) => Buffer.from(part)), byteCount);
    }
    // No Buffer assumed here: copy the chunks into one Uint8Array
    const merged = new Uint8Array(byteCount);
    collected.reduce((offset, part) => {
        merged.set(part, offset);
        return offset + part.length;
    }, 0);
    return merged;
}
/**
* Utility functions for handling different audio input sources
*/
/**
 * Ensure exactly one of text, filename, audioBytes or audioStream is set.
 *
 * A source counts as provided when its value is truthy.
 *
 * @param input Object carrying the candidate input sources
 * @throws Error when no source or more than one source is provided
 */
function validateSpeakInput(input) {
    const { text, filename, audioBytes, audioStream } = input;
    let provided = 0;
    for (const candidate of [text, filename, audioBytes, audioStream]) {
        if (candidate) {
            provided += 1;
        }
    }
    if (provided === 0) {
        throw new Error("No input provided. Please provide text, filename, audioBytes, or audioStream.");
    }
    if (provided > 1) {
        throw new Error("Multiple input sources provided. Please provide only one of: text, filename, audioBytes, or audioStream.");
    }
}
/**
 * Map a filename to an audio MIME type based on the text after its last dot.
 *
 * The comparison is case-insensitive; anything unrecognised maps to "audio/wav".
 *
 * @param filename File name or path
 * @returns MIME type string
 */
function getAudioFormatFromFilename(filename) {
    // A Map (not an object literal) so names like "constructor" cannot hit inherited keys
    const mimeByExtension = new Map([
        ["mp3", "audio/mpeg"],
        ["wav", "audio/wav"],
        ["ogg", "audio/ogg"],
        ["opus", "audio/opus"],
        ["aac", "audio/aac"],
        ["flac", "audio/flac"],
    ]);
    const segments = filename.toLowerCase().split(".");
    const extension = segments[segments.length - 1];
    const known = mimeByExtension.get(extension);
    return known === undefined ? "audio/wav" : known; // Default fallback
}
/**
 * Guess the audio MIME type from the leading bytes of the data.
 *
 * Checks, in order: MP3 ("ID3" tag or an 0xFF byte followed by a byte whose
 * top three bits are set), WAV ("RIFF" at 0 and "WAVE" at 8), OGG ("OggS"),
 * FLAC ("fLaC"). Inputs shorter than 4 bytes and unknown signatures yield
 * "audio/wav".
 *
 * @param audioBytes Byte sequence with `length` and `slice`
 * @returns MIME type string
 */
function detectAudioFormat(audioBytes) {
    const FALLBACK = "audio/wav";
    if (audioBytes.length < 4) {
        return FALLBACK;
    }
    const head = Array.from(audioBytes.slice(0, 12));
    // True when `signature` appears in the header starting at `offset`
    const hasSignature = (offset, signature) => signature.every((byte, i) => head[offset + i] === byte);
    const hasId3Tag = hasSignature(0, [0x49, 0x44, 0x33]);
    const hasFrameSync = head[0] === 0xff && (head[1] & 0xe0) === 0xe0;
    if (hasId3Tag || hasFrameSync) {
        return "audio/mpeg";
    }
    if (hasSignature(0, [0x52, 0x49, 0x46, 0x46]) && hasSignature(8, [0x57, 0x41, 0x56, 0x45])) {
        return "audio/wav";
    }
    if (hasSignature(0, [0x4f, 0x67, 0x67, 0x53])) {
        return "audio/ogg";
    }
    if (hasSignature(0, [0x66, 0x4c, 0x61, 0x43])) {
        return "audio/flac";
    }
    return FALLBACK;
}
/**
 * Read a file from disk and return its bytes (Node.js only).
 *
 * @param filename Path of the file to read
 * @returns Promise resolving to the contents as a Uint8Array
 * @throws Error when not running in Node.js, or when loading the fs module or
 * reading the file fails (the underlying message is appended)
 */
async function readAudioFile(filename) {
    if (!isNode) {
        throw new Error("File reading is only supported in Node.js environment");
    }
    try {
        const loadFs = new Function("m", "return import(m)");
        const fs = await loadFs("node:fs/promises");
        const contents = await fs.readFile(filename);
        return new Uint8Array(contents);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read audio file "${filename}": ${reason}`);
    }
}
/**
 * Drain an audio stream and return its contents as a plain Uint8Array.
 *
 * A Buffer result from streamToBuffer is copied into a new Uint8Array; any
 * other result is returned as-is. The Buffer check is guarded so it does not
 * throw in environments where no global `Buffer` exists.
 *
 * @param stream Stream accepted by streamToBuffer
 * @returns Promise resolving to the stream contents
 */
async function streamToBytes(stream) {
    const result = await streamToBuffer(stream);
    // `instanceof Buffer` on its own is a ReferenceError when Buffer is undefined
    if (typeof Buffer !== "undefined" && result instanceof Buffer) {
        return new Uint8Array(result);
    }
    return result;
}
/**
 * Turn a validated input object into audio bytes plus a MIME type.
 *
 * Sources are tried in the order audioBytes, audioStream, filename. For bytes
 * and streams the MIME type comes from the byte signature; for files it comes
 * from the filename.
 *
 * @param input Object with exactly one input source set
 * @returns Promise resolving to `{ audioBytes, mimeType }`
 * @throws Error from validation, or when the only source provided is not audio
 */
async function processAudioInput(input) {
    validateSpeakInput(input);
    const { audioBytes, audioStream, filename } = input;
    if (audioBytes) {
        return { audioBytes, mimeType: detectAudioFormat(audioBytes) };
    }
    if (audioStream) {
        const streamed = await streamToBytes(audioStream);
        return { audioBytes: streamed, mimeType: detectAudioFormat(streamed) };
    }
    if (filename) {
        const fileBytes = await readAudioFile(filename);
        return { audioBytes: fileBytes, mimeType: getAudioFormatFromFilename(filename) };
    }
    throw new Error("No valid audio input provided");
}
// Frozen, prototype-less namespace object bundling the audio input helpers
var audioInput = /*#__PURE__*/Object.freeze({
    __proto__: null,
    detectAudioFormat,
    getAudioFormatFromFilename,
    processAudioInput,
    readAudioFile,
    streamToBytes,
    validateSpeakInput
});
/**
 * Keep the voices that list the given language.
 *
 * A voice matches when any entry of its `languageCodes` has a `bcp47` value
 * equal to `languageCode`, ignoring case.
 *
 * @param voices Array of voices to filter
 * @param languageCode BCP-47 language code to filter by
 * @returns Filtered array of voices
 */
function filterByLanguage(voices, languageCode) {
    const matchesLanguage = ({ bcp47 }) => bcp47.toLowerCase() === languageCode.toLowerCase();
    return voices.filter(({ languageCodes }) => languageCodes.some(matchesLanguage));
}
/**
 * Keep the voices whose `gender` strictly equals the requested gender.
 *
 * @param voices Array of voices to filter
 * @param gender Gender to filter by
 * @returns Filtered array of voices
 */
function filterByGender(voices, gender) {
    const hasGender = (candidate) => {
        const { gender: candidateGender } = candidate;
        return candidateGender === gender;
    };
    return voices.filter(hasGender);
}
/**
 * Keep the voices whose `provider` strictly equals the requested provider.
 *
 * @param voices Array of voices to filter
 * @param provider Provider to filter by
 * @returns Filtered array of voices
 */
function filterByProvider(voices, provider) {
    const fromProvider = (candidate) => {
        const { provider: candidateProvider } = candidate;
        return candidateProvider === provider;
    };
    return voices.filter(fromProvider);
}
/**
 * Return the first voice whose `id` strictly equals the given id.
 *
 * @param voices Array of voices to search
 * @param id Voice ID to find
 * @returns The found voice or undefined
 */
function findById(voices, id) {
    const hasId = (candidate) => {
        const { id: candidateId } = candidate;
        return candidateId === id;
    };
    return voices.find(hasId);
}
/**
 * Collect the distinct `bcp47` values found across all voices.
 *
 * Values appear in first-seen order.
 *
 * @param voices Array of voices
 * @returns Array of unique language codes
 */
function getAvailableLanguages(voices) {
    const seen = new Set();
    voices.forEach((voice) => {
        voice.languageCodes.forEach((lang) => {
            seen.add(lang.bcp47);
        });
    });
    return [...seen];
}
// Frozen, prototype-less namespace object bundling the voice list helpers
var voiceUtils = /*#__PURE__*/Object.freeze({
    __proto__: null,
    filterByGender,
    filterByLanguage,
    filterByProvider,
    findById,
    getAvailableLanguages
});
/**
* Language utilities for normalizing language codes across different formats
*/
/**
 * Language normalization utilities
 *
 * Turns language codes in several spellings ("en", "en-US", "en_US",
 * "sr-Latn-RS", "mms_eng") into a common record, using the static lookup
 * tables attached to the class below.
 */
class LanguageNormalizer {
    /**
     * Normalize a language code to standard formats
     *
     * Subtags may be separated by "-" or "_". A four-letter second subtag is
     * treated as a script (kept in the BCP-47 tag, title-cased) and the region
     * is then taken from the third subtag if that looks like a region (two
     * letters or three digits). Otherwise the second subtag is the region.
     * `countryCode` is used only when the code itself yields no region.
     *
     * @param langCode Input language code (can be ISO639-1/2/3, BCP47, or locale)
     * @param countryCode Optional country code to help with regionalization
     * @returns Object with `iso639_3`, `bcp47`, `display` and `countryCode`;
     * the "und" / "Unknown" record when the input cannot be processed
     */
    static normalize(langCode, countryCode) {
        try {
            // Handle MMS prefix if present
            const code = langCode.startsWith("mms_") ? langCode.substring(4) : langCode;
            const subtags = code.split(/[-_]/);
            const language = subtags[0].toLowerCase();
            let script;
            let region;
            if (subtags.length > 1) {
                const second = subtags[1];
                if (/^[A-Za-z]{4}$/.test(second)) {
                    // e.g. "Latn" in sr-Latn-RS: a script, not a region
                    script = second.charAt(0).toUpperCase() + second.slice(1).toLowerCase();
                    const third = subtags[2];
                    if (third !== undefined && /^(?:[A-Za-z]{2}|\d{3})$/.test(third)) {
                        region = third.toUpperCase();
                    }
                }
                else {
                    region = second.toUpperCase();
                }
            }
            if (region === undefined) {
                region = countryCode === null || countryCode === undefined ? undefined : countryCode.toUpperCase();
            }
            // Own-property lookup so codes such as "constructor" never resolve to
            // members inherited from Object.prototype
            const lookup = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
            // Convert to ISO 639-3 (unknown codes pass through unchanged)
            const iso639_3 = lookup(LanguageNormalizer.iso1To3, language) || language;
            // Create BCP-47 tag from the parts that are present
            const bcp47 = [language, script, region].filter(Boolean).join("-");
            // Create display name
            let display = lookup(LanguageNormalizer.languageNames, language) || language;
            if (region) {
                const regionName = lookup(LanguageNormalizer.regionNames, region);
                display += ` (${regionName || region})`;
            }
            return {
                iso639_3,
                bcp47,
                display,
                countryCode: region,
            };
        }
        catch (_error) {
            // Fallback for unknown codes (e.g. a non-string langCode)
            return {
                iso639_3: "und",
                bcp47: "und",
                display: "Unknown",
            };
        }
    }
    /**
     * Get the display name for a language code
     * @param langCode Language code
     * @returns Display name
     */
    static getDisplayName(langCode) {
        return LanguageNormalizer.normalize(langCode).display;
    }
    /**
     * Get the ISO 639-3 code for a language code
     * @param langCode Language code
     * @returns ISO 639-3 code
     */
    static getISO639_3(langCode) {
        return LanguageNormalizer.normalize(langCode).iso639_3;
    }
    /**
     * Get the BCP-47 tag for a language code
     * @param langCode Language code
     * @param countryCode Optional country code
     * @returns BCP-47 tag
     */
    static getBCP47(langCode, countryCode) {
        return LanguageNormalizer.normalize(langCode, countryCode).bcp47;
    }
}
/**
 * Common language display names
 */
LanguageNormalizer.languageNames = {
    en: "English",
    fr: "French",
    es: "Spanish",
    de: "German",
    it: "Italian",
    ja: "Japanese",
    ko: "Korean",
    zh: "Chinese",
    ru: "Russian",
    pt: "Portuguese",
    ar: "Arabic",
    hi: "Hindi",
    nl: "Dutch",
    sv: "Swedish",
    fi: "Finnish",
    no: "Norwegian",
    da: "Danish",
    pl: "Polish",
    tr: "Turkish",
    cs: "Czech",
    hu: "Hungarian",
    el: "Greek",
    he: "Hebrew",
    th: "Thai",
    vi: "Vietnamese",
    id: "Indonesian",
    ms: "Malay",
    ro: "Romanian",
    sk: "Slovak",
    uk: "Ukrainian",
    bg: "Bulgarian",
    hr: "Croatian",
    lt: "Lithuanian",
    lv: "Latvian",
    et: "Estonian",
    sl: "Slovenian",
    sr: "Serbian",
};
/**
 * Common region display names
 */
LanguageNormalizer.regionNames = {
    US: "United States",
    GB: "United Kingdom",
    AU: "Australia",
    CA: "Canada",
    IN: "India",
    IE: "Ireland",
    ZA: "South Africa",
    NZ: "New Zealand",
    FR: "France",
    DE: "Germany",
    IT: "Italy",
    ES: "Spain",
    MX: "Mexico",
    JP: "Japan",
    KR: "Korea",
    CN: "China",
    TW: "Taiwan",
    HK: "Hong Kong",
    BR: "Brazil",
    PT: "Portugal",
    RU: "Russia",
};
/**
 * ISO 639-1 to ISO 639-3 mapping
 */
LanguageNormalizer.iso1To3 = {
    ar: "ara",
    bg: "bul",
    ca: "cat",
    cs: "ces",
    da: "dan",
    de: "deu",
    el: "ell",
    en: "eng",
    es: "spa",
    et: "est",
    fi: "fin",
    fr: "fra",
    he: "heb",
    hi: "hin",
    hr: "hrv",
    hu: "hun",
    id: "ind",
    it: "ita",
    ja: "jpn",
    ko: "kor",
    lt: "lit",
    lv: "lav",
    ms: "msa",
    nl: "nld",
    no: "nor",
    pl: "pol",
    pt: "por",
    ro: "ron",
    ru: "rus",
    sk: "slk",
    sl: "slv",
    sr: "srp",
    sv: "swe",
    th: "tha",
    tr: "tur",
    uk: "ukr",
    vi: "vie",
    zh: "zho",
};
/**
* SSML Compatibility Layer
*
* This module provides cross-engine SSML compatibility by:
* 1. Validating SSML structure
* 2. Converting SSML to engine-specific formats
* 3. Providing fallbacks for unsupported features
* 4. Ensuring proper SSML nesting and structure
*/
/**
 * SSML capabilities for different TTS engines, keyed by engine id.
 *
 * Each entry carries:
 * - supportsSSML: whether the engine accepts SSML at all
 * - supportLevel: "full", "limited" or "none"
 * - supportedTags / unsupportedTags: SSML element names; "*" is used by the
 *   no-support entries, presumably meaning "every tag" (TODO confirm with the
 *   code that consumes this table)
 * - requiresNamespace / requiresVersion: flags read elsewhere; their exact
 *   effect is not visible from this table
 */
const ENGINE_SSML_CAPABILITIES = {
// Full SSML Support
sapi: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: true,
},
witai: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: false,
},
watson: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: false,
},
cerevoice: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"audio",
"break",
"emphasis",
"lexicon",
"mark",
"meta",
"metadata",
"p",
"phoneme",
"prosody",
"say-as",
"sub",
"s",
"voice",
],
unsupportedTags: ["lang"],
requiresNamespace: false,
requiresVersion: false,
},
// Full SSML support; the only entry that sets both requiresNamespace and requiresVersion
azure: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mstts:express-as",
],
unsupportedTags: [],
requiresNamespace: true,
requiresVersion: true,
},
// Limited SSML support (base values; see the per-voice notes on each entry)
polly: {
supportsSSML: true,
supportLevel: "limited", // Depends on voice engine type
supportedTags: [
"speak",
"prosody",
"break",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mark",
"lang",
],
unsupportedTags: [], // Depends on voice engine type
requiresNamespace: true,
requiresVersion: false,
},
google: {
supportsSSML: true,
supportLevel: "limited", // Depends on voice type
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mark",
"lang",
"audio",
],
unsupportedTags: [], // Depends on voice type
requiresNamespace: false,
requiresVersion: false,
},
// No SSML Support
elevenlabs: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
openai: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
playht: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
upliftai: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
sherpaonnx: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
"sherpaonnx-wasm": {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
// Limited SSML support (eSpeak variants): a small tag subset only
espeak: {
supportsSSML: true,
supportLevel: "limited",
supportedTags: ["speak", "prosody", "break", "emphasis", "p", "s"],
unsupportedTags: ["voice", "phoneme", "say-as", "sub"],
requiresNamespace: false,
requiresVersion: false,
},
"espeak-wasm": {
supportsSSML: true,
supportLevel: "limited",
supportedTags: ["speak", "prosody", "break", "emphasis", "p", "s"],
unsupportedTags: ["voice", "phoneme", "say-as", "sub"],
requiresNamespace: false,
requiresVersion: false,
},
};
/**
 * Voice-specific SSML capabilities for engines with dynamic support.
 *
 * Keyed by engine id, then by voice type. Each entry provides the
 * `supportLevel` and `unsupportedTags` that SSMLCompatibilityManager.getCapabilities
 * substitutes for the engine's base values once a voice type has been detected
 * from the voice id. Only engines listed here get per-voice adjustment.
 */
const VOICE_SPECIFIC_CAPABILITIES = {
// Amazon Polly voice engine types
polly: {
standard: {
supportLevel: "full",
unsupportedTags: [],
},
"long-form": {
supportLevel: "full",
unsupportedTags: [],
},
neural: {
supportLevel: "limited",
unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect"],
},
// Same restrictions as neural, plus "mark"
generative: {
supportLevel: "limited",
unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect", "mark"],
},
},
// Google Cloud TTS voice types
google: {
standard: {
supportLevel: "full",
unsupportedTags: [],
},
wavenet: {
supportLevel: "full",
unsupportedTags: [],
},
neural2: {
supportLevel: "limited",
unsupportedTags: ["mark"],
},
// "*" mirrors the no-SSML engine entries: every tag is listed as unsupported
journey: {
supportLevel: "none",
unsupportedTags: ["*"],
},
studio: {
supportLevel: "none",
unsupportedTags: ["*"],
},
},
};
/**
* SSML Compatibility Manager
*/
// biome-ignore lint/complexity/noStaticOnlyClass: using a static utility class for organization
class SSMLCompatibilityManager {
/**
* Get SSML capabilities for a specific engine and voice
*/
static getCapabilities(engine, voiceId) {
const baseCapabilities = ENGINE_SSML_CAPABILITIES[engine];
if (!baseCapabilities) {
// Default to no SSML support for unknown engines
return {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
};
}
// For engines with voice-specific capabilities, adjust based on voice
if (voiceId &&
VOICE_SPECIFIC_CAPABILITIES[engine]) {
const voiceCapabilities = SSMLCompatibilityManager.getVoiceSpecificCapabilities(engine, voiceId);
if (voiceCapabilities) {
return {
...baseCapabilities,
supportLevel: voiceCapabilities.supportLevel,
unsupportedTags: voiceCapabilities.unsupportedTags,
};
}
}
return baseCapabilities;
}
/**
* Get voice-specific SSML capabilities
*/
static getVoiceSpecificCapabilities(engine, voiceId) {
const engineCapabilities = VOICE_SPECIFIC_CAPABILITIES[engine];
if (!engineCapabilities)
return null;
// Determine voice type based on voice ID patterns
const voiceType = SSMLCompatibilityManager.detectVoiceType(engine, voiceId);
return engineCapabilities[voiceType] || null;
}
/**
* Detect voice type from voice ID
*/
static detectVoiceType(engine, voiceId) {
const lowerVoiceId = voiceId.toLowerCase();
switch (engine) {
case "polly":
// Amazon Polly voice engine detection
if (lowerVoiceId.includes("neural"))
return "neural";
if (lowerVoiceId.includes("generative"))
return "generative";
if (lowerVoiceId.includes("long-form"))
return "long-form";
return "standard";
case "google":
// Google Cloud TTS voice type detection
if (lowerVoiceId.includes("neural2"))
return "neural2";
if (lowerVoiceId.includes("journey"))
return "journey";
if (lowerVoiceId.includes("studio"))
return "studio";
if (lowerVoiceId.includes("wavenet"))
return "wavenet";
if (lowerVoiceId.includes("standard"))
return "standard";
// Default to standard for older voice naming
return "standard";
default:
return "default";
}
}
/**
* Validate SSML for a specific engine
*/
static validateSSML(ssml, engine, voiceId) {
const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId);
const errors = [];
const warnings = [];
// Basic SSML structure validation
if (!ssml.trim().startsWith("<speak") || !ssml.trim().endsWith("</speak>")) {
errors.push("SSML must be wrapped in <speak> tags");
}
// Check if engine supports SSML at all
if (!capabilities.supportsSSML) {
warnings.push(`Engine '${engine}' does not support SSML. Tags will be stripped.`);
return {
isValid: true, // Valid for processing (will be stripped)
errors,
warnings,
};
}