UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

1,533 lines (1,518 loc) 2.58 MB
'use strict'; var node_module = require('node:module'); var path = require('node:path'); var node_child_process = require('node:child_process'); var fs = require('node:fs'); var os = require('node:os'); var require$$0$5 = require('path'); var require$$0$2 = require('fs'); var require$$0 = require('constants'); var require$$0$1 = require('stream'); var require$$1 = require('util'); var require$$5 = require('assert'); var require$$2 = require('events'); var require$$0$3 = require('buffer'); var require$$6 = require('string_decoder'); var require$$0$4 = require('zlib'); function _interopNamespaceDefault(e) { var n = Object.create(null); if (e) { Object.keys(e).forEach(function (k) { if (k !== 'default') { var d = Object.getOwnPropertyDescriptor(e, k); Object.defineProperty(n, k, d.get ? d : { enumerable: true, get: function () { return e[k]; } }); } }); } n.default = e; return Object.freeze(n); } function _mergeNamespaces(n, m) { m.forEach(function (e) { e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) { if (k !== 'default' && !(k in n)) { var d = Object.getOwnPropertyDescriptor(e, k); Object.defineProperty(n, k, d.get ? 
d : { enumerable: true, get: function () { return e[k]; } }); } }); }); return Object.freeze(n); } var path__namespace = /*#__PURE__*/_interopNamespaceDefault(path); var fs__namespace = /*#__PURE__*/_interopNamespaceDefault(fs); var os__namespace = /*#__PURE__*/_interopNamespaceDefault(os); /** * Environment detection and cross-platform utilities */ /** * Check if code is running in a browser environment */ const isBrowser$1 = typeof window !== "undefined"; /** * Check if code is running in a Node.js environment */ const isNode = !isBrowser$1 && typeof process !== "undefined" && typeof process.versions !== "undefined" && typeof process.versions.node !== "undefined"; /** * File system utilities that work in both environments */ const fileSystem = { /** * Read a file asynchronously * @param path Path to the file * @returns Promise resolving to the file contents as a string */ readFile: async (path) => { if (isNode) { // Node.js implementation const fs = await new Function("m", "return import(m)")("node:fs/promises"); return fs.readFile(path, "utf-8"); } // Browser implementation - fetch from URL const response = await fetch(path); if (!response.ok) { throw new Error(`Failed to fetch ${path}: ${response.status} ${response.statusText}`); } return response.text(); }, /** * Read a file synchronously * @param path Path to the file * @returns File contents as a string */ readFileSync: (path) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const fs = new Function("n", "return require(n)")("node" + ":fs"); return fs.readFileSync(path, "utf-8"); } throw new Error("Synchronous file reading is not supported in browsers"); }, /** * Write a file asynchronously * @param path Path to the file * @param data Data to write * @returns Promise resolving when the file is written */ writeFile: async (path, data) => { if (isNode) { // Node.js implementation const fs = await new Function("m", "return 
import(m)")("node:fs/promises"); return fs.writeFile(path, data); } // Browser implementation - download file const blobData = typeof data === "string" ? data : Uint8Array.from(data); const blob = new Blob([blobData], { type: "application/octet-stream" }); const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; a.download = path.split("/").pop() || "download"; document.body.appendChild(a); a.click(); setTimeout(() => { if (document === null || document === void 0 ? void 0 : document.body) { document.body.removeChild(a); } URL.revokeObjectURL(url); }, 100); }, /** * Write a file synchronously * @param path Path to the file * @param data Data to write */ writeFileSync: (path, data) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const fs = new Function("n", "return require(n)")("node" + ":fs"); fs.writeFileSync(path, data); } else { throw new Error("Synchronous file writing is not supported in browsers"); } }, /** * Check if a file exists asynchronously * @param path Path to the file * @returns Promise resolving to true if the file exists, false otherwise */ exists: async (path) => { if (isNode) { // Node.js implementation const fs = await new Function("m", "return import(m)")("node:fs/promises"); try { await fs.access(path); return true; } catch (_a) { return false; } } else { // Browser implementation - try to fetch try { const response = await fetch(path, { method: "HEAD" }); return response.ok; } catch (_b) { return false; } } }, /** * Check if a file exists synchronously * @param path Path to the file * @returns True if the file exists, false otherwise */ existsSync: (path) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const fs = new Function("n", "return require(n)")("node" + ":fs"); return fs.existsSync(path); } throw new Error("Synchronous file existence check is not supported in browsers"); }, }; /** * 
Path utilities that work in both environments */ const pathUtils = { /** * Join path segments * @param paths Path segments to join * @returns Joined path */ join: (...paths) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const path = new Function("n", "return require(n)")("node" + ":path"); return path.join(...paths); } // Browser implementation return paths.join("/").replace(/\/+/g, "/"); }, /** * Get the directory name of a path * @param path Path * @returns Directory name */ dirname: (path) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const nodePath = new Function("n", "return require(n)")("node" + ":path"); return nodePath.dirname(path); } // Browser implementation return path.split("/").slice(0, -1).join("/") || "."; }, /** * Get the base name of a path * @param path Path * @returns Base name */ basename: (path) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const nodePath = new Function("n", "return require(n)")("node" + ":path"); return nodePath.basename(path); } // Browser implementation return path.split("/").pop() || ""; }, /** * Get the extension of a path * @param path Path * @returns Extension */ extname: (path) => { if (isNode) { // Node.js implementation // eslint-disable-next-line @typescript-eslint/no-var-requires const nodePath = new Function("n", "return require(n)")("node" + ":path"); return nodePath.extname(path); } // Browser implementation const basename = path.split("/").pop() || ""; const dotIndex = basename.lastIndexOf("."); return dotIndex === -1 ? 
"" : basename.slice(dotIndex); }, }; const runtimeConfig = {}; const TRUE_PATTERN = /^(1|true|yes|on)$/i; const FALSE_PATTERN = /^(0|false|no|off)$/i; function parseBooleanFlag(value) { if (value === undefined || value === null) return undefined; const text = String(value).trim(); if (!text) return undefined; if (TRUE_PATTERN.test(text)) return true; if (FALSE_PATTERN.test(text)) return false; return undefined; } function getEnvEnabledOverride() { var _a; if (!isNode) return undefined; try { const env = (_a = process === null || process === void 0 ? void 0 : process.env) !== null && _a !== void 0 ? _a : {}; const disableFlag = parseBooleanFlag(env.SPEECHMARKDOWN_DISABLE); if (disableFlag === true) { return false; } if (disableFlag === false) { return true; } const enableFlag = parseBooleanFlag(env.SPEECHMARKDOWN_ENABLE); if (enableFlag !== undefined) { return enableFlag; } } catch (_b) { // Ignore env parsing errors and fall back to defaults } return undefined; } function isSpeechMarkdownEnabled() { if (typeof runtimeConfig.enabled === "boolean") { return runtimeConfig.enabled; } const envOverride = getEnvEnabledOverride(); if (typeof envOverride === "boolean") { return envOverride; } // Default: enabled everywhere (Node + browser) return true; } function configureSpeechMarkdown(options = {}) { if (typeof options.enabled === "boolean") { runtimeConfig.enabled = options.enabled; } } /** * Speech Markdown converter using the official speechmarkdown-js library * * This module provides functions to convert Speech Markdown to SSML * using the speechmarkdown-js library (https://github.com/speechmarkdown/speechmarkdown-js) */ // Dynamic import for speechmarkdown-js let SpeechMarkdown$1 = null; let speechMarkdownLoaded = false; async function loadSpeechMarkdown() { var _a, _b, _c; if (speechMarkdownLoaded) return SpeechMarkdown$1; try { if (!isSpeechMarkdownEnabled()) { console.warn("speechmarkdown-js disabled (set SPEECHMARKDOWN_DISABLE=false or configureSpeechMarkdown({ 
enabled: true }) to re-enable). Using built-in fallback."); return null; } let module = null; if (isNode) { try { const requireFn = typeof require !== "undefined" ? require : undefined; if (requireFn) { module = requireFn("speechmarkdown-js"); } } catch (_d) { // Fallback to dynamic import below } } if (!module) { try { module = await Promise.resolve().then(function () { return index$3; }); } catch (_e) { // Dynamic import failed } } // Prefer named export, but tolerate default exports SpeechMarkdown$1 = (_c = (_a = module === null || module === void 0 ? void 0 : module.SpeechMarkdown) !== null && _a !== void 0 ? _a : (_b = module === null || module === void 0 ? void 0 : module.default) === null || _b === void 0 ? void 0 : _b.SpeechMarkdown) !== null && _c !== void 0 ? _c : module === null || module === void 0 ? void 0 : module.default; if (!SpeechMarkdown$1) { throw new Error("speechmarkdown-js module did not expose SpeechMarkdown class"); } speechMarkdownLoaded = true; return SpeechMarkdown$1; } catch (_error) { console.warn("speechmarkdown-js not available. Using built-in fallback. 
To enable full Speech Markdown in browsers, add 'speechmarkdown-js' to your app and it will be loaded at runtime."); return null; } } // Lightweight fallback converter for a minimal subset used in tests function convertSpeechMarkdownFallback(markdown) { let out = markdown; // [break:"500ms"] -> <break time="500ms"/> out = out.replace(/\[break:"([^"]+)"\]/g, '<break time="$1"/>'); // [500ms] or [500s] -> <break time="500ms"/> out = out.replace(/\[(\d+)m?s\]/g, '<break time="$1ms"/>'); // ++text++ -> <emphasis level="strong">text</emphasis> out = out.replace(/\+\+([\s\S]+?)\+\+/g, '<emphasis level="strong">$1</emphasis>'); // (text)[rate:'x-slow'] or (text)[rate:"x-slow"] -> prosody rate out = out.replace(/\(([\s\S]+?)\)\[rate:['"]([^'"]+)['"]\]/g, '<prosody rate="$2">$1</prosody>'); // (text)[pitch:'high'] or (text)[pitch:"high"] -> prosody pitch out = out.replace(/\(([\s\S]+?)\)\[pitch:['"]([^'"]+)['"]\]/g, '<prosody pitch="$2">$1</prosody>'); // (text)[volume:'loud'] or (text)[volume:"loud"] -> prosody volume out = out.replace(/\(([\s\S]+?)\)\[volume:['"]([^'"]+)['"]\]/g, '<prosody volume="$2">$1</prosody>'); return out; } /** * SpeechMarkdownConverter class for converting Speech Markdown to SSML */ class SpeechMarkdownConverter { constructor() { this.speechMarkdownInstance = null; } async ensureInitialized() { if (!isSpeechMarkdownEnabled()) { this.speechMarkdownInstance = null; return null; } if (!this.speechMarkdownInstance) { const SpeechMarkdownClass = await loadSpeechMarkdown(); if (SpeechMarkdownClass) { this.speechMarkdownInstance = new SpeechMarkdownClass(); } } return this.speechMarkdownInstance; } /** * Convert Speech Markdown to SSML * * @param markdown Speech Markdown text * @param platform Target platform (amazon-alexa, google-assistant, microsoft-azure, etc.) 
* @returns SSML text */ async toSSML(markdown, platform = "amazon-alexa") { if (!isSpeechMarkdownEnabled()) { this.speechMarkdownInstance = null; const converted = convertSpeechMarkdownFallback(markdown); return `<speak>${converted}</speak>`; } // Attempt to initialize the full converter (no-op if disabled/unavailable) await this.ensureInitialized(); if (this.speechMarkdownInstance) { return this.speechMarkdownInstance.toSSML(markdown, { platform }); } // Fallback: minimal conversion const converted = convertSpeechMarkdownFallback(markdown); return `<speak>${converted}</speak>`; } /** * Check if text is Speech Markdown * * @param text Text to check * @returns True if the text contains Speech Markdown syntax */ isSpeechMarkdown(text) { return isSpeechMarkdown(text); } /** * Get the available platforms supported by the Speech Markdown library * * @returns Array of platform names */ getAvailablePlatforms() { return getAvailablePlatforms(); } } // Create a default converter instance const defaultConverter = new SpeechMarkdownConverter(); /** * Convert Speech Markdown to SSML * * This function uses the speechmarkdown-js library to convert Speech Markdown syntax to SSML. * The library supports various Speech Markdown features including: * - Breaks: [500ms] or [break:"500ms"] * - Emphasis: ++emphasized++ or +emphasized+ * - Rate, pitch, volume: (text)[rate:"slow"], (text)[pitch:"high"], (text)[volume:"loud"] * - And many more (see the speechmarkdown-js documentation) * * @param markdown Speech Markdown text * @param platform Target platform (amazon-alexa, google-assistant, microsoft-azure, etc.) * @returns SSML text */ async function toSSML(markdown, platform = "amazon-alexa") { return await defaultConverter.toSSML(markdown, platform); } /** * Check if text is Speech Markdown * * This function checks if the text contains Speech Markdown syntax patterns. 
* It uses regular expressions to detect common Speech Markdown patterns such as: * - Breaks: [500ms] or [break:"500ms"] * - Emphasis: ++text++ or +text+ * - Rate, pitch, volume: (text)[rate:"slow"], (text)[pitch:"high"], (text)[volume:"loud"] * * @param text Text to check * @returns True if the text contains Speech Markdown syntax */ function isSpeechMarkdown(text) { // Use a simple heuristic to check for common Speech Markdown patterns // This is a simplified version as the library doesn't provide a direct way to check const patterns = [ /\[\d+m?s\]/, // Breaks: [500ms] /\[break:"[^"\]]+"\]/, // Breaks with quotes: [break:"weak"] or [break:"500ms"] /\+\+.*?\+\+/, // Strong emphasis: ++text++ /\+.*?\+/, // Moderate emphasis: +text+ /~.*?~/, // No emphasis: ~text~ /-.*?-/, // Reduced emphasis: -text- /\(.*?\)\[emphasis(:"(strong|moderate|reduced|none)")?\]/, // Standard emphasis: (text)[emphasis:"strong"] /\(.*?\)\[rate:"(x-slow|slow|medium|fast|x-fast)"\]/, // Rate: (text)[rate:"slow"] /\(.*?\)\[pitch:"(x-low|low|medium|high|x-high)"\]/, // Pitch: (text)[pitch:"high"] /\(.*?\)\[volume:"(silent|x-soft|soft|medium|loud|x-loud)"\]/, // Volume: (text)[volume:"loud"] /\(.*?\)\[voice:".*?"\]/, // Voice: (text)[voice:"Brian"] /\(.*?\)\[lang:".*?"\]/, // Language: (text)[lang:"en-US"] /\(.*?\)\[\w+:"?.*?"?\]/, // Any other Speech Markdown modifier: (text)[modifier:"value"] ]; return patterns.some((pattern) => pattern.test(text)); } /** * Get the available platforms supported by the Speech Markdown library * * This function returns the list of platforms supported by the speechmarkdown-js library. * These platforms have different SSML dialects, and the library will generate * SSML appropriate for the specified platform. 
* * @returns Array of platform names (amazon-alexa, google-assistant, microsoft-azure) */ function getAvailablePlatforms() { // The library doesn't expose a direct way to get platforms, so we hardcode them // These are the platforms supported by speechmarkdown-js as of version 1.x return ["amazon-alexa", "google-assistant", "microsoft-azure"]; } var converter = /*#__PURE__*/Object.freeze({ __proto__: null, SpeechMarkdownConverter: SpeechMarkdownConverter, configureSpeechMarkdown: configureSpeechMarkdown, getAvailablePlatforms: getAvailablePlatforms, isSpeechMarkdown: isSpeechMarkdown, toSSML: toSSML }); /** * SSML Builder class for creating SSML markup */ class SSMLBuilder { constructor() { this.ssml = ""; } /** * Add text or SSML to the builder * @param text Text or SSML to add * @returns The SSML string */ add(text) { // If text doesn't start with <speak>, wrap it if (text.trim().startsWith("<speak")) { this.ssml = text; } else { this.ssml = `<speak>${text}</speak>`; } return this.ssml; } /** * Add a break to the SSML * @param time Break duration (e.g., '500ms') * @returns The SSML builder instance */ addBreak(time = "500ms") { this.ssml = this.ssml.replace("</speak>", `<break time="${time}"/></speak>`); return this; } /** * Add prosody element to the SSML * @param text Text to wrap with prosody * @param rate Speech rate * @param pitch Speech pitch * @param volume Speech volume * @returns The SSML builder instance */ addProsody(text, rate, pitch, volume) { let prosodyAttrs = ""; if (rate) prosodyAttrs += ` rate="${rate}"`; if (pitch) prosodyAttrs += ` pitch="${pitch}"`; if (volume) prosodyAttrs += ` volume="${volume}"`; const prosodyElement = `<prosody${prosodyAttrs}>${text}</prosody>`; if (this.ssml.includes("<speak>")) { this.ssml = this.ssml.replace("<speak>", `<speak>${prosodyElement}`); } else { this.ssml = `<speak>${prosodyElement}</speak>`; } return this; } /** * Wrap text with speak tags * @param text Text to wrap * @returns SSML string with speak tags */ 
wrapWithSpeak(text) { if (!text.trim().startsWith("<speak")) { return `<speak>${text}</speak>`; } return text; } /** * Clear the SSML content */ clearSSML() { this.ssml = ""; } /** * Get the current SSML string * @returns The current SSML string */ toString() { return this.ssml; } } /** * Reads a ReadableStream<Uint8Array> (Web) or NodeJS.ReadableStream completely * and returns its contents as a single Buffer (in Node.js) or Uint8Array (in Browser). * @param stream The stream to read. * @returns A promise that resolves with the stream contents. */ async function streamToBuffer(stream // Use imported Readable type ) { const chunks = []; // Use a union type for chunks array let totalLength = 0; // Check if it's a Web ReadableStream (has getReader) if ("getReader" in stream && typeof stream.getReader === "function") { const reader = stream.getReader(); try { while (true) { const { done, value } = await reader.read(); if (done) { break; } if (value) { // value is Uint8Array from Web Stream chunks.push(value); // Store as Uint8Array initially totalLength += value.length; } } } finally { reader.releaseLock(); } // Concatenate AFTER the loop for Web Streams if (isNode) { // Use isNode constant // Convert Uint8Array chunks to Buffer before concatenating in Node const bufferChunks = chunks.map((chunk) => Buffer.from(chunk)); return Buffer.concat(bufferChunks, totalLength); } // Browser environment: Concatenate Uint8Array chunks const result = new Uint8Array(totalLength); let offset = 0; for (const chunk of chunks) { result.set(chunk, offset); offset += chunk.length; } return result; } if (typeof stream.on === "function") { // Use type assertion // Assume it's a Node.js Readable stream return new Promise((resolve, reject) => { // Explicitly assert stream type for event listeners const nodeStream = stream; nodeStream.on("data", (chunk) => { const bufferChunk = Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk); chunks.push(bufferChunk); totalLength += bufferChunk.length; }); nodeStream.on("end", () => { // Concatenate collected Buffer chunks resolve(Buffer.concat(chunks, totalLength)); }); nodeStream.on("error", (err) => { // Type the error parameter reject(err); }); }); } // Handle unexpected stream type if it's neither Web nor Node stream throw new Error("Unsupported stream type provided to streamToBuffer"); } /** * Utility functions for handling different audio input sources */ /** * Validates that only one input source is provided */ function validateSpeakInput(input) { const inputCount = [input.text, input.filename, input.audioBytes, input.audioStream].filter(Boolean).length; if (inputCount === 0) { throw new Error("No input provided. Please provide text, filename, audioBytes, or audioStream."); } if (inputCount > 1) { throw new Error("Multiple input sources provided. Please provide only one of: text, filename, audioBytes, or audioStream."); } } /** * Determines the audio format from a filename extension */ function getAudioFormatFromFilename(filename) { const extension = filename.toLowerCase().split(".").pop(); switch (extension) { case "mp3": return "audio/mpeg"; case "wav": return "audio/wav"; case "ogg": return "audio/ogg"; case "opus": return "audio/opus"; case "aac": return "audio/aac"; case "flac": return "audio/flac"; default: return "audio/wav"; // Default fallback } } /** * Attempts to detect audio format from byte signature */ function detectAudioFormat(audioBytes) { if (audioBytes.length < 4) { return "audio/wav"; // Default fallback } // Check for common audio file signatures const header = Array.from(audioBytes.slice(0, 12)); // MP3 - ID3 tag or MPEG frame sync if ((header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) || // ID3 (header[0] === 0xff && (header[1] & 0xe0) === 0xe0)) { // MPEG frame sync return "audio/mpeg"; } // WAV - RIFF header if (header[0] === 0x52 && header[1] === 0x49 && header[2] === 0x46 && 
header[3] === 0x46 && header[8] === 0x57 && header[9] === 0x41 && header[10] === 0x56 && header[11] === 0x45) { return "audio/wav"; } // OGG if (header[0] === 0x4f && header[1] === 0x67 && header[2] === 0x67 && header[3] === 0x53) { return "audio/ogg"; } // FLAC if (header[0] === 0x66 && header[1] === 0x4c && header[2] === 0x61 && header[3] === 0x43) { return "audio/flac"; } return "audio/wav"; // Default fallback } /** * Reads an audio file and returns its contents as Uint8Array * Only works in Node.js environment */ async function readAudioFile(filename) { if (!isNode) { throw new Error("File reading is only supported in Node.js environment"); } try { const fs = await new Function("m", "return import(m)")("node:fs/promises"); const buffer = await fs.readFile(filename); return new Uint8Array(buffer); } catch (error) { throw new Error(`Failed to read audio file "${filename}": ${error instanceof Error ? error.message : String(error)}`); } } /** * Converts an audio stream to bytes */ async function streamToBytes(stream) { const result = await streamToBuffer(stream); // Convert Buffer to Uint8Array if needed (Node.js) if (result instanceof Buffer) { return new Uint8Array(result); } return result; } /** * Processes the input and returns audio bytes with format information */ async function processAudioInput(input) { validateSpeakInput(input); if (input.audioBytes) { return { audioBytes: input.audioBytes, mimeType: detectAudioFormat(input.audioBytes), }; } if (input.audioStream) { const audioBytes = await streamToBytes(input.audioStream); return { audioBytes, mimeType: detectAudioFormat(audioBytes), }; } if (input.filename) { const audioBytes = await readAudioFile(input.filename); return { audioBytes, mimeType: getAudioFormatFromFilename(input.filename), }; } throw new Error("No valid audio input provided"); } var audioInput = /*#__PURE__*/Object.freeze({ __proto__: null, detectAudioFormat: detectAudioFormat, getAudioFormatFromFilename: getAudioFormatFromFilename, 
processAudioInput: processAudioInput, readAudioFile: readAudioFile, streamToBytes: streamToBytes, validateSpeakInput: validateSpeakInput }); /** * Filter voices by language code * @param voices Array of voices to filter * @param languageCode BCP-47 language code to filter by * @returns Filtered array of voices */ function filterByLanguage(voices, languageCode) { return voices.filter((voice) => voice.languageCodes.some((lang) => lang.bcp47.toLowerCase() === languageCode.toLowerCase())); } /** * Filter voices by gender * @param voices Array of voices to filter * @param gender Gender to filter by * @returns Filtered array of voices */ function filterByGender(voices, gender) { return voices.filter((voice) => voice.gender === gender); } /** * Filter voices by provider * @param voices Array of voices to filter * @param provider Provider to filter by * @returns Filtered array of voices */ function filterByProvider(voices, provider) { return voices.filter((voice) => voice.provider === provider); } /** * Find a voice by ID * @param voices Array of voices to search * @param id Voice ID to find * @returns The found voice or undefined */ function findById(voices, id) { return voices.find((voice) => voice.id === id); } /** * Get all available languages from a list of voices * @param voices Array of voices * @returns Array of unique language codes */ function getAvailableLanguages(voices) { // Use a Set to collect unique language codes const languages = new Set(); // Iterate through all voices and their language codes for (const voice of voices) { for (const lang of voice.languageCodes) { languages.add(lang.bcp47); } } // Convert Set to Array and return return Array.from(languages); } var voiceUtils = /*#__PURE__*/Object.freeze({ __proto__: null, filterByGender: filterByGender, filterByLanguage: filterByLanguage, filterByProvider: filterByProvider, findById: findById, getAvailableLanguages: getAvailableLanguages }); /** * Language utilities for normalizing language codes across 
different formats */ /** * Language normalization utilities */ class LanguageNormalizer { /** * Normalize a language code to standard formats * @param langCode Input language code (can be ISO639-1/2/3, BCP47, or locale) * @param countryCode Optional country code to help with regionalization * @returns StandardizedLanguage object containing normalized codes */ static normalize(langCode, countryCode) { try { // Handle MMS prefix if present if (langCode.startsWith("mms_")) { langCode = langCode.substring(4); } // Parse the language code let language; let region; // Check if it's a BCP-47 code with region (e.g., en-US) if (langCode.includes("-")) { const parts = langCode.split("-"); language = parts[0].toLowerCase(); region = parts[1].toUpperCase(); } else { language = langCode.toLowerCase(); region = countryCode === null || countryCode === void 0 ? void 0 : countryCode.toUpperCase(); } // Convert to ISO 639-3 const iso639_3 = LanguageNormalizer.iso1To3[language] || language; // Create BCP-47 tag const bcp47 = region ? 
`${language}-${region}` : language; // Create display name let display = LanguageNormalizer.languageNames[language] || language; if (region && LanguageNormalizer.regionNames[region]) { display += ` (${LanguageNormalizer.regionNames[region]})`; } else if (region) { display += ` (${region})`; } return { iso639_3, bcp47, display, countryCode: region, }; } catch (_error) { // Fallback for unknown codes return { iso639_3: "und", bcp47: "und", display: "Unknown", }; } } /** * Get the display name for a language code * @param langCode Language code * @returns Display name */ static getDisplayName(langCode) { return LanguageNormalizer.normalize(langCode).display; } /** * Get the ISO 639-3 code for a language code * @param langCode Language code * @returns ISO 639-3 code */ static getISO639_3(langCode) { return LanguageNormalizer.normalize(langCode).iso639_3; } /** * Get the BCP-47 tag for a language code * @param langCode Language code * @param countryCode Optional country code * @returns BCP-47 tag */ static getBCP47(langCode, countryCode) { return LanguageNormalizer.normalize(langCode, countryCode).bcp47; } } /** * Common language display names */ LanguageNormalizer.languageNames = { en: "English", fr: "French", es: "Spanish", de: "German", it: "Italian", ja: "Japanese", ko: "Korean", zh: "Chinese", ru: "Russian", pt: "Portuguese", ar: "Arabic", hi: "Hindi", nl: "Dutch", sv: "Swedish", fi: "Finnish", no: "Norwegian", da: "Danish", pl: "Polish", tr: "Turkish", cs: "Czech", hu: "Hungarian", el: "Greek", he: "Hebrew", th: "Thai", vi: "Vietnamese", id: "Indonesian", ms: "Malay", ro: "Romanian", sk: "Slovak", uk: "Ukrainian", bg: "Bulgarian", hr: "Croatian", lt: "Lithuanian", lv: "Latvian", et: "Estonian", sl: "Slovenian", sr: "Serbian", }; /** * Common region display names */ LanguageNormalizer.regionNames = { US: "United States", GB: "United Kingdom", AU: "Australia", CA: "Canada", IN: "India", IE: "Ireland", ZA: "South Africa", NZ: "New Zealand", FR: "France", DE: 
"Germany", IT: "Italy", ES: "Spain", MX: "Mexico", JP: "Japan", KR: "Korea", CN: "China", TW: "Taiwan", HK: "Hong Kong", BR: "Brazil", PT: "Portugal", RU: "Russia", }; /** * ISO 639-1 to ISO 639-3 mapping */ LanguageNormalizer.iso1To3 = { ar: "ara", bg: "bul", ca: "cat", cs: "ces", da: "dan", de: "deu", el: "ell", en: "eng", es: "spa", et: "est", fi: "fin", fr: "fra", he: "heb", hi: "hin", hr: "hrv", hu: "hun", id: "ind", it: "ita", ja: "jpn", ko: "kor", lt: "lit", lv: "lav", ms: "msa", nl: "nld", no: "nor", pl: "pol", pt: "por", ro: "ron", ru: "rus", sk: "slk", sl: "slv", sr: "srp", sv: "swe", th: "tha", tr: "tur", uk: "ukr", vi: "vie", zh: "zho", }; /** * SSML Compatibility Layer * * This module provides cross-engine SSML compatibility by: * 1. Validating SSML structure * 2. Converting SSML to engine-specific formats * 3. Providing fallbacks for unsupported features * 4. Ensuring proper SSML nesting and structure */ /** * SSML capabilities for different TTS engines */ const ENGINE_SSML_CAPABILITIES = { // Full SSML Support sapi: { supportsSSML: true, supportLevel: "full", supportedTags: [ "speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s", ], unsupportedTags: [], requiresNamespace: false, requiresVersion: true, }, witai: { supportsSSML: true, supportLevel: "full", supportedTags: [ "speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s", ], unsupportedTags: [], requiresNamespace: false, requiresVersion: false, }, watson: { supportsSSML: true, supportLevel: "full", supportedTags: [ "speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s", ], unsupportedTags: [], requiresNamespace: false, requiresVersion: false, }, cerevoice: { supportsSSML: true, supportLevel: "full", supportedTags: [ "speak", "audio", "break", "emphasis", "lexicon", "mark", "meta", "metadata", "p", "phoneme", "prosody", "say-as", "sub", "s", "voice", ], unsupportedTags: ["lang"], requiresNamespace: 
false, requiresVersion: false, }, // Partial SSML Support azure: { supportsSSML: true, supportLevel: "full", supportedTags: [ "speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s", "mstts:express-as", ], unsupportedTags: [], requiresNamespace: true, requiresVersion: true, }, polly: { supportsSSML: true, supportLevel: "limited", // Depends on voice engine type supportedTags: [ "speak", "prosody", "break", "voice", "phoneme", "say-as", "sub", "p", "s", "mark", "lang", ], unsupportedTags: [], // Depends on voice engine type requiresNamespace: true, requiresVersion: false, }, google: { supportsSSML: true, supportLevel: "limited", // Depends on voice type supportedTags: [ "speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s", "mark", "lang", "audio", ], unsupportedTags: [], // Depends on voice type requiresNamespace: false, requiresVersion: false, }, // No SSML Support elevenlabs: { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, openai: { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, playht: { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, upliftai: { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, sherpaonnx: { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, "sherpaonnx-wasm": { supportsSSML: false, supportLevel: "none", supportedTags: [], unsupportedTags: ["*"], requiresNamespace: false, requiresVersion: false, }, espeak: { supportsSSML: true, supportLevel: "limited", supportedTags: ["speak", "prosody", "break", "emphasis", "p", "s"], unsupportedTags: 
/**
 * Per-voice-type SSML overrides for engines whose tag support depends on the
 * selected voice. Outer keys are engine names, inner keys are voice types;
 * every entry carries the `supportLevel` and the `unsupportedTags` that apply
 * to that voice type.
 */
const VOICE_SPECIFIC_CAPABILITIES = (() => {
    // Builds one override entry; the rest parameter yields a fresh array per call.
    const entry = (supportLevel, ...unsupportedTags) => ({ supportLevel, unsupportedTags });
    // Tags listed as unsupported for both neural and generative Polly voices.
    const pollyNeuralGaps = ["emphasis", "amazon:auto-breaths", "amazon:effect"];
    return {
        // Amazon Polly voice engine types
        polly: {
            standard: entry("full"),
            "long-form": entry("full"),
            neural: entry("limited", ...pollyNeuralGaps),
            generative: entry("limited", ...pollyNeuralGaps, "mark"),
        },
        // Google Cloud TTS voice types
        google: {
            standard: entry("full"),
            wavenet: entry("full"),
            neural2: entry("limited", "mark"),
            journey: entry("none", "*"),
            studio: entry("none", "*"),
        },
    };
})();
return { ...baseCapabilities, supportLevel: voiceCapabilities.supportLevel, unsupportedTags: voiceCapabilities.unsupportedTags, }; } } return baseCapabilities; } /** * Get voice-specific SSML capabilities */ static getVoiceSpecificCapabilities(engine, voiceId) { const engineCapabilities = VOICE_SPECIFIC_CAPABILITIES[engine]; if (!engineCapabilities) return null; // Determine voice type based on voice ID patterns const voiceType = SSMLCompatibilityManager.detectVoiceType(engine, voiceId); return engineCapabilities[voiceType] || null; } /** * Detect voice type from voice ID */ static detectVoiceType(engine, voiceId) { const lowerVoiceId = voiceId.toLowerCase(); switch (engine) { case "polly": // Amazon Polly voice engine detection if (lowerVoiceId.includes("neural")) return "neural"; if (lowerVoiceId.includes("generative")) return "generative"; if (lowerVoiceId.includes("long-form")) return "long-form"; return "standard"; case "google": // Google Cloud TTS voice type detection if (lowerVoiceId.includes("neural2")) return "neural2"; if (lowerVoiceId.includes("journey")) return "journey"; if (lowerVoiceId.includes("studio")) return "studio"; if (lowerVoiceId.includes("wavenet")) return "wavenet"; if (lowerVoiceId.includes("standard")) return "standard"; // Default to standard for older voice naming return "standard"; default: return "default"; } } /** * Validate SSML for a specific engine */ static validateSSML(ssml, engine, voiceId) { const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId); const errors = []; const warnings = []; // Basic SSML structure validation if (!ssml.trim().startsWith("<speak") || !ssml.trim().endsWith("</speak>")) { errors.push("SSML must be wrapped in <speak> tags"); } // Check if engine supports SSML at all if (!capabilities.supportsSSML) { warnings.push(`Engine '${engine}' does not support SSML. 
Tags will be stripped.`); return { isValid: true, // Valid for processing (will be stripped) errors, warnings, }; } // Validate unsupported tags if (capabilities.unsupportedTags.includes("*")) { warnings.push(`Engine '${engine}' does not support any SSML tags. All tags will be stripped.`); } else { for (const unsupportedTag of capabilities.unsupportedTags) { const tagRegex = new RegExp(`<${unsupportedTag}[^>]*>`, "gi"); if (tagRegex.test(ssml)) { warnings.push(`Tag '<${unsupportedTag}>' is not supported by engine '${engine}' and will be removed.`); } } } // Check for required attributes if (capabilities.requiresNamespace && !ssml.includes("xmlns=")) { warnings.push(`Engine '${engine}' requires xmlns attribute in <speak> tag.`); } if (capabilities.requiresVersion && !ssml.includes("version=")) { warnings.push(`Engine '${engine}' requires version attribute in <speak> tag.`); } return { isValid: errors.length === 0, errors, warnings, }; } /** * Process SSML for engine compatibility */ static processSSMLForEngine(ssml, engine, voiceId) { const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId); // If engine doesn't support SSML, strip all tags if (!capabilities.supportsSSML) { return SSMLCompatibilityManager.stripAllSSMLTags(ssml); } let processedSSML = ssml; // Remove unsupported tags if (capabilities.unsupportedTags.includes("*")) { return SSMLCompatibilityManager.stripAllSSMLTags(ssml); } for (const unsupportedTag of capabilities.unsupportedTags) { processedSSML = SSMLCompatibilityManager.removeSSMLTag(processedSSML, unsupportedTag); } // Add required attributes processedSSML = SSMLCompatibilityManager.addRequiredAttributes(processedSSML, capabilities); return processedSSML; } /** * Strip all SSML tags from text */ static stripAllSSMLTags(ssml) { let result = ssml; // Remove all SSML tags while preserving content // Use a more comprehensive approach to handle nested tags result = result.replace(/<speak[^>]*>/gi, ""); result = 
result.replace(/<\/speak>/gi, ""); result = result.replace(/<break[^>]*\/?>/gi, " "); // Handle nested tags by repeatedly removing them let previousResult = ""; while (result !== previousResult) { previousResult = result; result = result.replace(/<emphasis[^>]*>(.*?)<\/emphasis>/gis, "$1"); result = result.replace(/<prosody[^>]*>(.*?)<\/prosody>/gis, "$1"); result = result.replace(/<voice[^>]*>(.*?)<\/voice>/gis, "$1"); result = result.replace(/<say-as[^>]*>(.*?)<\/say-as>/gis, "$1"); result = result.replace(/<phoneme[^>]*>(.*?)<\/phoneme>/gis, "$1"); result = result.replace(/<sub[^>]*>(.*?)<\/sub>/gis, "$1"); result = result.replace(/<p[^>]*>(.*?)<\/p>/gis, "$1 "); result = result.replace(/<s[^>]*>(.*?)<\/s>/gis, "$1 "); result = result.replace(/<lang[^>]*>(.*?)<\/lang>/gis, "$1"); result = result.replace(/<audio[^>]*>(.*?)<\/audio>/gis, "$1"); result = result.replace(/<mark[^>]*\/?>/gi, ""); // Remove any remaining XML-like tags result = result.replace(/<[^>]+>/g, ""); } // Clean up whitespace result = result.replace(/\s+/g, " ").trim(); return result; } /** * Remove specific SSML tag */ static removeSSMLTag(ssml, tagName) { let result = ssml; // Remove self-closing tags const selfClosingRegex = new RegExp(`<${tagName}[^>]*\\/>`, "gi"); result = result.replace(selfClosingRegex, ""); // Remove paired tags, keeping content const pairedRegex = new RegExp(`<${tagName}[^>]*>(.*?)<\\/${tagName}>`, "gi"); result = result.replace(pairedRegex, "$1"); return result; } /** * Add required attributes to SSML */ static addRequiredA