UNPKG

js-tts-wrapper

Version:

A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services

1,378 lines (1,368 loc) 581 kB
/**
 * Define a getter on `n` for every own enumerable key of each namespace
 * object in `m` (skipping `default` and keys already present on `n`),
 * then freeze and return `n`.
 * @param n Target namespace object
 * @param m Array of source namespace objects (strings, arrays and falsy entries are ignored)
 * @returns The frozen target object
 */
function _mergeNamespaces(n, m) {
    m.forEach(function (e) {
        e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) {
            if (k !== 'default' && !(k in n)) {
                var d = Object.getOwnPropertyDescriptor(e, k);
                Object.defineProperty(n, k, d.get ? d : {
                    enumerable: true,
                    get: function () { return e[k]; }
                });
            }
        });
    });
    return Object.freeze(n);
}

/**
 * SSML Builder class for creating SSML markup
 */
class SSMLBuilder {
    constructor() {
        this.ssml = "";
    }
    /**
     * Add text or SSML to the builder (replaces the current content)
     * @param text Text or SSML to add
     * @returns The SSML string
     */
    add(text) {
        // If text doesn't start with <speak>, wrap it
        if (text.trim().startsWith("<speak")) {
            this.ssml = text;
        }
        else {
            this.ssml = `<speak>${text}</speak>`;
        }
        return this.ssml;
    }
    /**
     * Add a break to the SSML, just before the closing </speak> tag
     * @param time Break duration (e.g., '500ms')
     * @returns The SSML builder instance
     */
    addBreak(time = "500ms") {
        const breakTag = `<break time="${time}"/>`;
        const closeAt = this.ssml.lastIndexOf("</speak>");
        if (closeAt === -1) {
            // No root element yet (e.g. a fresh or cleared builder): create one
            // instead of silently dropping the break, mirroring addProsody.
            this.ssml = `<speak>${this.ssml}${breakTag}</speak>`;
        }
        else {
            // Splice by index rather than String.replace so nothing in the
            // inserted text is interpreted as a "$" replacement pattern.
            this.ssml = this.ssml.slice(0, closeAt) + breakTag + this.ssml.slice(closeAt);
        }
        return this;
    }
    /**
     * Add prosody element to the SSML, right after the opening <speak> tag
     * @param text Text to wrap with prosody
     * @param rate Speech rate
     * @param pitch Speech pitch
     * @param volume Speech volume
     * @returns The SSML builder instance
     */
    addProsody(text, rate, pitch, volume) {
        let prosodyAttrs = "";
        if (rate)
            prosodyAttrs += ` rate="${rate}"`;
        if (pitch)
            prosodyAttrs += ` pitch="${pitch}"`;
        if (volume)
            prosodyAttrs += ` volume="${volume}"`;
        const prosodyElement = `<prosody${prosodyAttrs}>${text}</prosody>`;
        // Accept a root tag with attributes (<speak version="1.0" ...>) as well
        // as the bare <speak>; otherwise existing content would be overwritten.
        const openTag = /<speak(?:\s[^>]*)?>/.exec(this.ssml);
        if (openTag) {
            const insertAt = openTag.index + openTag[0].length;
            // Index-based splice keeps "$&", "$'" etc. in `text` literal.
            this.ssml = this.ssml.slice(0, insertAt) + prosodyElement + this.ssml.slice(insertAt);
        }
        else {
            this.ssml = `<speak>${prosodyElement}</speak>`;
        }
        return this;
    }
    /**
     * Wrap text with speak tags
     * @param text Text to wrap
     * @returns SSML string with speak tags
     */
    wrapWithSpeak(text) {
        if (!text.trim().startsWith("<speak")) {
            return `<speak>${text}</speak>`;
        }
        return text;
    }
    /**
     * Clear the SSML content
     */
    clearSSML() {
        this.ssml = "";
    }
    /**
     * Get the current SSML string
     * @returns The current SSML string
     */
    toString() {
        return this.ssml;
    }
}
/**
 * Language utilities for normalizing language codes across different formats
 */
/**
 * Language normalization utilities
 */
class LanguageNormalizer {
    /**
     * Normalize a language code to standard formats
     * @param langCode Input language code (can be ISO639-1/2/3, BCP47, or locale such as en_US)
     * @param countryCode Optional country code, used only when langCode carries no region subtag
     * @returns StandardizedLanguage object containing normalized codes
     */
    static normalize(langCode, countryCode) {
        try {
            // Handle MMS prefix if present
            const code = langCode.startsWith("mms_") ? langCode.substring(4) : langCode;
            // Accept both BCP-47 ("en-US") and POSIX-style locale ("en_US") separators
            const [primary, ...subtags] = code.split(/[-_]/);
            const language = primary.toLowerCase();
            // A region subtag is two letters or three digits; this skips script
            // subtags such as "Hans" in "zh-Hans-CN".
            const regionSubtag = subtags.find((tag) => /^(?:[A-Za-z]{2}|\d{3})$/.test(tag));
            let region;
            if (regionSubtag) {
                region = regionSubtag.toUpperCase();
            }
            else {
                region = countryCode === null || countryCode === undefined ? undefined : countryCode.toUpperCase();
            }
            // Own-property lookup so inputs like "constructor" never resolve to
            // something inherited from Object.prototype.
            const lookup = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
            // Convert to ISO 639-3
            const iso639_3 = lookup(LanguageNormalizer.iso1To3, language) || language;
            // Create BCP-47 tag
            const bcp47 = region ? `${language}-${region}` : language;
            // Create display name
            let display = lookup(LanguageNormalizer.languageNames, language) || language;
            const regionName = region ? lookup(LanguageNormalizer.regionNames, region) : undefined;
            if (regionName) {
                display += ` (${regionName})`;
            }
            else if (region) {
                display += ` (${region})`;
            }
            return {
                iso639_3,
                bcp47,
                display,
                countryCode: region,
            };
        }
        catch (_error) {
            // Fallback for unknown codes (including non-string input)
            return {
                iso639_3: "und",
                bcp47: "und",
                display: "Unknown",
            };
        }
    }
    /**
     * Get the display name for a language code
     * @param langCode Language code
     * @returns Display name
     */
    static getDisplayName(langCode) {
        return LanguageNormalizer.normalize(langCode).display;
    }
    /**
     * Get the ISO 639-3 code for a language code
     * @param langCode Language code
     * @returns ISO 639-3 code
     */
    static getISO639_3(langCode) {
        return LanguageNormalizer.normalize(langCode).iso639_3;
    }
    /**
     * Get the BCP-47 tag for a language code
     * @param langCode Language code
     * @param countryCode Optional country code
     * @returns BCP-47 tag
     */
    static getBCP47(langCode, countryCode) {
        return LanguageNormalizer.normalize(langCode, countryCode).bcp47;
    }
}
/**
 * Common language display names
 */
LanguageNormalizer.languageNames = {
    en: "English",
    fr: "French",
    es: "Spanish",
    de: "German",
    it: "Italian",
    ja: "Japanese",
    ko: "Korean",
    zh: "Chinese",
    ru: "Russian",
    pt: "Portuguese",
    ar: "Arabic",
    hi: "Hindi",
    nl: "Dutch",
    sv: "Swedish",
    fi: "Finnish",
    no: "Norwegian",
    da: "Danish",
    pl: "Polish",
    tr: "Turkish",
    cs: "Czech",
    hu: "Hungarian",
    el: "Greek",
    he: "Hebrew",
    th: "Thai",
    vi: "Vietnamese",
    id: "Indonesian",
    ms: "Malay",
    ro: "Romanian",
    sk: "Slovak",
    uk: "Ukrainian",
    bg: "Bulgarian",
    hr: "Croatian",
    lt: "Lithuanian",
    lv: "Latvian",
    et: "Estonian",
    sl: "Slovenian",
    sr: "Serbian",
};
/**
 * Common region display names
 */
LanguageNormalizer.regionNames = {
    US: "United States",
    GB: "United Kingdom",
    AU: "Australia",
    CA: "Canada",
    IN: "India",
    IE: "Ireland",
    ZA: "South Africa",
    NZ: "New Zealand",
    FR: "France",
    DE: "Germany",
    IT: "Italy",
    ES: "Spain",
    MX: "Mexico",
    JP: "Japan",
    KR: "Korea",
    CN: "China",
    TW: "Taiwan",
    HK: "Hong Kong",
    BR: "Brazil",
    PT: "Portugal",
    RU: "Russia",
};
/**
 * ISO 639-1 to ISO 639-3 mapping
 */
LanguageNormalizer.iso1To3 = {
    ar: "ara",
    bg: "bul",
    ca: "cat",
    cs: "ces",
    da: "dan",
    de: "deu",
    el: "ell",
    en: "eng",
    es: "spa",
    et: "est",
    fi: "fin",
    fr: "fra",
    he: "heb",
    hi: "hin",
    hr: "hrv",
    hu: "hun",
    id: "ind",
    it: "ita",
    ja: "jpn",
    ko: "kor",
    lt: "lit",
    lv: "lav",
    ms: "msa",
    nl: "nld",
    no: "nor",
    pl: "pol",
    pt: "por",
    ro: "ron",
    ru: "rus",
    sk: "slk",
    sl: "slv",
    sr: "srp",
    sv: "swe",
    th: "tha",
    tr: "tur",
    uk: "ukr",
    vi: "vie",
    zh: "zho",
};
/**
 * SSML Compatibility Layer
 *
 * This module provides cross-engine SSML compatibility by:
 * 1. Validating SSML structure
 * 2. Converting SSML to engine-specific formats
 * 3. Providing fallbacks for unsupported features
 * 4. Ensuring proper SSML nesting and structure
 */
/**
 * SSML capabilities for different TTS engines.
 *
 * Built through two local factories so that each engine entry (and each of
 * its tag arrays) is a distinct object, exactly as with one literal per engine.
 */
const ENGINE_SSML_CAPABILITIES = (() => {
    // Tag set shared by most SSML-capable engines
    const CORE_TAGS = ["speak", "prosody", "break", "emphasis", "voice", "phoneme", "say-as", "sub", "p", "s"];
    // Entry for an engine that accepts SSML
    const withSSML = (supportLevel, supportedTags, { unsupportedTags = [], requiresNamespace = false, requiresVersion = false } = {}) => ({
        supportsSSML: true,
        supportLevel,
        supportedTags,
        unsupportedTags,
        requiresNamespace,
        requiresVersion,
    });
    // Entry for an engine without any SSML support
    const withoutSSML = () => ({
        supportsSSML: false,
        supportLevel: "none",
        supportedTags: [],
        unsupportedTags: ["*"],
        requiresNamespace: false,
        requiresVersion: false,
    });
    // Entry shared by the two eSpeak variants
    const espeakLike = () => withSSML("limited", ["speak", "prosody", "break", "emphasis", "p", "s"], {
        unsupportedTags: ["voice", "phoneme", "say-as", "sub"],
    });
    return {
        // Full SSML support
        sapi: withSSML("full", [...CORE_TAGS], { requiresVersion: true }),
        witai: withSSML("full", [...CORE_TAGS]),
        watson: withSSML("full", [...CORE_TAGS]),
        azure: withSSML("full", [...CORE_TAGS, "mstts:express-as"], {
            requiresNamespace: true,
            requiresVersion: true,
        }),
        // Limited support: level and unsupported tags depend on the voice type
        polly: withSSML("limited", [...CORE_TAGS.filter((tag) => tag !== "emphasis"), "mark", "lang"], {
            requiresNamespace: true,
        }),
        google: withSSML("limited", [...CORE_TAGS, "mark", "lang", "audio"]),
        // No SSML support
        elevenlabs: withoutSSML(),
        openai: withoutSSML(),
        playht: withoutSSML(),
        upliftai: withoutSSML(),
        sherpaonnx: withoutSSML(),
        "sherpaonnx-wasm": withoutSSML(),
        // Limited, fixed subset
        espeak: espeakLike(),
        "espeak-wasm": espeakLike(),
    };
})();
/**
 * Voice-specific SSML capabilities for engines with dynamic support
 */
const VOICE_SPECIFIC_CAPABILITIES = {
    // Amazon Polly voice engine types
    polly: {
        standard: {
            supportLevel: "full",
            unsupportedTags: [],
        },
        "long-form": {
            supportLevel: "full",
            unsupportedTags: [],
        },
        neural: {
            supportLevel: "limited",
            unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect"],
        },
        generative: {
            supportLevel: "limited",
            unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect", "mark"],
        },
    },
    // Google Cloud TTS voice types
    google: {
        standard: {
            supportLevel: "full",
            unsupportedTags: [],
        },
        wavenet: {
            supportLevel: "full",
            unsupportedTags: [],
        },
        neural2: {
            supportLevel: "limited",
            unsupportedTags: ["mark"],
        },
        journey: {
            supportLevel: "none",
            unsupportedTags: ["*"],
        },
        studio: {
            supportLevel: "none",
            unsupportedTags: ["*"],
        },
    },
};
/**
 * SSML Compatibility Manager
 */
// biome-ignore lint/complexity/noStaticOnlyClass: using a static utility class for organization
class SSMLCompatibilityManager {
    /**
     * Get SSML capabilities for a specific engine and voice
     * @param engine Engine key looked up in ENGINE_SSML_CAPABILITIES
     * @param voiceId Optional voice ID used to refine the engine-level entry
     * @returns Capabilities object; a "no SSML" default for unknown engines
     */
    static getCapabilities(engine, voiceId) {
        const baseCapabilities = ENGINE_SSML_CAPABILITIES[engine];
        if (!baseCapabilities) {
            // Default to no SSML support for unknown engines
            return {
                supportsSSML: false,
                supportLevel: "none",
                supportedTags: [],
                unsupportedTags: ["*"],
                requiresNamespace: false,
                requiresVersion: false,
            };
        }
        // For engines with voice-specific capabilities, adjust based on voice
        if (voiceId && VOICE_SPECIFIC_CAPABILITIES[engine]) {
            const voiceCapabilities = SSMLCompatibilityManager.getVoiceSpecificCapabilities(engine, voiceId);
            if (voiceCapabilities) {
                return {
                    ...baseCapabilities,
                    supportLevel: voiceCapabilities.supportLevel,
                    unsupportedTags: voiceCapabilities.unsupportedTags,
                };
            }
        }
        return baseCapabilities;
    }
    /**
     * Get voice-specific SSML capabilities
     * @returns The entry for the detected voice type, or null when none applies
     */
    static getVoiceSpecificCapabilities(engine, voiceId) {
        const engineCapabilities = VOICE_SPECIFIC_CAPABILITIES[engine];
        if (!engineCapabilities)
            return null;
        // Determine voice type based on voice ID patterns
        const voiceType = SSMLCompatibilityManager.detectVoiceType(engine, voiceId);
        return engineCapabilities[voiceType] || null;
    }
    /**
     * Detect voice type from voice ID (case-insensitive substring match)
     */
    static detectVoiceType(engine, voiceId) {
        const lowerVoiceId = voiceId.toLowerCase();
        switch (engine) {
            case "polly":
                // Amazon Polly voice engine detection
                if (lowerVoiceId.includes("neural"))
                    return "neural";
                if (lowerVoiceId.includes("generative"))
                    return "generative";
                if (lowerVoiceId.includes("long-form"))
                    return "long-form";
                return "standard";
            case "google":
                // Google Cloud TTS voice type detection
                if (lowerVoiceId.includes("neural2"))
                    return "neural2";
                if (lowerVoiceId.includes("journey"))
                    return "journey";
                if (lowerVoiceId.includes("studio"))
                    return "studio";
                if (lowerVoiceId.includes("wavenet"))
                    return "wavenet";
                if (lowerVoiceId.includes("standard"))
                    return "standard";
                // Default to standard for older voice naming
                return "standard";
            default:
                return "default";
        }
    }
    /**
     * Build the regex source that matches the start of an opening tag with
     * exactly this name. The name is regex-escaped, and the lookahead stops
     * "s" from matching "<speak", "<sub" or "<say-as".
     * @param tagName SSML element name
     * @returns Regex source such as `<sub(?=[\s/>])`
     */
    static tagPatternSource(tagName) {
        const escapedName = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        return `<${escapedName}(?=[\\s/>])`;
    }
    /**
     * Validate SSML for a specific engine
     * @returns Object with isValid, errors and warnings
     */
    static validateSSML(ssml, engine, voiceId) {
        const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId);
        const errors = [];
        const warnings = [];
        // Basic SSML structure validation
        if (!ssml.trim().startsWith("<speak") || !ssml.trim().endsWith("</speak>")) {
            errors.push("SSML must be wrapped in <speak> tags");
        }
        // Check if engine supports SSML at all
        if (!capabilities.supportsSSML) {
            warnings.push(`Engine '${engine}' does not support SSML. Tags will be stripped.`);
            return {
                isValid: true, // Valid for processing (will be stripped)
                errors,
                warnings,
            };
        }
        // Validate unsupported tags
        if (capabilities.unsupportedTags.includes("*")) {
            warnings.push(`Engine '${engine}' does not support any SSML tags. All tags will be stripped.`);
        }
        else {
            for (const unsupportedTag of capabilities.unsupportedTags) {
                const tagRegex = new RegExp(`${SSMLCompatibilityManager.tagPatternSource(unsupportedTag)}[^>]*>`, "i");
                if (tagRegex.test(ssml)) {
                    warnings.push(`Tag '<${unsupportedTag}>' is not supported by engine '${engine}' and will be removed.`);
                }
            }
        }
        // Check for required attributes
        if (capabilities.requiresNamespace && !ssml.includes("xmlns=")) {
            warnings.push(`Engine '${engine}' requires xmlns attribute in <speak> tag.`);
        }
        if (capabilities.requiresVersion && !ssml.includes("version=")) {
            warnings.push(`Engine '${engine}' requires version attribute in <speak> tag.`);
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
        };
    }
    /**
     * Process SSML for engine compatibility
     * @returns Plain text when the engine/voice supports no SSML, otherwise
     *          SSML with unsupported tags removed and required attributes added
     */
    static processSSMLForEngine(ssml, engine, voiceId) {
        const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId);
        // If engine doesn't support SSML, strip all tags
        if (!capabilities.supportsSSML) {
            return SSMLCompatibilityManager.stripAllSSMLTags(ssml);
        }
        let processedSSML = ssml;
        // Remove unsupported tags
        if (capabilities.unsupportedTags.includes("*")) {
            return SSMLCompatibilityManager.stripAllSSMLTags(ssml);
        }
        for (const unsupportedTag of capabilities.unsupportedTags) {
            processedSSML = SSMLCompatibilityManager.removeSSMLTag(processedSSML, unsupportedTag);
        }
        // Add required attributes
        processedSSML = SSMLCompatibilityManager.addRequiredAttributes(processedSSML, capabilities);
        return processedSSML;
    }
    /**
     * Strip all SSML tags from text
     * @returns Text content with whitespace collapsed and trimmed
     */
    static stripAllSSMLTags(ssml) {
        let result = ssml;
        // Remove all SSML tags while preserving content
        result = result.replace(/<speak[^>]*>/gi, "");
        result = result.replace(/<\/speak>/gi, "");
        result = result.replace(/<break[^>]*\/?>/gi, " ");
        // Handle nested tags by repeatedly removing them until nothing changes
        let previousResult = "";
        while (result !== previousResult) {
            previousResult = result;
            result = result.replace(/<emphasis[^>]*>(.*?)<\/emphasis>/gis, "$1");
            result = result.replace(/<prosody[^>]*>(.*?)<\/prosody>/gis, "$1");
            result = result.replace(/<voice[^>]*>(.*?)<\/voice>/gis, "$1");
            result = result.replace(/<say-as[^>]*>(.*?)<\/say-as>/gis, "$1");
            result = result.replace(/<phoneme[^>]*>(.*?)<\/phoneme>/gis, "$1");
            result = result.replace(/<sub[^>]*>(.*?)<\/sub>/gis, "$1");
            // Paragraphs and sentences leave a space so adjacent text does not fuse
            result = result.replace(/<p[^>]*>(.*?)<\/p>/gis, "$1 ");
            result = result.replace(/<s[^>]*>(.*?)<\/s>/gis, "$1 ");
            result = result.replace(/<lang[^>]*>(.*?)<\/lang>/gis, "$1");
            result = result.replace(/<audio[^>]*>(.*?)<\/audio>/gis, "$1");
            result = result.replace(/<mark[^>]*\/?>/gi, "");
            // Remove any remaining XML-like tags
            result = result.replace(/<[^>]+>/g, "");
        }
        // Clean up whitespace
        result = result.replace(/\s+/g, " ").trim();
        return result;
    }
    /**
     * Remove a specific SSML tag, keeping the content of paired elements
     * @param ssml SSML text
     * @param tagName Element name to remove (matched exactly, case-insensitively)
     * @returns SSML without that element's tags
     */
    static removeSSMLTag(ssml, tagName) {
        const open = SSMLCompatibilityManager.tagPatternSource(tagName);
        const escapedName = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // Remove self-closing tags
        const selfClosingRegex = new RegExp(`${open}[^>]*\\/>`, "gi");
        let result = ssml.replace(selfClosingRegex, "");
        // Remove paired tags, keeping content. The "s" flag lets the content
        // span line breaks, consistent with stripAllSSMLTags.
        const pairedRegex = new RegExp(`${open}[^>]*>(.*?)<\\/${escapedName}\\s*>`, "gis");
        // One pass unwraps only one level of a same-name nesting, so repeat
        // until the text stops changing.
        let previousResult;
        do {
            previousResult = result;
            result = result.replace(pairedRegex, "$1");
        } while (result !== previousResult);
        return result;
    }
    /**
     * Add required attributes to SSML
     * @param ssml SSML text
     * @param capabilities Capabilities whose requiresNamespace / requiresVersion flags are honoured
     * @returns SSML whose <speak> tag carries the required attributes
     */
    static addRequiredAttributes(ssml, capabilities) {
        let processedSSML = ssml;
        // Add namespace if required
        if (capabilities.requiresNamespace && !ssml.includes("xmlns=")) {
            processedSSML = processedSSML.replace(/<speak([^>]*)>/i, '<speak$1 xmlns="http://www.w3.org/2001/10/synthesis">');
        }
        // Add version if required
        if (capabilities.requiresVersion && !ssml.includes("version=")) {
            processedSSML = processedSSML.replace(/<speak([^>]*)>/i, '<speak version="1.0"$1>');
        }
        return processedSSML;
    }
}
/**
 * Check if text is SSML
 * @param text Text to check
 * @returns True if the trimmed text starts with "<speak" and ends with "</speak>"
 */
function isSSML(text) {
    return text.trim().startsWith("<speak") && text.trim().endsWith("</speak>");
}
/**
 * Validate SSML for a specific engine
 * @param ssml SSML text to validate
 * @param engine Target TTS engine
 * @param voiceId Optional voice ID for voice-specific validation
 * @returns Validation result with errors and warnings
 */
function validateSSMLForEngine(ssml, engine, voiceId) {
    return SSMLCompatibilityManager.validateSSML(ssml, engine, voiceId);
}
/**
 * Process SSML for engine compatibility
 * @param ssml SSML text to process
 * @param engine Target TTS engine
 * @param voiceId Optional voice ID for voice-specific processing
 * @returns Processed SSML compatible with the target engine
 */
function processSSMLForEngine(ssml, engine, voiceId) {
    return SSMLCompatibilityManager.processSSMLForEngine(ssml, engine, voiceId);
}
/**
 * Strip SSML tags from text
 *
 * Tags are removed regardless of attributes, nesting depth or line breaks
 * inside the element. XML entities (e.g. `&amp;`) are left untouched.
 * @param ssml SSML text
 * @returns Plain text without SSML tags, whitespace collapsed and trimmed
 */
function stripSSML(ssml) {
    // Simple implementation - for production, consider using a proper XML parser
    return ssml
        // A break stands for a pause: keep the words on either side apart
        .replace(/<break\b[^>]*>/gi, " ")
        // Paragraph / sentence ends also separate words
        .replace(/<\/(?:p|s)\s*>/gi, " ")
        // Every remaining opening, closing or self-closing tag. Requiring a
        // letter after "<" leaves text such as "1 < 2" alone.
        .replace(/<\/?[a-zA-Z][^>]*>/g, "")
        .replace(/\s+/g, " ")
        .trim();
}
/**
 * Wrap text with speak tags if not already present
 * @param text Text to wrap
 * @returns SSML with speak tags
 */
function wrapWithSpeakTags(text) {
    if (isSSML(text))
        return text;
    return `<speak>${text}</speak>`;
}
/**
 * Environment detection and cross-platform utilities
 */
/**
 * Check if code is running in a browser environment
 * (true whenever a global `window` is defined)
 */
const isBrowser$1 = typeof window !== "undefined";
/**
 * Check if code is running in a Node.js environment
 * (no `window`, and `process.versions.node` is defined)
 */
const isNode = !isBrowser$1 &&
    typeof process !== "undefined" &&
    typeof process.versions !== "undefined" &&
    typeof process.versions.node !== "undefined";
/**
 * File system utilities that work in both environments
 *
 * NOTE(review): the Node branches load modules through `new Function(...)`
 * with constant specifiers — presumably to keep bundlers from resolving
 * `node:fs` at build time; confirm before replacing with plain import/require.
 */
const fileSystem = {
    /**
     * Read a file asynchronously
     * @param path Path to the file (Node) or URL to fetch (browser)
     * @returns Promise resolving to the file contents as a string
     */
    readFile: async (path) => {
        if (isNode) {
            // Node.js implementation
            const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
            return fs.readFile(path, "utf-8");
        }
        // Browser implementation - fetch from URL
        const response = await fetch(path);
        if (!response.ok) {
            throw new Error(`Failed to fetch ${path}: ${response.status} ${response.statusText}`);
        }
        return response.text();
    },
    /**
     * Read a file synchronously
     * @param path Path to the file
     * @returns File contents as a string
     * @throws Error outside Node.js
     */
    readFileSync: (path) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
            return fs.readFileSync(path, "utf-8");
        }
        throw new Error("Synchronous file reading is not supported in browsers");
    },
    /**
     * Write a file asynchronously
     * @param path Path to the file
     * @param data Data to write
     * @returns Promise resolving when the file is written
     */
    writeFile: async (path, data) => {
        if (isNode) {
            // Node.js implementation
            const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
            return fs.writeFile(path, data);
        }
        // Browser implementation - download file through a temporary <a> element
        const blob = new Blob([data], { type: "application/octet-stream" });
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = path.split("/").pop() || "download";
        document.body.appendChild(a);
        a.click();
        // Clean up the anchor and the object URL shortly after the click
        setTimeout(() => {
            if (document === null || document === void 0 ? void 0 : document.body) {
                document.body.removeChild(a);
            }
            URL.revokeObjectURL(url);
        }, 100);
    },
    /**
     * Write a file synchronously
     * @param path Path to the file
     * @param data Data to write
     * @throws Error outside Node.js
     */
    writeFileSync: (path, data) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
            fs.writeFileSync(path, data);
        }
        else {
            throw new Error("Synchronous file writing is not supported in browsers");
        }
    },
    /**
     * Check if a file exists asynchronously
     * @param path Path to the file
     * @returns Promise resolving to true if the file exists, false otherwise
     */
    exists: async (path) => {
        if (isNode) {
            // Node.js implementation
            const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
            try {
                await fs.access(path);
                return true;
            }
            catch (_a) {
                return false;
            }
        }
        else {
            // Browser implementation - try to fetch
            try {
                const response = await fetch(path, { method: "HEAD" });
                return response.ok;
            }
            catch (_b) {
                return false;
            }
        }
    },
    /**
     * Check if a file exists synchronously
     * @param path Path to the file
     * @returns True if the file exists, false otherwise
     * @throws Error outside Node.js
     */
    existsSync: (path) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
            return fs.existsSync(path);
        }
        throw new Error("Synchronous file existence check is not supported in browsers");
    },
};
/**
 * Path utilities that work in both environments
 */
const pathUtils = {
    /**
     * Join path segments
     * @param paths Path segments to join
     * @returns Joined path
     */
    join: (...paths) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const path = (new Function('n', 'return require(n)'))('node' + ':path');
            return path.join(...paths);
        }
        // Browser implementation: collapse runs of slashes, but keep the "//"
        // that follows a leading URL scheme. Browser paths are fetched as URLs,
        // and "https:/host/x" no longer names the same resource as "https://host/x".
        return paths
            .join("/")
            .replace(/(^[a-z][a-z\d+.-]*:\/\/)|\/{2,}/gi, (_match, scheme) => (scheme ? scheme : "/"));
    },
    /**
     * Get the directory name of a path
     * @param path Path
     * @returns Directory name ("." when there is no directory part, "/" for a file directly under the root)
     */
    dirname: (path) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
            return nodePath.dirname(path);
        }
        // Browser implementation
        const lastSlash = path.lastIndexOf("/");
        if (lastSlash === -1) {
            return ".";
        }
        if (lastSlash === 0) {
            // "/file" lives in the root directory, not in "."
            return "/";
        }
        return path.slice(0, lastSlash);
    },
    /**
     * Get the base name of a path
     * @param path Path
     * @returns Base name
     */
    basename: (path) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
            return nodePath.basename(path);
        }
        // Browser implementation
        return path.split("/").pop() || "";
    },
    /**
     * Get the extension of a path
     * @param path Path
     * @returns Extension including the leading dot, or "" when there is none
     */
    extname: (path) => {
        if (isNode) {
            // Node.js implementation
            // eslint-disable-next-line @typescript-eslint/no-var-requires
            const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
            return nodePath.extname(path);
        }
        // Browser implementation
        const basename = path.split("/").pop() || "";
        const dotIndex = basename.lastIndexOf(".");
        // A leading dot marks a dotfile (".bashrc"), not an extension; this
        // matches what Node's path.extname returns.
        return dotIndex <= 0 ? "" : basename.slice(dotIndex);
    },
};
/**
 * Reads a ReadableStream<Uint8Array> (Web) or NodeJS.ReadableStream completely
 * and returns its contents as a single Buffer (in Node.js) or Uint8Array (in Browser).
 * @param stream The stream to read.
 * @returns A promise that resolves with the stream contents.
 * @throws Error when `stream` is neither a Web stream (has `getReader`) nor
 *         an event-emitter style stream (has `on`)
 */
async function streamToBuffer(stream) {
    // The `in` operator below throws a TypeError on null and primitives;
    // report those through the same "unsupported" error as any other bad input.
    if (stream === null || (typeof stream !== "object" && typeof stream !== "function")) {
        throw new Error("Unsupported stream type provided to streamToBuffer");
    }
    const chunks = [];
    let totalLength = 0;
    // Check if it's a Web ReadableStream (has getReader)
    if ("getReader" in stream && typeof stream.getReader === "function") {
        const reader = stream.getReader();
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) {
                    break;
                }
                if (value) {
                    chunks.push(value);
                    totalLength += value.length;
                }
            }
        }
        finally {
            // Always release the reader lock, even if a read rejects
            reader.releaseLock();
        }
        // Concatenate AFTER the loop for Web Streams
        if (isNode) {
            // Convert chunks to Buffer before concatenating in Node
            const bufferChunks = chunks.map((chunk) => Buffer.from(chunk));
            return Buffer.concat(bufferChunks, totalLength);
        }
        // Browser environment: concatenate Uint8Array chunks
        const result = new Uint8Array(totalLength);
        let offset = 0;
        for (const chunk of chunks) {
            result.set(chunk, offset);
            offset += chunk.length;
        }
        return result;
    }
    if (typeof stream.on === "function") {
        // Assume it's a Node.js Readable stream
        return new Promise((resolve, reject) => {
            const nodeStream = stream;
            nodeStream.on("data", (chunk) => {
                const bufferChunk = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                chunks.push(bufferChunk);
                totalLength += bufferChunk.length;
            });
            nodeStream.on("end", () => {
                // Concatenate collected Buffer chunks
                resolve(Buffer.concat(chunks, totalLength));
            });
            nodeStream.on("error", (err) => {
                reject(err);
            });
        });
    }
    // Handle unexpected stream type if it's neither Web nor Node stream
    throw new Error("Unsupported stream type provided to streamToBuffer");
}
/**
 * Utility functions for handling different audio input sources
 */
/**
 * Validates that exactly one input source is provided
 * @param input Object that may carry text, filename, audioBytes or audioStream
 * @throws Error when no source, or more than one source, is truthy
 */
function validateSpeakInput(input) {
    const sourceKeys = ["text", "filename", "audioBytes", "audioStream"];
    const provided = sourceKeys.reduce((count, key) => (input[key] ? count + 1 : count), 0);
    if (provided === 0) {
        throw new Error("No input provided. Please provide text, filename, audioBytes, or audioStream.");
    }
    if (provided > 1) {
        throw new Error("Multiple input sources provided. Please provide only one of: text, filename, audioBytes, or audioStream.");
    }
}
/**
 * Determines the audio format from a filename extension
 * @param filename File name or path
 * @returns MIME type for the extension; "audio/wav" when it is not recognised
 */
function getAudioFormatFromFilename(filename) {
    const mimeByExtension = new Map([
        ["mp3", "audio/mpeg"],
        ["wav", "audio/wav"],
        ["ogg", "audio/ogg"],
        ["opus", "audio/opus"],
        ["aac", "audio/aac"],
        ["flac", "audio/flac"],
    ]);
    // Text after the last dot, lower-cased (the whole name when there is no dot)
    const suffix = filename.toLowerCase().split(".").pop();
    return mimeByExtension.has(suffix) ? mimeByExtension.get(suffix) : "audio/wav"; // Default fallback
}
/**
 * Attempts to detect audio format from byte signature
 * @param audioBytes Audio data
 * @returns Detected MIME type; "audio/wav" when fewer than 4 bytes are given
 *          or no known signature matches
 */
function detectAudioFormat(audioBytes) {
    const fallback = "audio/wav";
    if (audioBytes.length < 4) {
        return fallback;
    }
    // Only the first 12 bytes are inspected
    const head = Array.from(audioBytes.slice(0, 12));
    const matchesAt = (offset, signature) => signature.every((byte, i) => head[offset + i] === byte);
    // MP3 - "ID3" tag or MPEG frame sync (0xFF followed by three set bits)
    const hasId3Tag = matchesAt(0, [0x49, 0x44, 0x33]);
    const hasFrameSync = head[0] === 0xff && (head[1] & 0xe0) === 0xe0;
    if (hasId3Tag || hasFrameSync) {
        return "audio/mpeg";
    }
    // WAV - "RIFF" at offset 0 and "WAVE" at offset 8
    if (matchesAt(0, [0x52, 0x49, 0x46, 0x46]) && matchesAt(8, [0x57, 0x41, 0x56, 0x45])) {
        return "audio/wav";
    }
    // OGG - "OggS"
    if (matchesAt(0, [0x4f, 0x67, 0x67, 0x53])) {
        return "audio/ogg";
    }
    // FLAC - "fLaC"
    if (matchesAt(0, [0x66, 0x4c, 0x61, 0x43])) {
        return "audio/flac";
    }
    return fallback;
}
/**
 * Reads an audio file and returns its contents as Uint8Array
 * Only works in Node.js environment
 * @param filename Path of the file to read
 * @returns Promise resolving to the file contents
 * @throws Error outside Node.js, or when the file cannot be read
 */
async function readAudioFile(filename) {
    if (!isNode) {
        throw new Error("File reading is only supported in Node.js environment");
    }
    try {
        const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
        const buffer = await fs.readFile(filename);
        return new Uint8Array(buffer);
    }
    catch (error) {
        throw new Error(`Failed to read audio file "${filename}": ${error instanceof Error ? error.message : String(error)}`);
    }
}
/**
 * Converts an audio stream to bytes
 * @param stream Stream accepted by streamToBuffer
 * @returns Promise resolving to the stream contents as a Uint8Array
 */
async function streamToBytes(stream) {
    const result = await streamToBuffer(stream);
    // Convert Buffer to Uint8Array if needed (Node.js). `Buffer` is not a
    // global in browsers, so guard the reference: an unguarded
    // `instanceof Buffer` is a ReferenceError there.
    if (typeof Buffer !== "undefined" && result instanceof Buffer) {
        return new Uint8Array(result);
    }
    return result;
}
/**
 * Processes the input and returns audio bytes with format information
 * @param input Object carrying exactly one of audioBytes, audioStream or filename
 * @returns Promise resolving to `{ audioBytes, mimeType }`
 * @throws Error when validation fails or the single input given is not an audio source
 */
async function processAudioInput(input) {
    validateSpeakInput(input);
    if (input.audioBytes) {
        return {
            audioBytes: input.audioBytes,
            mimeType: detectAudioFormat(input.audioBytes),
        };
    }
    if (input.audioStream) {
        const audioBytes = await streamToBytes(input.audioStream);
        return {
            audioBytes,
            mimeType: detectAudioFormat(audioBytes),
        };
    }
    if (input.filename) {
        const audioBytes = await readAudioFile(input.filename);
        return {
            audioBytes,
            // Files are typed by extension rather than by content
            mimeType: getAudioFormatFromFilename(input.filename),
        };
    }
    throw new Error("No valid audio input provided");
}
/**
 * Frozen namespace object exposing the audio-input helpers
 */
var audioInput = /*#__PURE__*/ Object.freeze({
    __proto__: null,
    detectAudioFormat: detectAudioFormat,
    getAudioFormatFromFilename: getAudioFormatFromFilename,
    processAudioInput: processAudioInput,
    readAudioFile: readAudioFile,
    streamToBytes: streamToBytes,
    validateSpeakInput: validateSpeakInput
});
processAudioInput: processAudioInput, readAudioFile: readAudioFile, streamToBytes: streamToBytes, validateSpeakInput: validateSpeakInput }); /** * Abstract base class for all TTS clients * This provides a unified interface for all TTS providers */ class AbstractTTSClient { /** * Creates a new TTS client * @param credentials Provider-specific credentials */ constructor(credentials) { this.credentials = credentials; /** * Currently selected voice ID */ this.voiceId = null; /** * Currently selected language */ this.lang = "en-US"; /** * Event callbacks */ this.callbacks = {}; /** * TTS properties (rate, pitch, volume) */ this.properties = { volume: 100, rate: "medium", pitch: "medium", }; /** * Word timings for the current audio */ this.timings = []; /** * Capability signaling for UIs to filter providers without hardcoding names * Engines can override these in their constructors. */ this.capabilities = { browserSupported: true, nodeSupported: true, needsWasm: false, }; /** * Audio sample rate in Hz * This is used for playback and word timing estimation * Default is 24000 Hz, but engines can override this */ this.sampleRate = 24000; this.ssml = new SSMLBuilder(); this.audio = { isPlaying: false, isPaused: false, audioElement: null, position: 0, duration: 0, }; } /** * Synthesize text to audio bytes with format conversion support * This is the recommended method when you need a specific audio format * @param text Text or SSML to synthesize * @param options Synthesis options including format (mp3, wav, ogg) * @returns Promise resolving to audio bytes in the requested format * @example * // Get MP3 audio * const mp3Bytes = await tts.synthToBytesWithFormat('Hello world', { format: 'mp3' }); * * // Get WAV audio (default) * const wavBytes = await tts.synthToBytesWithFormat('Hello world', { format: 'wav' }); */ async synthToBytesWithFormat(text, options) { return this.synthToBytesWithConversion(text, options); } // --- Format conversion support --- /** * Synthesize text to 
audio bytes with format conversion support * This method wraps the engine's native synthToBytes and adds format conversion * @param text Text or SSML to synthesize * @param options Synthesis options including format * @returns Promise resolving to audio bytes in the requested format */ async synthToBytesWithConversion(text, options) { // Get audio from the engine's native implementation const nativeAudioBytes = await this.synthToBytes(text, options); // If no format specified, return native audio if (!(options === null || options === void 0 ? void 0 : options.format)) { return nativeAudioBytes; } // Check if conversion is needed and available const requestedFormat = options.format; const nativeFormat = this.detectNativeFormat(nativeAudioBytes); // If already in requested format, return as-is if (nativeFormat === requestedFormat) { return nativeAudioBytes; } // Try to convert if conversion is available (Node only) if (!isNode) { console.warn(`Audio format conversion not available in browser. Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`); return nativeAudioBytes; } try { const { isAudioConversionAvailable, convertAudioFormat } = await (new Function('m', 'return import(m)'))('../utils/audio-converter'); if (isAudioConversionAvailable()) { try { const conversionResult = await convertAudioFormat(nativeAudioBytes, requestedFormat); return conversionResult.audioBytes; } catch (error) { console.warn(`Audio format conversion failed: ${error instanceof Error ? error.message : String(error)}`); console.warn(`Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`); } } else { console.warn(`Audio format conversion not available. 
Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`); } } catch (_a) { console.warn(`Audio converter not available at runtime; returning native format (${nativeFormat})`); } // Fallback: return native audio return nativeAudioBytes; } /** * Detect the native audio format produced by this engine * @param audioBytes Audio bytes to analyze * @returns Detected audio format */ detectNativeFormat(audioBytes) { const detectedMimeType = detectAudioFormat(audioBytes); switch (detectedMimeType) { case "audio/mpeg": return "mp3"; case "audio/ogg": return "ogg"; case "audio/wav": default: return "wav"; } } /** * Get available voices from the provider with normalized language codes * @returns Promise resolving to an array of unified voice objects */ async getVoices() { // Get raw voices from the engine-specific implementation const rawVoices = await this._getVoices(); // Process and normali