js-tts-wrapper
Version:
A JavaScript/TypeScript library that provides a unified API for working with multiple cloud-based Text-to-Speech (TTS) services
1,378 lines (1,368 loc) • 581 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('path')) :
typeof define === 'function' && define.amd ? define(['exports', 'path'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.JSTTSWrapper = {}, global.require$$0));
})(this, (function (exports, require$$0) { 'use strict';
/**
 * Copy the own enumerable keys of each namespace-like object in `m` onto `n`
 * (skipping `default` and keys already present), then freeze `n`.
 * Strings, arrays and falsy entries in `m` are ignored.
 * @param n Target namespace object
 * @param m Array of candidate namespace objects
 * @returns The frozen target object
 */
function _mergeNamespaces(n, m) {
    for (const e of m) {
        // Only plain namespace objects contribute keys.
        if (!e || typeof e === 'string' || Array.isArray(e)) continue;
        for (const k of Object.keys(e)) {
            if (k === 'default' || k in n) continue;
            const d = Object.getOwnPropertyDescriptor(e, k);
            // Preserve live getters; otherwise forward reads to the source object.
            Object.defineProperty(n, k, d.get ? d : {
                enumerable: true,
                get: () => e[k],
            });
        }
    }
    return Object.freeze(n);
}
/**
 * Fluent builder for assembling SSML markup strings.
 */
class SSMLBuilder {
    constructor() {
        this.ssml = "";
    }
    /**
     * Set the builder content from text or ready-made SSML.
     * Plain text is wrapped in <speak> tags; existing SSML is kept as-is.
     * @param text Text or SSML to add
     * @returns The resulting SSML string
     */
    add(text) {
        this.ssml = text.trim().startsWith("<speak") ? text : `<speak>${text}</speak>`;
        return this.ssml;
    }
    /**
     * Insert a pause immediately before the closing </speak> tag.
     * @param time Break duration (e.g. '500ms')
     * @returns this, for chaining
     */
    addBreak(time = "500ms") {
        const breakTag = `<break time="${time}"/>`;
        this.ssml = this.ssml.replace("</speak>", `${breakTag}</speak>`);
        return this;
    }
    /**
     * Insert a <prosody> element right after the opening <speak> tag,
     * creating the <speak> wrapper when none exists yet.
     * @param text Text to wrap with prosody
     * @param rate Speech rate
     * @param pitch Speech pitch
     * @param volume Speech volume
     * @returns this, for chaining
     */
    addProsody(text, rate, pitch, volume) {
        const attrs = [];
        if (rate) attrs.push(` rate="${rate}"`);
        if (pitch) attrs.push(` pitch="${pitch}"`);
        if (volume) attrs.push(` volume="${volume}"`);
        const prosodyElement = `<prosody${attrs.join("")}>${text}</prosody>`;
        this.ssml = this.ssml.includes("<speak>")
            ? this.ssml.replace("<speak>", `<speak>${prosodyElement}`)
            : `<speak>${prosodyElement}</speak>`;
        return this;
    }
    /**
     * Wrap plain text with <speak> tags if they are not already present.
     * @param text Text to wrap
     * @returns SSML string with speak tags
     */
    wrapWithSpeak(text) {
        return text.trim().startsWith("<speak") ? text : `<speak>${text}</speak>`;
    }
    /**
     * Reset the builder to an empty string.
     */
    clearSSML() {
        this.ssml = "";
    }
    /**
     * @returns The current SSML string
     */
    toString() {
        return this.ssml;
    }
}
/**
 * Language utilities for normalizing language codes across different formats
 */
/**
 * Language normalization utilities
 */
class LanguageNormalizer {
    /**
     * Normalize a language code to standard formats.
     * @param langCode Input language code (ISO639-1/2/3, BCP47, or locale; an
     *   optional "mms_" engine prefix is stripped)
     * @param countryCode Optional country code used when langCode carries no region
     * @returns StandardizedLanguage object containing normalized codes
     */
    static normalize(langCode, countryCode) {
        try {
            // Strip the MMS engine prefix when present.
            const code = langCode.startsWith("mms_") ? langCode.slice(4) : langCode;
            let language;
            let region;
            if (code.includes("-")) {
                // BCP-47 style tag such as "en-US": split into language + region.
                const parts = code.split("-");
                language = parts[0].toLowerCase();
                region = parts[1].toUpperCase();
            }
            else {
                language = code.toLowerCase();
                // Fall back to the explicit country code, if any.
                region = countryCode == null ? undefined : countryCode.toUpperCase();
            }
            // ISO 639-3: map two-letter codes, pass anything else through.
            const iso639_3 = LanguageNormalizer.iso1To3[language] || language;
            const bcp47 = region ? `${language}-${region}` : language;
            // Human-readable name, with a region suffix when one is known.
            let display = LanguageNormalizer.languageNames[language] || language;
            if (region) {
                display += ` (${LanguageNormalizer.regionNames[region] || region})`;
            }
            return { iso639_3, bcp47, display, countryCode: region };
        }
        catch (_error) {
            // Unparseable input: report "undetermined" per BCP-47 conventions.
            return { iso639_3: "und", bcp47: "und", display: "Unknown" };
        }
    }
    /**
     * Get the display name for a language code.
     * @param langCode Language code
     * @returns Display name
     */
    static getDisplayName(langCode) {
        return LanguageNormalizer.normalize(langCode).display;
    }
    /**
     * Get the ISO 639-3 code for a language code.
     * @param langCode Language code
     * @returns ISO 639-3 code
     */
    static getISO639_3(langCode) {
        return LanguageNormalizer.normalize(langCode).iso639_3;
    }
    /**
     * Get the BCP-47 tag for a language code.
     * @param langCode Language code
     * @param countryCode Optional country code
     * @returns BCP-47 tag
     */
    static getBCP47(langCode, countryCode) {
        return LanguageNormalizer.normalize(langCode, countryCode).bcp47;
    }
}
/**
 * Common language display names (ISO 639-1 code -> English name)
 */
LanguageNormalizer.languageNames = {
    en: "English", fr: "French", es: "Spanish", de: "German", it: "Italian",
    ja: "Japanese", ko: "Korean", zh: "Chinese", ru: "Russian", pt: "Portuguese",
    ar: "Arabic", hi: "Hindi", nl: "Dutch", sv: "Swedish", fi: "Finnish",
    no: "Norwegian", da: "Danish", pl: "Polish", tr: "Turkish", cs: "Czech",
    hu: "Hungarian", el: "Greek", he: "Hebrew", th: "Thai", vi: "Vietnamese",
    id: "Indonesian", ms: "Malay", ro: "Romanian", sk: "Slovak", uk: "Ukrainian",
    bg: "Bulgarian", hr: "Croatian", lt: "Lithuanian", lv: "Latvian",
    et: "Estonian", sl: "Slovenian", sr: "Serbian",
};
/**
 * Common region display names (ISO 3166-1 alpha-2 -> English name)
 */
LanguageNormalizer.regionNames = {
    US: "United States", GB: "United Kingdom", AU: "Australia", CA: "Canada",
    IN: "India", IE: "Ireland", ZA: "South Africa", NZ: "New Zealand",
    FR: "France", DE: "Germany", IT: "Italy", ES: "Spain", MX: "Mexico",
    JP: "Japan", KR: "Korea", CN: "China", TW: "Taiwan", HK: "Hong Kong",
    BR: "Brazil", PT: "Portugal", RU: "Russia",
};
/**
 * ISO 639-1 (two-letter) to ISO 639-3 (three-letter) mapping
 */
LanguageNormalizer.iso1To3 = {
    ar: "ara", bg: "bul", ca: "cat", cs: "ces", da: "dan", de: "deu",
    el: "ell", en: "eng", es: "spa", et: "est", fi: "fin", fr: "fra",
    he: "heb", hi: "hin", hr: "hrv", hu: "hun", id: "ind", it: "ita",
    ja: "jpn", ko: "kor", lt: "lit", lv: "lav", ms: "msa", nl: "nld",
    no: "nor", pl: "pol", pt: "por", ro: "ron", ru: "rus", sk: "slk",
    sl: "slv", sr: "srp", sv: "swe", th: "tha", tr: "tur", uk: "ukr",
    vi: "vie", zh: "zho",
};
/**
 * SSML Compatibility Layer
 *
 * This module provides cross-engine SSML compatibility by:
 * 1. Validating SSML structure
 * 2. Converting SSML to engine-specific formats
 * 3. Providing fallbacks for unsupported features
 * 4. Ensuring proper SSML nesting and structure
 */
/**
 * SSML capabilities for different TTS engines.
 *
 * Each entry declares: whether the engine accepts SSML at all, a coarse
 * support level ("full" | "limited" | "none"), which tags are supported or
 * must be stripped ("*" in unsupportedTags means "strip everything"), and
 * whether the <speak> element needs an xmlns namespace and/or version
 * attribute. Consumed by SSMLCompatibilityManager.getCapabilities().
 */
const ENGINE_SSML_CAPABILITIES = {
// Full SSML Support
sapi: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: true,
},
witai: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: false,
},
watson: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
],
unsupportedTags: [],
requiresNamespace: false,
requiresVersion: false,
},
// Azure: full support, but requires xmlns and version attributes on <speak>
// and adds the vendor-specific mstts:express-as extension.
azure: {
supportsSSML: true,
supportLevel: "full",
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mstts:express-as",
],
unsupportedTags: [],
requiresNamespace: true,
requiresVersion: true,
},
// Partial SSML support: effective tag set varies per voice (see
// VOICE_SPECIFIC_CAPABILITIES below).
polly: {
supportsSSML: true,
supportLevel: "limited", // Depends on voice engine type
supportedTags: [
"speak",
"prosody",
"break",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mark",
"lang",
],
unsupportedTags: [], // Depends on voice engine type
requiresNamespace: true,
requiresVersion: false,
},
google: {
supportsSSML: true,
supportLevel: "limited", // Depends on voice type
supportedTags: [
"speak",
"prosody",
"break",
"emphasis",
"voice",
"phoneme",
"say-as",
"sub",
"p",
"s",
"mark",
"lang",
"audio",
],
unsupportedTags: [], // Depends on voice type
requiresNamespace: false,
requiresVersion: false,
},
// No SSML Support: all tags are stripped before synthesis.
elevenlabs: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
openai: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
playht: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
upliftai: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
sherpaonnx: {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
"sherpaonnx-wasm": {
supportsSSML: false,
supportLevel: "none",
supportedTags: [],
unsupportedTags: ["*"],
requiresNamespace: false,
requiresVersion: false,
},
// eSpeak accepts a limited subset; voice/phoneme/say-as/sub are stripped.
espeak: {
supportsSSML: true,
supportLevel: "limited",
supportedTags: ["speak", "prosody", "break", "emphasis", "p", "s"],
unsupportedTags: ["voice", "phoneme", "say-as", "sub"],
requiresNamespace: false,
requiresVersion: false,
},
"espeak-wasm": {
supportsSSML: true,
supportLevel: "limited",
supportedTags: ["speak", "prosody", "break", "emphasis", "p", "s"],
unsupportedTags: ["voice", "phoneme", "say-as", "sub"],
requiresNamespace: false,
requiresVersion: false,
},
};
/**
 * Voice-specific SSML capabilities for engines with dynamic support.
 *
 * For Polly and Google the effective SSML support depends on the voice
 * family, not just the engine. Keys are the voice types returned by
 * SSMLCompatibilityManager.detectVoiceType(); values override the engine's
 * base supportLevel/unsupportedTags in getCapabilities().
 */
const VOICE_SPECIFIC_CAPABILITIES = {
// Amazon Polly voice engine types
polly: {
standard: {
supportLevel: "full",
unsupportedTags: [],
},
"long-form": {
supportLevel: "full",
unsupportedTags: [],
},
// Neural and generative voices reject several tags accepted by
// standard voices.
neural: {
supportLevel: "limited",
unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect"],
},
generative: {
supportLevel: "limited",
unsupportedTags: ["emphasis", "amazon:auto-breaths", "amazon:effect", "mark"],
},
},
// Google Cloud TTS voice types
google: {
standard: {
supportLevel: "full",
unsupportedTags: [],
},
wavenet: {
supportLevel: "full",
unsupportedTags: [],
},
neural2: {
supportLevel: "limited",
unsupportedTags: ["mark"],
},
// Journey and Studio voices take plain text only: strip every tag.
journey: {
supportLevel: "none",
unsupportedTags: ["*"],
},
studio: {
supportLevel: "none",
unsupportedTags: ["*"],
},
},
};
/**
 * SSML Compatibility Manager
 *
 * Static utility that validates SSML against a target engine's declared
 * capabilities (ENGINE_SSML_CAPABILITIES / VOICE_SPECIFIC_CAPABILITIES) and
 * rewrites SSML so unsupported features are removed before synthesis.
 */
// biome-ignore lint/complexity/noStaticOnlyClass: using a static utility class for organization
class SSMLCompatibilityManager {
    /**
     * Get SSML capabilities for a specific engine and voice.
     * Unknown engines default to "no SSML support" so their markup is stripped.
     * @param engine Engine name (key of ENGINE_SSML_CAPABILITIES)
     * @param voiceId Optional voice ID for voice-specific overrides
     * @returns Capabilities object for the engine/voice combination
     */
    static getCapabilities(engine, voiceId) {
        const baseCapabilities = ENGINE_SSML_CAPABILITIES[engine];
        if (!baseCapabilities) {
            // Default to no SSML support for unknown engines.
            return {
                supportsSSML: false,
                supportLevel: "none",
                supportedTags: [],
                unsupportedTags: ["*"],
                requiresNamespace: false,
                requiresVersion: false,
            };
        }
        // For engines with voice-specific capabilities, adjust based on voice.
        if (voiceId && VOICE_SPECIFIC_CAPABILITIES[engine]) {
            const voiceCapabilities = SSMLCompatibilityManager.getVoiceSpecificCapabilities(engine, voiceId);
            if (voiceCapabilities) {
                return {
                    ...baseCapabilities,
                    supportLevel: voiceCapabilities.supportLevel,
                    unsupportedTags: voiceCapabilities.unsupportedTags,
                };
            }
        }
        return baseCapabilities;
    }
    /**
     * Get voice-specific SSML capabilities, or null when the engine has no
     * per-voice table or the detected voice type is not listed.
     */
    static getVoiceSpecificCapabilities(engine, voiceId) {
        const engineCapabilities = VOICE_SPECIFIC_CAPABILITIES[engine];
        if (!engineCapabilities)
            return null;
        // Determine voice type based on voice ID patterns.
        const voiceType = SSMLCompatibilityManager.detectVoiceType(engine, voiceId);
        return engineCapabilities[voiceType] || null;
    }
    /**
     * Detect voice type from voice ID by substring matching (case-insensitive).
     * @returns A key of the engine's VOICE_SPECIFIC_CAPABILITIES table, or
     *   "standard"/"default" when nothing matches
     */
    static detectVoiceType(engine, voiceId) {
        const lowerVoiceId = voiceId.toLowerCase();
        switch (engine) {
            case "polly":
                // Amazon Polly voice engine detection
                if (lowerVoiceId.includes("neural"))
                    return "neural";
                if (lowerVoiceId.includes("generative"))
                    return "generative";
                if (lowerVoiceId.includes("long-form"))
                    return "long-form";
                return "standard";
            case "google":
                // Google Cloud TTS voice type detection
                if (lowerVoiceId.includes("neural2"))
                    return "neural2";
                if (lowerVoiceId.includes("journey"))
                    return "journey";
                if (lowerVoiceId.includes("studio"))
                    return "studio";
                if (lowerVoiceId.includes("wavenet"))
                    return "wavenet";
                if (lowerVoiceId.includes("standard"))
                    return "standard";
                // Default to standard for older voice naming.
                return "standard";
            default:
                return "default";
        }
    }
    /**
     * Escape regex metacharacters in a tag name before embedding it in a
     * dynamically built RegExp (defensive: tag names come from capability
     * tables, but "*"-like values must never become wildcards).
     */
    static escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }
    /**
     * Validate SSML for a specific engine.
     * @returns { isValid, errors, warnings } — warnings cover tags that will
     *   be stripped; errors cover structural problems
     */
    static validateSSML(ssml, engine, voiceId) {
        const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId);
        const errors = [];
        const warnings = [];
        // Basic SSML structure validation.
        if (!ssml.trim().startsWith("<speak") || !ssml.trim().endsWith("</speak>")) {
            errors.push("SSML must be wrapped in <speak> tags");
        }
        // Check if engine supports SSML at all.
        if (!capabilities.supportsSSML) {
            warnings.push(`Engine '${engine}' does not support SSML. Tags will be stripped.`);
            return {
                isValid: true, // Valid for processing (will be stripped)
                errors,
                warnings,
            };
        }
        // Validate unsupported tags.
        if (capabilities.unsupportedTags.includes("*")) {
            warnings.push(`Engine '${engine}' does not support any SSML tags. All tags will be stripped.`);
        }
        else {
            for (const unsupportedTag of capabilities.unsupportedTags) {
                // (?=[\s/>]) anchors the tag name so e.g. "s" does not match
                // "<speak>" or "<sub>" (bug fix).
                const escaped = SSMLCompatibilityManager.escapeRegExp(unsupportedTag);
                const tagRegex = new RegExp(`<${escaped}(?=[\\s/>])[^>]*>`, "gi");
                if (tagRegex.test(ssml)) {
                    warnings.push(`Tag '<${unsupportedTag}>' is not supported by engine '${engine}' and will be removed.`);
                }
            }
        }
        // Check for required attributes.
        if (capabilities.requiresNamespace && !ssml.includes("xmlns=")) {
            warnings.push(`Engine '${engine}' requires xmlns attribute in <speak> tag.`);
        }
        if (capabilities.requiresVersion && !ssml.includes("version=")) {
            warnings.push(`Engine '${engine}' requires version attribute in <speak> tag.`);
        }
        return {
            isValid: errors.length === 0,
            errors,
            warnings,
        };
    }
    /**
     * Process SSML for engine compatibility: strip everything for non-SSML
     * engines, remove individually unsupported tags, and add any required
     * <speak> attributes.
     */
    static processSSMLForEngine(ssml, engine, voiceId) {
        const capabilities = SSMLCompatibilityManager.getCapabilities(engine, voiceId);
        // If engine doesn't support SSML, strip all tags.
        if (!capabilities.supportsSSML) {
            return SSMLCompatibilityManager.stripAllSSMLTags(ssml);
        }
        let processedSSML = ssml;
        // Remove unsupported tags.
        if (capabilities.unsupportedTags.includes("*")) {
            return SSMLCompatibilityManager.stripAllSSMLTags(ssml);
        }
        for (const unsupportedTag of capabilities.unsupportedTags) {
            processedSSML = SSMLCompatibilityManager.removeSSMLTag(processedSSML, unsupportedTag);
        }
        // Add required attributes.
        processedSSML = SSMLCompatibilityManager.addRequiredAttributes(processedSSML, capabilities);
        return processedSSML;
    }
    /**
     * Strip all SSML tags from text, keeping the spoken content.
     */
    static stripAllSSMLTags(ssml) {
        let result = ssml;
        // Remove the wrapper and self-closing tags first.
        result = result.replace(/<speak[^>]*>/gi, "");
        result = result.replace(/<\/speak>/gi, "");
        result = result.replace(/<break[^>]*\/?>/gi, " ");
        // Handle nested tags by repeatedly unwrapping them until stable.
        let previousResult = "";
        while (result !== previousResult) {
            previousResult = result;
            result = result.replace(/<emphasis[^>]*>(.*?)<\/emphasis>/gis, "$1");
            result = result.replace(/<prosody[^>]*>(.*?)<\/prosody>/gis, "$1");
            result = result.replace(/<voice[^>]*>(.*?)<\/voice>/gis, "$1");
            result = result.replace(/<say-as[^>]*>(.*?)<\/say-as>/gis, "$1");
            result = result.replace(/<phoneme[^>]*>(.*?)<\/phoneme>/gis, "$1");
            result = result.replace(/<sub[^>]*>(.*?)<\/sub>/gis, "$1");
            // (?=[\s/>]) keeps "<p" / "<s" from matching "<prosody>", "<speak>"
            // etc. (bug fix).
            result = result.replace(/<p(?=[\s/>])[^>]*>(.*?)<\/p>/gis, "$1 ");
            result = result.replace(/<s(?=[\s/>])[^>]*>(.*?)<\/s>/gis, "$1 ");
            result = result.replace(/<lang[^>]*>(.*?)<\/lang>/gis, "$1");
            result = result.replace(/<audio[^>]*>(.*?)<\/audio>/gis, "$1");
            result = result.replace(/<mark[^>]*\/?>/gi, "");
            // Remove any remaining XML-like tags.
            result = result.replace(/<[^>]+>/g, "");
        }
        // Clean up whitespace.
        result = result.replace(/\s+/g, " ").trim();
        return result;
    }
    /**
     * Remove a specific SSML tag (self-closing and paired forms), keeping the
     * content of paired tags.
     *
     * Fixes vs. the previous version: the tag name is anchored with
     * (?=[\s/>]) so short names like "s"/"p" cannot match longer tags such as
     * <speak>/<prosody>; the paired regex uses the "s" flag so content
     * spanning newlines is matched; and the replacement loops so nested
     * occurrences of the same tag are fully unwrapped.
     */
    static removeSSMLTag(ssml, tagName) {
        const tag = SSMLCompatibilityManager.escapeRegExp(tagName);
        // Remove self-closing tags.
        const selfClosingRegex = new RegExp(`<${tag}(?=[\\s/>])[^>]*\\/>`, "gi");
        let result = ssml.replace(selfClosingRegex, "");
        // Remove paired tags, keeping content; loop until stable for nesting.
        const pairedRegex = new RegExp(`<${tag}(?=[\\s/>])[^>]*>(.*?)<\\/${tag}>`, "gis");
        let previous;
        do {
            previous = result;
            result = result.replace(pairedRegex, "$1");
        } while (result !== previous);
        return result;
    }
    /**
     * Add attributes the engine requires on the opening <speak> tag
     * (xmlns namespace and/or version), when not already present.
     */
    static addRequiredAttributes(ssml, capabilities) {
        let processedSSML = ssml;
        // Add namespace if required.
        if (capabilities.requiresNamespace && !ssml.includes("xmlns=")) {
            processedSSML = processedSSML.replace(/<speak([^>]*)>/i, '<speak$1 xmlns="http://www.w3.org/2001/10/synthesis">');
        }
        // Add version if required.
        if (capabilities.requiresVersion && !ssml.includes("version=")) {
            processedSSML = processedSSML.replace(/<speak([^>]*)>/i, '<speak version="1.0"$1>');
        }
        return processedSSML;
    }
}
/**
 * Check whether a string is SSML (starts with <speak and ends with </speak>,
 * after trimming surrounding whitespace).
 * @param text Text to check
 * @returns True if the text is SSML
 */
function isSSML(text) {
    const trimmed = text.trim();
    return trimmed.startsWith("<speak") && trimmed.endsWith("</speak>");
}
/**
 * Validate SSML for a specific engine.
 * Thin module-level wrapper around SSMLCompatibilityManager.validateSSML.
 * @param ssml SSML text to validate
 * @param engine Target TTS engine
 * @param voiceId Optional voice ID for voice-specific validation
 * @returns Validation result: { isValid, errors, warnings }
 */
function validateSSMLForEngine(ssml, engine, voiceId) {
return SSMLCompatibilityManager.validateSSML(ssml, engine, voiceId);
}
/**
 * Process SSML for engine compatibility.
 * Thin module-level wrapper around SSMLCompatibilityManager.processSSMLForEngine:
 * strips or rewrites tags the target engine cannot handle.
 * @param ssml SSML text to process
 * @param engine Target TTS engine
 * @param voiceId Optional voice ID for voice-specific processing
 * @returns Processed SSML (or plain text) compatible with the target engine
 */
function processSSMLForEngine(ssml, engine, voiceId) {
return SSMLCompatibilityManager.processSSMLForEngine(ssml, engine, voiceId);
}
/**
 * Strip SSML tags from text, returning the plain spoken words.
 * Simple regex-based implementation — for production, consider using a
 * proper XML parser.
 * @param ssml SSML text
 * @returns Plain text without SSML tags
 */
function stripSSML(ssml) {
    // One rule per tag kind; breaks become a single space, paragraph and
    // sentence tags keep their content followed by a space.
    const rules = [
        [/<speak.*?>/g, ""],
        [/<\/speak>/g, ""],
        [/<break.*?\/>/g, " "],
        [/<emphasis.*?>(.*?)<\/emphasis>/g, "$1"],
        [/<prosody.*?>(.*?)<\/prosody>/g, "$1"],
        [/<voice.*?>(.*?)<\/voice>/g, "$1"],
        [/<say-as.*?>(.*?)<\/say-as>/g, "$1"],
        [/<phoneme.*?>(.*?)<\/phoneme>/g, "$1"],
        [/<sub.*?>(.*?)<\/sub>/g, "$1"],
        [/<p>(.*?)<\/p>/g, "$1 "],
        [/<s>(.*?)<\/s>/g, "$1 "],
    ];
    let text = ssml;
    for (const [pattern, replacement] of rules) {
        text = text.replace(pattern, replacement);
    }
    // Collapse the whitespace the removals left behind.
    return text.replace(/\s+/g, " ").trim();
}
/**
 * Wrap text with <speak> tags unless it is already SSML.
 * @param text Text to wrap
 * @returns SSML with speak tags
 */
function wrapWithSpeakTags(text) {
    return isSSML(text) ? text : `<speak>${text}</speak>`;
}
/**
 * Environment detection and cross-platform utilities
 */
/**
 * True when running in a browser (a global `window` object exists).
 * The `$1` suffix is a bundler-generated rename to avoid an identifier clash.
 */
const isBrowser$1 = typeof window !== "undefined";
/**
 * True when running under Node.js: not a browser, and the
 * `process.versions.node` marker is present. Each level is typeof-guarded so
 * the check never throws in environments where `process` is undefined.
 */
const isNode = !isBrowser$1 &&
typeof process !== "undefined" &&
typeof process.versions !== "undefined" &&
typeof process.versions.node !== "undefined";
/**
 * File system utilities that work in both environments.
 *
 * Node.js paths load fs via `new Function(...)('node:fs...')` — an indirect
 * dynamic import/require whose module name is hidden from bundlers (note the
 * 'node' + ':fs' string split), so browser bundles do not try to resolve
 * Node built-ins. Browser paths fall back to fetch()/download semantics.
 */
const fileSystem = {
/**
 * Read a file asynchronously.
 * In Node.js: fs/promises.readFile as UTF-8. In browsers: fetch(path) and
 * return the response body as text.
 * @param path Path to the file (or URL in browsers)
 * @returns Promise resolving to the file contents as a string
 * @throws Error in browsers when the fetch response is not ok
 */
readFile: async (path) => {
if (isNode) {
// Node.js implementation (indirect import keeps bundlers away from node:fs).
const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
return fs.readFile(path, "utf-8");
}
// Browser implementation - fetch from URL
const response = await fetch(path);
if (!response.ok) {
throw new Error(`Failed to fetch ${path}: ${response.status} ${response.statusText}`);
}
return response.text();
},
/**
 * Read a file synchronously (Node.js only).
 * @param path Path to the file
 * @returns File contents as a string
 * @throws Error in browsers (no synchronous file access)
 */
readFileSync: (path) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
return fs.readFileSync(path, "utf-8");
}
throw new Error("Synchronous file reading is not supported in browsers");
},
/**
 * Write a file asynchronously.
 * In Node.js: fs/promises.writeFile. In browsers: triggers a download of the
 * data via a temporary object URL and a programmatic <a> click.
 * @param path Path to the file (browsers use only the basename as filename)
 * @param data Data to write
 * @returns Promise resolving when the file is written (or download started)
 */
writeFile: async (path, data) => {
if (isNode) {
// Node.js implementation
const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
return fs.writeFile(path, data);
}
// Browser implementation - download file
const blob = new Blob([data], { type: "application/octet-stream" });
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = path.split("/").pop() || "download";
document.body.appendChild(a);
a.click();
// Clean up the anchor and object URL after the click has been processed.
setTimeout(() => {
if (document === null || document === void 0 ? void 0 : document.body) {
document.body.removeChild(a);
}
URL.revokeObjectURL(url);
}, 100);
},
/**
 * Write a file synchronously (Node.js only).
 * @param path Path to the file
 * @param data Data to write
 * @throws Error in browsers (no synchronous file access)
 */
writeFileSync: (path, data) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
fs.writeFileSync(path, data);
}
else {
throw new Error("Synchronous file writing is not supported in browsers");
}
},
/**
 * Check if a file exists asynchronously.
 * In Node.js: fs.access. In browsers: a HEAD request against the path.
 * @param path Path to the file (or URL in browsers)
 * @returns Promise resolving to true if the file exists, false otherwise
 */
exists: async (path) => {
if (isNode) {
// Node.js implementation
const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
try {
await fs.access(path);
return true;
}
catch (_a) {
return false;
}
}
else {
// Browser implementation - try to fetch
try {
const response = await fetch(path, { method: "HEAD" });
return response.ok;
}
catch (_b) {
return false;
}
}
},
/**
 * Check if a file exists synchronously (Node.js only).
 * @param path Path to the file
 * @returns True if the file exists, false otherwise
 * @throws Error in browsers (no synchronous check possible)
 */
existsSync: (path) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const fs = (new Function('n', 'return require(n)'))('node' + ':fs');
return fs.existsSync(path);
}
throw new Error("Synchronous file existence check is not supported in browsers");
},
};
/**
 * Path utilities that work in both environments.
 *
 * In Node.js these delegate to node:path (loaded via an indirect require so
 * bundlers don't resolve it); in browsers they fall back to simple
 * '/'-separator string manipulation.
 */
const pathUtils = {
/**
 * Join path segments.
 * @param paths Path segments to join
 * @returns Joined path (browser fallback collapses duplicate slashes)
 */
join: (...paths) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const path = (new Function('n', 'return require(n)'))('node' + ':path');
return path.join(...paths);
}
// Browser implementation
return paths.join("/").replace(/\/+/g, "/");
},
/**
 * Get the directory name of a path.
 * @param path Path
 * @returns Directory name ("." when the path has no directory part)
 */
dirname: (path) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
return nodePath.dirname(path);
}
// Browser implementation
return path.split("/").slice(0, -1).join("/") || ".";
},
/**
 * Get the base name (last segment) of a path.
 * @param path Path
 * @returns Base name ("" for an empty path)
 */
basename: (path) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
return nodePath.basename(path);
}
// Browser implementation
return path.split("/").pop() || "";
},
/**
 * Get the extension of a path, including the leading dot.
 * @param path Path
 * @returns Extension (e.g. ".wav"), or "" when there is none
 */
extname: (path) => {
if (isNode) {
// Node.js implementation
// eslint-disable-next-line @typescript-eslint/no-var-requires
const nodePath = (new Function('n', 'return require(n)'))('node' + ':path');
return nodePath.extname(path);
}
// Browser implementation: take the last segment and slice from its last dot.
const basename = path.split("/").pop() || "";
const dotIndex = basename.lastIndexOf(".");
return dotIndex === -1 ? "" : basename.slice(dotIndex);
},
};
/**
 * Reads a ReadableStream<Uint8Array> (Web) or NodeJS.ReadableStream completely
 * and returns its contents as a single Buffer (in Node.js) or Uint8Array
 * (in the browser).
 * @param stream The stream to read
 * @returns A promise that resolves with the full stream contents
 * @throws Error when the argument is neither a Web nor a Node stream
 */
async function streamToBuffer(stream) {
    const chunks = [];
    let totalLength = 0;
    // A Web ReadableStream is identified by its getReader() method.
    if ("getReader" in stream && typeof stream.getReader === "function") {
        const reader = stream.getReader();
        try {
            for (;;) {
                const { done, value } = await reader.read();
                if (done) break;
                if (value) {
                    // Web streams yield Uint8Array chunks.
                    chunks.push(value);
                    totalLength += value.length;
                }
            }
        }
        finally {
            reader.releaseLock();
        }
        // Concatenate after the loop completes.
        if (isNode) {
            // In Node, normalize chunks to Buffer before concatenating.
            return Buffer.concat(chunks.map((chunk) => Buffer.from(chunk)), totalLength);
        }
        // Browser: merge the Uint8Array chunks manually.
        const merged = new Uint8Array(totalLength);
        let cursor = 0;
        for (const chunk of chunks) {
            merged.set(chunk, cursor);
            cursor += chunk.length;
        }
        return merged;
    }
    // Anything with .on(...) is treated as a Node.js Readable stream.
    if (typeof stream.on === "function") {
        return new Promise((resolve, reject) => {
            stream.on("data", (chunk) => {
                const bufferChunk = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                chunks.push(bufferChunk);
                totalLength += bufferChunk.length;
            });
            stream.on("end", () => resolve(Buffer.concat(chunks, totalLength)));
            stream.on("error", (err) => reject(err));
        });
    }
    // Neither a Web nor a Node stream.
    throw new Error("Unsupported stream type provided to streamToBuffer");
}
/**
 * Utility functions for handling different audio input sources
 */
/**
 * Validates that exactly one input source is provided on the input object
 * (text, filename, audioBytes, or audioStream).
 * @param input Object carrying at most one audio input source
 * @throws Error when no source, or more than one source, is set
 */
function validateSpeakInput(input) {
    const provided = [input.text, input.filename, input.audioBytes, input.audioStream].filter(Boolean);
    if (provided.length === 0) {
        throw new Error("No input provided. Please provide text, filename, audioBytes, or audioStream.");
    }
    if (provided.length > 1) {
        throw new Error("Multiple input sources provided. Please provide only one of: text, filename, audioBytes, or audioStream.");
    }
}
/**
 * Determines the audio MIME type from a filename extension
 * (case-insensitive). Unknown extensions fall back to "audio/wav".
 * @param filename File name or path
 * @returns MIME type string
 */
function getAudioFormatFromFilename(filename) {
    const extension = filename.toLowerCase().split(".").pop();
    const mimeByExtension = {
        mp3: "audio/mpeg",
        wav: "audio/wav",
        ogg: "audio/ogg",
        opus: "audio/opus",
        aac: "audio/aac",
        flac: "audio/flac",
    };
    return mimeByExtension[extension] || "audio/wav"; // Default fallback
}
/**
 * Attempts to detect the audio MIME type from the leading byte signature.
 * Recognizes MP3 (ID3 tag or MPEG frame sync), WAV (RIFF....WAVE), OGG
 * ("OggS") and FLAC ("fLaC"); anything else falls back to "audio/wav".
 * @param audioBytes Raw audio bytes
 * @returns MIME type string
 */
function detectAudioFormat(audioBytes) {
    if (audioBytes.length < 4) {
        return "audio/wav"; // Too short to carry a signature
    }
    const header = Array.from(audioBytes.slice(0, 12));
    // Compare a run of bytes at a given offset against an expected signature.
    const matchesAt = (offset, signature) => signature.every((byte, i) => header[offset + i] === byte);
    // MP3: "ID3" tag, or an MPEG frame-sync pattern (0xFF Ex/Fx).
    if (matchesAt(0, [0x49, 0x44, 0x33]) || (header[0] === 0xff && (header[1] & 0xe0) === 0xe0)) {
        return "audio/mpeg";
    }
    // WAV: "RIFF" chunk id followed by "WAVE" format tag at offset 8.
    if (matchesAt(0, [0x52, 0x49, 0x46, 0x46]) && matchesAt(8, [0x57, 0x41, 0x56, 0x45])) {
        return "audio/wav";
    }
    // OGG: "OggS" capture pattern.
    if (matchesAt(0, [0x4f, 0x67, 0x67, 0x53])) {
        return "audio/ogg";
    }
    // FLAC: "fLaC" marker.
    if (matchesAt(0, [0x66, 0x4c, 0x61, 0x43])) {
        return "audio/flac";
    }
    return "audio/wav"; // Default fallback
}
/**
 * Reads an audio file and returns its contents as Uint8Array.
 * Only works in the Node.js environment.
 * @param filename Path of the audio file to read
 * @returns Promise resolving to the raw file bytes
 * @throws Error in browsers, or when the file cannot be read
 */
async function readAudioFile(filename) {
    if (!isNode) {
        throw new Error("File reading is only supported in Node.js environment");
    }
    try {
        // Indirect dynamic import keeps bundlers from statically resolving node:fs.
        const fs = await (new Function('m', 'return import(m)'))('node:fs/promises');
        const buffer = await fs.readFile(filename);
        return new Uint8Array(buffer);
    }
    catch (error) {
        // Bug fix: the message previously contained the literal text
        // "$(unknown)" instead of interpolating the filename.
        throw new Error(`Failed to read audio file "${filename}": ${error instanceof Error ? error.message : String(error)}`);
    }
}
/**
 * Converts an audio stream to bytes.
 * Delegates to streamToBuffer, then normalizes a Node.js Buffer result to a
 * Uint8Array so callers get a uniform type in both environments.
 * @param stream Web ReadableStream or Node.js Readable stream
 * @returns Promise resolving to the stream contents as Uint8Array
 */
async function streamToBytes(stream) {
    const result = await streamToBuffer(stream);
    // Bug fix: guard the Buffer reference — in browsers `Buffer` is not
    // defined and a bare `result instanceof Buffer` throws a ReferenceError.
    if (typeof Buffer !== "undefined" && result instanceof Buffer) {
        return new Uint8Array(result);
    }
    return result;
}
/**
 * Processes the input object and returns audio bytes plus MIME type.
 * Exactly one of audioBytes / audioStream / filename must be set (text is
 * validated but not handled here).
 * @param input Object with one audio input source
 * @returns Promise resolving to { audioBytes, mimeType }
 * @throws Error when validation fails or no usable audio source is present
 */
async function processAudioInput(input) {
    validateSpeakInput(input);
    // Raw bytes: sniff the format from the byte signature.
    if (input.audioBytes) {
        return {
            audioBytes: input.audioBytes,
            mimeType: detectAudioFormat(input.audioBytes),
        };
    }
    // Stream: drain it first, then sniff the collected bytes.
    if (input.audioStream) {
        const collected = await streamToBytes(input.audioStream);
        return {
            audioBytes: collected,
            mimeType: detectAudioFormat(collected),
        };
    }
    // File: read from disk and infer the format from the extension.
    if (input.filename) {
        const fileBytes = await readAudioFile(input.filename);
        return {
            audioBytes: fileBytes,
            mimeType: getAudioFormatFromFilename(input.filename),
        };
    }
    throw new Error("No valid audio input provided");
}
// Frozen namespace object bundling the audio-input helpers above; emitted by
// the bundler to emulate an `import * as audioInput` style re-export. The
// /*#__PURE__*/ annotation lets minifiers drop it when unused.
var audioInput = /*#__PURE__*/Object.freeze({
__proto__: null,
detectAudioFormat: detectAudioFormat,
getAudioFormatFromFilename: getAudioFormatFromFilename,
processAudioInput: processAudioInput,
readAudioFile: readAudioFile,
streamToBytes: streamToBytes,
validateSpeakInput: validateSpeakInput
});
/**
* Abstract base class for all TTS clients
* This provides a unified interface for all TTS providers
*/
class AbstractTTSClient {
/**
* Creates a new TTS client
* @param credentials Provider-specific credentials
*/
constructor(credentials) {
this.credentials = credentials;
/**
 * Currently selected voice ID; null until a voice is chosen.
 */
this.voiceId = null;
/**
 * Currently selected language (BCP-47 tag); defaults to US English.
 */
this.lang = "en-US";
/**
 * Registered event callbacks, keyed by event name.
 */
this.callbacks = {};
/**
 * Default TTS prosody properties (rate, pitch, volume).
 */
this.properties = {
volume: 100,
rate: "medium",
pitch: "medium",
};
/**
 * Word timings for the current audio; populated after synthesis.
 */
this.timings = [];
/**
 * Capability signaling for UIs to filter providers without hardcoding names.
 * Engines can override these in their constructors.
 */
this.capabilities = {
browserSupported: true,
nodeSupported: true,
needsWasm: false,
};
/**
 * Audio sample rate in Hz.
 * This is used for playback and word timing estimation.
 * Default is 24000 Hz, but engines can override this.
 */
this.sampleRate = 24000;
// Shared SSML builder instance used by the speak APIs.
this.ssml = new SSMLBuilder();
// Playback state tracked across play/pause/resume calls.
this.audio = {
isPlaying: false,
isPaused: false,
audioElement: null,
position: 0,
duration: 0,
};
}
/**
 * Synthesize text to audio bytes with format conversion support.
 * This is the recommended method when you need a specific audio format.
 * Thin public alias that delegates to synthToBytesWithConversion.
 * @param text Text or SSML to synthesize
 * @param options Synthesis options including format (mp3, wav, ogg)
 * @returns Promise resolving to audio bytes in the requested format
 * @example
 * // Get MP3 audio
 * const mp3Bytes = await tts.synthToBytesWithFormat('Hello world', { format: 'mp3' });
 *
 * // Get WAV audio (default)
 * const wavBytes = await tts.synthToBytesWithFormat('Hello world', { format: 'wav' });
 */
async synthToBytesWithFormat(text, options) {
return this.synthToBytesWithConversion(text, options);
}
// --- Format conversion support ---
/**
 * Synthesize text to audio bytes with format conversion support.
 * This method wraps the engine's native synthToBytes and adds format
 * conversion: it sniffs the native output format and, in Node.js only,
 * attempts an on-the-fly conversion when the requested format differs.
 * Every failure path degrades gracefully to the native bytes with a
 * console.warn rather than throwing.
 * @param text Text or SSML to synthesize
 * @param options Synthesis options including format
 * @returns Promise resolving to audio bytes in the requested format
 *   (or the native format when conversion is unavailable/fails)
 */
async synthToBytesWithConversion(text, options) {
// Get audio from the engine's native implementation
const nativeAudioBytes = await this.synthToBytes(text, options);
// If no format specified, return native audio
if (!(options === null || options === void 0 ? void 0 : options.format)) {
return nativeAudioBytes;
}
// Check if conversion is needed and available
const requestedFormat = options.format;
const nativeFormat = this.detectNativeFormat(nativeAudioBytes);
// If already in requested format, return as-is
if (nativeFormat === requestedFormat) {
return nativeAudioBytes;
}
// Try to convert if conversion is available (Node only)
if (!isNode) {
console.warn(`Audio format conversion not available in browser. Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`);
return nativeAudioBytes;
}
try {
// Function-wrapped import hides the specifier from bundlers.
// NOTE(review): the relative specifier '../utils/audio-converter' is
// resolved at runtime, not bundle time — verify it resolves in the
// published package layout.
const { isAudioConversionAvailable, convertAudioFormat } = await (new Function('m', 'return import(m)'))('../utils/audio-converter');
if (isAudioConversionAvailable()) {
try {
const conversionResult = await convertAudioFormat(nativeAudioBytes, requestedFormat);
return conversionResult.audioBytes;
}
catch (error) {
console.warn(`Audio format conversion failed: ${error instanceof Error ? error.message : String(error)}`);
console.warn(`Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`);
}
}
else {
console.warn(`Audio format conversion not available. Returning native format (${nativeFormat}) instead of requested format (${requestedFormat})`);
}
}
catch (_a) {
console.warn(`Audio converter not available at runtime; returning native format (${nativeFormat})`);
}
// Fallback: return native audio
return nativeAudioBytes;
}
/**
* Detect the native audio format produced by this engine
* @param audioBytes Audio bytes to analyze
* @returns Detected audio format
*/
detectNativeFormat(audioBytes) {
const detectedMimeType = detectAudioFormat(audioBytes);
switch (detectedMimeType) {
case "audio/mpeg":
return "mp3";
case "audio/ogg":
return "ogg";
case "audio/wav":
default:
return "wav";
}
}
/**
* Get available voices from the provider with normalized language codes
* @returns Promise resolving to an array of unified voice objects
*/
async getVoices() {
// Get raw voices from the engine-specific implementation
const rawVoices = await this._getVoices();
// Process and normali