UNPKG

@ziplayer/plugin

Version:

A modular Discord voice player with plugin system

316 lines 12.3 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.TTSPlugin = void 0; const ziplayer_1 = require("ziplayer"); const stream_1 = require("stream"); const zitts_1 = require("@zibot/zitts"); const axios_1 = __importDefault(require("axios")); /** * A plugin for Text-to-Speech (TTS) functionality. * * This plugin provides support for: * - Converting text to speech using Google TTS * - Custom TTS providers via the createStream hook * - Multiple language support * - Configurable speech rate (normal/slow) * - TTS query parsing with language and speed options * * @example * const ttsPlugin = new TTSPlugin({ * defaultLang: "en", * slow: false * }); * * // Add to PlayerManager * const manager = new PlayerManager({ * plugins: [ttsPlugin] * }); * * // Search for TTS content * const result = await ttsPlugin.search("tts:Hello world", "user123"); * const stream = await ttsPlugin.getStream(result.tracks[0]); * * @example * // Custom TTS provider * const customTTSPlugin = new TTSPlugin({ * defaultLang: "en", * createStream: async (text, ctx) => { * // Custom TTS implementation * return customTTSProvider.synthesize(text, ctx.lang); * } * }); * * @since 1.0.0 */ class TTSPlugin extends ziplayer_1.BasePlugin { /** * Creates a new TTSPlugin instance. * * @param opts - Configuration options for the TTS plugin * @param opts.defaultLang - Default language code for TTS (default: "vi") * @param opts.slow - Whether to use slow speech rate (default: false) * @param opts.createStream - Optional custom TTS provider function * * @example * // Basic TTS with Vietnamese as default * const ttsPlugin = new TTSPlugin(); * * // TTS with English as default and slow speech * const slowTTSPlugin = new TTSPlugin({ * defaultLang: "en", * slow: true * }); * * // TTS with custom provider * const customTTSPlugin = new TTSPlugin({ * defaultLang: "en", * createStream: async (text, ctx) => { * return await myCustomTTS.synthesize(text, ctx.lang); * } * }); */ constructor(opts) { super(); this.name = "tts"; this.version = "1.0.0"; this.opts = { defaultLang: opts?.defaultLang || "vi", slow: !!opts?.slow, createStream: opts?.createStream, }; } /** * Determines if this plugin can handle the given query. * * @param query - The search query to check * @returns `true` if the query starts with "tts:" or "say ", `false` otherwise * * @example * plugin.canHandle("tts:Hello world"); // true * plugin.canHandle("say Hello world"); // true * plugin.canHandle("youtube.com/watch?v=123"); // false */ canHandle(query) { if (!query) return false; const q = query.trim().toLowerCase(); return q.startsWith("tts:") || q.startsWith("say "); } /** * Creates a TTS track from the given query. * * This method parses TTS queries and creates a track that can be played as audio. * It supports various query formats including language and speed specifications. * * @param query - The TTS query to process * @param requestedBy - The user ID who requested the TTS * @returns A SearchResult containing a single TTS track * * @example * // Basic TTS * const result = await plugin.search("tts:Hello world", "user123"); * * // TTS with specific language * const result2 = await plugin.search("tts:en:Hello world", "user123"); * * // TTS with language and slow speed * const result3 = await plugin.search("tts:en:true:Hello world", "user123"); * * // Using "say" prefix * const result4 = await plugin.search("say Hello world", "user123"); */ async search(query, requestedBy) { if (!this.canHandle(query)) { return { tracks: [] }; } const { text, lang, slow } = this.parseQuery(query); const config = { text, lang, slow }; const url = this.encodeConfig(config); const title = `TTS (${lang}${slow ? ", slow" : ""}): ${text.slice(0, 64)}${text.length > 64 ? "…" : ""}`; const estimatedSeconds = Math.max(1, Math.min(60, Math.ceil(text.length / 12))); const track = { id: `tts-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, title, url, duration: estimatedSeconds, requestedBy, source: this.name, metadata: { tts: config }, }; return { tracks: [track] }; } /** * Generates an audio stream for a TTS track. * * This method converts the text in the track to speech using either the custom * TTS provider (if configured) or the built-in Google TTS service. It handles * various return types from custom providers and ensures proper stream formatting. * * @param track - The TTS track to convert to audio * @returns A StreamInfo object containing the audio stream * @throws {Error} If TTS generation fails or no audio URLs are returned * * @example * const track = { id: "tts-123", title: "TTS: Hello world", ... }; * const streamInfo = await plugin.getStream(track); * console.log(streamInfo.type); // "arbitrary" * console.log(streamInfo.stream); // Readable stream with audio */ async getStream(track) { const cfg = this.extractConfig(track); if (this.opts.createStream && typeof this.opts.createStream === "function") { const out = await this.opts.createStream(cfg.text, { lang: cfg.lang, slow: cfg.slow, track }); let type; let metadata; let stream = null; const normType = (t) => { if (!t || typeof t !== "string") return undefined; const v = t.toLowerCase(); if (v.includes("webm") && v.includes("opus")) return "webm/opus"; if (v.includes("ogg") && v.includes("opus")) return "ogg/opus"; return undefined; }; if (out && typeof out === "object") { // If it's already a Readable/Buffer/Uint8Array/ArrayBuffer/URL, let toReadable handle it if (out instanceof stream_1.Readable || out instanceof Buffer || out instanceof Uint8Array || out instanceof ArrayBuffer || out instanceof URL) { stream = await this.toReadable(out); } else if (out.stream) { const o = out; stream = o.stream; type = normType(o.type); metadata = o.metadata; } else if (out.url) { const o = out; const urlStr = o.url.toString(); try { type = normType(o.type) || (urlStr.endsWith(".webm") ? "webm/opus" : urlStr.endsWith(".ogg") ? "ogg/opus" : undefined); const res = await axios_1.default.get(urlStr, { responseType: "stream" }); stream = res.data; metadata = o.metadata; } catch (e) { throw new Error(`Failed to fetch custom TTS URL: ${e}`); } } } if (!stream) { stream = await this.toReadable(out); } return { stream, type: type || "arbitrary", metadata: { provider: "custom", ...(metadata || {}) } }; } const urls = (0, zitts_1.getTTSUrls)(cfg.text, { lang: cfg.lang, slow: cfg.slow }); if (!urls || urls.length === 0) { throw new Error("TTS returned no audio URLs"); } const parts = await Promise.all(urls.map((u) => axios_1.default.get(u, { responseType: "arraybuffer" }).then((r) => Buffer.from(r.data)))); const merged = Buffer.concat(parts); const stream = stream_1.Readable.from([merged]); return { stream, type: "arbitrary", metadata: { size: merged.length } }; } async toReadable(out) { if (out instanceof stream_1.Readable) return out; if (typeof out === "string" || out instanceof URL) { const url = out instanceof URL ? out.toString() : out; if (/^https?:\/\//i.test(url)) { const res = await axios_1.default.get(url, { responseType: "stream" }); return res.data; } return stream_1.Readable.from([Buffer.from(url)]); } if (out instanceof Buffer) return stream_1.Readable.from([out]); if (out instanceof Uint8Array) return stream_1.Readable.from([Buffer.from(out)]); if (out instanceof ArrayBuffer) return stream_1.Readable.from([Buffer.from(out)]); throw new Error("Unsupported return type from createStream"); } parseQuery(query) { const isLangCode = (s) => /^[a-z]{2,3}(?:-[A-Z]{2})?$/.test(s); const raw = query.trim(); let text = raw; let lang = this.opts.defaultLang; let slow = this.opts.slow; const lower = raw.toLowerCase(); if (lower.startsWith("say ")) { text = raw.slice(4).trim(); } else if (lower.startsWith("tts:")) { const body = raw.slice(4).trim(); // Supported: // - "tts: <text>" (text may contain colons) // - "tts:<lang>:<text>" // - "tts:<lang>:<slow>:<text>" where slow in {0,1,true,false} const firstSep = body.indexOf(":"); if (firstSep === -1) { text = body; } else { const maybeLang = body.slice(0, firstSep).trim(); const rest = body.slice(firstSep + 1).trim(); if (isLangCode(maybeLang)) { lang = maybeLang; const secondSep = rest.indexOf(":"); if (secondSep !== -1) { const maybeSlow = rest.slice(0, secondSep).trim().toLowerCase(); const remaining = rest.slice(secondSep + 1).trim(); if (["0", "1", "true", "false"].includes(maybeSlow)) { slow = maybeSlow === "1" || maybeSlow === "true"; text = remaining; } else { text = rest; } } else { text = rest; } } else { text = body; } } } text = (text || "").trim(); if (!text) throw new Error("No text provided for TTS"); return { text, lang, slow }; } encodeConfig(cfg) { const payload = encodeURIComponent(JSON.stringify(cfg)); return `tts://${payload}`; } extractConfig(track) { const meta = track.metadata?.tts; if (meta && meta.text) return meta; try { const url = track.url || ""; const encoded = url.startsWith("tts://") ? url.slice("tts://".length) : url; const cfg = JSON.parse(decodeURIComponent(encoded)); return { text: cfg.text, lang: cfg.lang || this.opts.defaultLang, slow: !!cfg.slow }; } catch { return { text: track.title || "", lang: this.opts.defaultLang, slow: this.opts.slow }; } } } exports.TTSPlugin = TTSPlugin; //# sourceMappingURL=TTSPlugin.js.map