UNPKG

speechflow

Version:

Speech Processing Flow Graph

198 lines 8.6 kB
"use strict";
/*
**  SpeechFlow - Speech Processing Flow Graph
**  Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
**  Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/

/*  compiler-emitted module interop helpers (kept token-for-token)  */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });

/*  standard dependencies  */
const node_stream_1 = __importDefault(require("node:stream"));

/*  external dependencies  */
const get_stream_1 = require("get-stream");
const speex_resampler_1 = __importDefault(require("speex-resampler"));
const client_polly_1 = require("@aws-sdk/client-polly");

/*  internal dependencies  */
const speechflow_node_1 = __importDefault(require("./speechflow-node"));
const util = __importStar(require("./speechflow-util"));

/*
**  SpeechFlow node for Amazon Polly text-to-speech conversion.
**
**  Consumes chunks whose payload is text, sends the text to AWS Polly,
**  resamples the returned 16 kHz PCM audio to the configured audio sample
**  rate and emits a cloned chunk of type "audio" carrying the result.
*/
class SpeechFlowNodeT2AAmazon extends speechflow_node_1.default {
    /*  declare official node name  */
    static name = "t2a-amazon";

    /*  internal state: Polly client and resampler exist only between open() and close()  */
    client = null;
    destroyed = false;
    resampler = null;

    /*
    **  construct node
    **
    **  Arguments are passed through unchanged to the base class.
    **  Throws if either AWS credential parameter is empty after configuration.
    */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);

        /*  declare node configuration parameters  */
        this.configure({
            key:      { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY },
            secKey:   { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY_SEC },
            region:   { type: "string", val: "eu-central-1" },
            voice:    { type: "string", val: "Amy", pos: 0, match: /^(?:Amy|Danielle|Joanna|Matthew|Ruth|Stephen|Vicki|Daniel)$/ },
            language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ }
        });

        /*  sanity check parameters  */
        if (!this.params.key)
            throw new Error("AWS Access Key not configured");
        if (!this.params.secKey)
            throw new Error("AWS Secret Access Key not configured");

        /*  declare node input/output format  */
        this.input = "text";
        this.output = "audio";
    }

    /*  one-time status of node (this node reports nothing)  */
    async status() {
        return {};
    }

    /*
    **  open node
    **
    **  Creates the Polly client, validates the voice/language combination,
    **  creates the resampler and installs the transform stream in this.stream.
    **  Throws if the voice is unknown or does not match the configured language.
    */
    async open() {
        /*  clear destruction flag  */
        this.destroyed = false;

        /*  establish AWS Polly connection
            (no null check afterwards: a constructor call either returns an object or throws)  */
        this.client = new client_polly_1.PollyClient({
            region: this.params.region,
            credentials: {
                accessKeyId: this.params.key,
                secretAccessKey: this.params.secKey
            }
        });

        /*  list of voices  */
        const voices = {
            "Amy":      { language: "en", languageCode: "en-GB", engine: "generative" },
            "Danielle": { language: "en", languageCode: "en-US", engine: "generative" },
            "Joanna":   { language: "en", languageCode: "en-US", engine: "generative" },
            "Matthew":  { language: "en", languageCode: "en-US", engine: "generative" },
            "Ruth":     { language: "en", languageCode: "en-US", engine: "generative" },
            "Stephen":  { language: "en", languageCode: "en-US", engine: "generative" },
            "Vicki":    { language: "de", languageCode: "de-DE", engine: "generative" },
            "Daniel":   { language: "de", languageCode: "de-DE", engine: "generative" },
        };
        const voiceConfig = voices[this.params.voice];
        if (voiceConfig === undefined)
            throw new Error("unsupported voice");
        if (voiceConfig.language !== this.params.language)
            throw new Error(`voice does only support language "${voiceConfig.language}"`);

        /*  perform text-to-speech operation with AWS Polly API:
            resolves to the resampled audio for the given text  */
        const textToSpeech = async (text) => {
            const cmd = new client_polly_1.SynthesizeSpeechCommand({
                LanguageCode: voiceConfig.languageCode,
                Engine: voiceConfig.engine,
                VoiceId: this.params.voice,
                OutputFormat: "pcm",
                SampleRate: "16000", /* maximum supported for PCM output */
                TextType: "text",
                Text: text
            });
            const res = await this.client.send(cmd);
            const stream = res.AudioStream;

            /*  AudioStream is an optional response field, so a missing stream
                can surface as undefined as well as null: reject both  */
            if (stream === undefined || stream === null)
                throw new Error("stream not returned");
            const buffer = await (0, get_stream_1.getStreamAsBuffer)(stream);

            /*  close() may have dropped the resampler while the request was
                in flight: fail explicitly instead of dereferencing null  */
            if (this.resampler === null)
                throw new Error("stream destroyed during processing");
            return this.resampler.processChunk(buffer);
        };

        /*  establish resampler from AWS Polly's maximum 16Khz output
            (for PCM output) to our standard audio sample rate (48KHz)  */
        this.resampler = new speex_resampler_1.default(1, 16000, this.config.audioSampleRate, 7);

        /*  create transform stream and connect it to the AWS Polly API  */
        const self = this;
        this.stream = new node_stream_1.default.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            transform(chunk, encoding, callback) {
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"));
                    return;
                }
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"));
                else if (chunk.payload.length > 0) {
                    self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`);
                    textToSpeech(chunk.payload).then((buffer) => {
                        /*  thrown errors are routed to the callback by the catch below  */
                        if (self.destroyed)
                            throw new Error("stream destroyed during processing");
                        const chunkNew = chunk.clone();
                        chunkNew.type = "audio";
                        chunkNew.payload = buffer;
                        this.push(chunkNew);
                        callback();
                    }).catch((error) => {
                        callback(util.ensureError(error, "failed to send to AWS Polly"));
                    });
                }
                else
                    /*  empty payload: nothing to synthesize, nothing to emit  */
                    callback();
            },
            final(callback) {
                if (self.destroyed) {
                    callback();
                    return;
                }
                this.push(null);
                callback();
            }
        });
    }

    /*
    **  close node
    **
    **  Marks the node as destroyed first, so that in-flight transform calls
    **  report an error instead of pushing data, then releases the resampler,
    **  the Polly client and the stream.
    */
    async close() {
        /*  indicate destruction  */
        this.destroyed = true;

        /*  destroy resampler  */
        if (this.resampler !== null)
            this.resampler = null;

        /*  destroy AWS Polly API  */
        if (this.client !== null) {
            this.client.destroy();
            this.client = null;
        }

        /*  destroy stream  */
        if (this.stream !== null) {
            this.stream.destroy();
            this.stream = null;
        }
    }
}
exports.default = SpeechFlowNodeT2AAmazon;
//# sourceMappingURL=speechflow-node-t2a-amazon.js.map