speechflow
Version:
Speech Processing Flow Graph
198 lines • 8.6 kB
JavaScript
"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/*  Module interop helper: re-export property `key` of `source` on
    `target` under the name `alias` (defaults to `key`). A pre-existing
    helper on `this` is reused when available. On engines providing
    Object.create the export is a property descriptor (a live getter
    unless the source property can be forwarded unchanged); otherwise
    the current value is simply copied.  */
var __createBinding = (this && this.__createBinding) || (function () {
    /*  legacy engines: plain one-time value copy  */
    if (!Object.create) {
        return function (target, source, key, alias) {
            var exportName = alias === undefined ? key : alias;
            target[exportName] = source[key];
        };
    }

    /*  modern engines: descriptor based binding  */
    return function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);

        /*  decide whether the source descriptor has to be replaced by a
            forwarding getter: always when it is missing, for accessors
            only when the source is not flagged as an ES module, and for
            data properties whenever they are writable or configurable  */
        var needsGetter;
        if (!descriptor)
            needsGetter = true;
        else if ("get" in descriptor)
            needsGetter = !source.__esModule;
        else
            needsGetter = descriptor.writable || descriptor.configurable;

        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () { return source[key]; }
            };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
/*  Module interop helper: attach `value` as the "default" export of
    `namespace`. A pre-existing helper on `this` is reused when
    available. On engines providing Object.create the export is defined
    as an enumerable property via Object.defineProperty; otherwise it
    is a plain assignment.  */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    var defineDefault = function (namespace, value) {
        Object.defineProperty(namespace, "default", { enumerable: true, value: value });
    };
    var assignDefault = function (namespace, value) {
        namespace["default"] = value;
    };
    return Object.create ? defineDefault : assignDefault;
})();
/*  Module interop helper: produce a namespace object for `mod`.
    Objects flagged with `__esModule` are returned untouched. For
    anything else a fresh object is built which re-exports every own
    property of `mod` except "default" (through __createBinding) and
    exposes `mod` itself as "default" (through __setModuleDefault).
    A pre-existing helper on `this` is reused when available.  */
var __importStar = (this && this.__importStar) || (function () {
    /*  own-key lister, resolved lazily on first use  */
    var listOwnKeys = null;
    var ownKeysOf = function (obj) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || function (target) {
                var names = [];
                for (var name in target) {
                    if (Object.prototype.hasOwnProperty.call(target, name))
                        names[names.length] = name;
                }
                return names;
            };
        }
        return listOwnKeys(obj);
    };

    return function (mod) {
        /*  ES modules are passed through as-is  */
        if (mod && mod.__esModule)
            return mod;

        /*  build a namespace object around everything else  */
        var namespace = {};
        if (mod != null) {
            var keys = ownKeysOf(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key === "default")
                    continue;
                __createBinding(namespace, mod, key);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/*  Module interop helper: return `mod` unchanged when it is flagged
    with `__esModule`, otherwise wrap it into an object exposing it as
    the "default" export. A pre-existing helper on `this` is reused
    when available.  */
var __importDefault = (this && this.__importDefault) || function (mod) {
    var isEsModule = Boolean(mod && mod.__esModule);
    if (isEsModule)
        return mod;
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_stream_1 = __importDefault(require("node:stream"));
/* external dependencies */
const get_stream_1 = require("get-stream");
const speex_resampler_1 = __importDefault(require("speex-resampler"));
const client_polly_1 = require("@aws-sdk/client-polly");
/* internal dependencies */
const speechflow_node_1 = __importDefault(require("./speechflow-node"));
const util = __importStar(require("./speechflow-util"));
/*
**  SpeechFlow node for Amazon Polly text-to-speech conversion.
**
**  Accepts "text" chunks, sends every non-empty string payload to the
**  AWS Polly API (PCM output at 16KHz), resamples the returned audio
**  to the configured audio sample rate and emits the result as an
**  "audio" chunk cloned from the input chunk.
*/
class SpeechFlowNodeT2AAmazon extends speechflow_node_1.default {
    /* declare official node name */
    static name = "t2a-amazon";

    /* internal state */
    client = null;      /* AWS Polly client, set by open(), cleared by close() */
    destroyed = false;  /* raised by close() to reject late and in-flight work */
    resampler = null;   /* Speex resampler, set by open(), cleared by close() */

    /* construct node (throws if AWS credentials are not configured) */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);

        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY },
            secKey: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY_SEC },
            region: { type: "string", val: "eu-central-1" },
            voice: { type: "string", val: "Amy", pos: 0, match: /^(?:Amy|Danielle|Joanna|Matthew|Ruth|Stephen|Vicki|Daniel)$/ },
            language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ }
        });

        /* sanity check parameters */
        if (!this.params.key)
            throw new Error("AWS Access Key not configured");
        if (!this.params.secKey)
            throw new Error("AWS Secret Access Key not configured");

        /* declare node input/output format */
        this.input = "text";
        this.output = "audio";
    }

    /* one-time status of node (nothing to report) */
    async status() {
        return {};
    }

    /* open node: validate the voice, connect to AWS Polly and set up
       the text-to-audio transform stream (throws on unsupported
       voice or voice/language mismatch) */
    async open() {
        /* clear destruction flag */
        this.destroyed = false;

        /* list of voices */
        const voices = {
            "Amy": { language: "en", languageCode: "en-GB", engine: "generative" },
            "Danielle": { language: "en", languageCode: "en-US", engine: "generative" },
            "Joanna": { language: "en", languageCode: "en-US", engine: "generative" },
            "Matthew": { language: "en", languageCode: "en-US", engine: "generative" },
            "Ruth": { language: "en", languageCode: "en-US", engine: "generative" },
            "Stephen": { language: "en", languageCode: "en-US", engine: "generative" },
            "Vicki": { language: "de", languageCode: "de-DE", engine: "generative" },
            "Daniel": { language: "de", languageCode: "de-DE", engine: "generative" },
        };

        /* validate the voice before any resource is allocated, so a
           configuration error leaves nothing behind to clean up */
        const voiceConfig = voices[this.params.voice];
        if (voiceConfig === undefined)
            throw new Error("unsupported voice");
        if (voiceConfig.language !== this.params.language)
            throw new Error(`voice does only support language "${voiceConfig.language}"`);

        /* wait for the resampler library to become usable
           NOTE(review): speex-resampler documents that its WASM module
           is compiled asynchronously and that "initPromise" has to be
           awaited before "processChunk" is called -- confirm against
           the installed version (awaiting is harmless otherwise) */
        await speex_resampler_1.default.initPromise;

        /* establish AWS Polly connection */
        this.client = new client_polly_1.PollyClient({
            region: this.params.region,
            credentials: {
                accessKeyId: this.params.key,
                secretAccessKey: this.params.secKey
            }
        });

        /* establish resampler from AWS Polly's maximum 16Khz output
           (for PCM output) to our standard audio sample rate (48KHz) */
        this.resampler = new speex_resampler_1.default(1, 16000, this.config.audioSampleRate, 7);

        /* perform text-to-speech operation with AWS Polly API
           (resolves to the resampled audio buffer) */
        const textToSpeech = async (text) => {
            const cmd = new client_polly_1.SynthesizeSpeechCommand({
                LanguageCode: voiceConfig.languageCode,
                Engine: voiceConfig.engine,
                VoiceId: this.params.voice,
                OutputFormat: "pcm",
                SampleRate: "16000", /* maximum supported for PCM output */
                TextType: "text",
                Text: text
            });
            const res = await this.client.send(cmd);

            /* a missing optional response field shows up as
               "undefined", so check for both absent values */
            const stream = res.AudioStream;
            if (stream === undefined || stream === null)
                throw new Error("stream not returned");
            const buffer = await (0, get_stream_1.getStreamAsBuffer)(stream);

            /* close() may have run while the request was in flight and
               dropped the resampler: fail explicitly instead of
               dereferencing null */
            if (this.destroyed || this.resampler === null)
                throw new Error("stream destroyed during processing");
            return this.resampler.processChunk(buffer);
        };

        /* create transform stream and connect it to the AWS Polly API */
        const self = this;
        this.stream = new node_stream_1.default.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            transform(chunk, encoding, callback) {
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"));
                    return;
                }

                /* only text payloads can be synthesized; reject anything
                   else through the callback instead of throwing */
                if (typeof chunk.payload !== "string") {
                    callback(new Error("invalid chunk payload type"));
                    return;
                }

                /* nothing to synthesize for empty text */
                if (chunk.payload.length === 0) {
                    callback();
                    return;
                }

                self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`);
                textToSpeech(chunk.payload).then((buffer) => {
                    if (self.destroyed)
                        throw new Error("stream destroyed during processing");

                    /* emit a copy of the input chunk carrying the audio */
                    const chunkNew = chunk.clone();
                    chunkNew.type = "audio";
                    chunkNew.payload = buffer;
                    this.push(chunkNew);
                    callback();
                }).catch((error) => {
                    callback(util.ensureError(error, "failed to send to AWS Polly"));
                });
            },
            final(callback) {
                /* after close() there is nothing left to signal */
                if (self.destroyed) {
                    callback();
                    return;
                }

                /* signal end-of-stream to the readable side */
                this.push(null);
                callback();
            }
        });
    }

    /* close node: flag destruction and release resampler, client and stream */
    async close() {
        /* indicate destruction */
        this.destroyed = true;

        /* destroy resampler */
        this.resampler = null;

        /* destroy AWS Polly API */
        if (this.client !== null) {
            this.client.destroy();
            this.client = null;
        }

        /* destroy stream (tolerate a node which was never opened) */
        if (this.stream !== null && this.stream !== undefined) {
            this.stream.destroy();
            this.stream = null;
        }
    }
}
exports.default = SpeechFlowNodeT2AAmazon;
//# sourceMappingURL=speechflow-node-t2a-amazon.js.map