"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
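/* TypeScript compiler (tsc) emitted CommonJS/ESM module interop helpers */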
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_stream_1 = __importDefault(require("node:stream"));
/* external dependencies */
const client_transcribe_streaming_1 = require("@aws-sdk/client-transcribe-streaming");
const luxon_1 = require("luxon");
/* internal dependencies */
const speechflow_node_1 = __importStar(require("./speechflow-node"));
const util = __importStar(require("./speechflow-util"));
/* helper class for an asynchronous queue */
class AsyncQueue {
    queue = [];
    resolvers = [];
    push(v) {
        const resolve = this.resolvers.shift();
        if (resolve) {
            if (v !== null)
                resolve({ value: v });
            else
                resolve({ value: null, done: true });
        }
        else
            this.queue.push(v);
    }
    destroy() {
        while (this.resolvers.length > 0) {
            const resolve = this.resolvers.shift();
            resolve?.({ value: null, done: true });
        }
        this.queue.length = 0;
    }
    async *[Symbol.asyncIterator]() {
        while (true) {
            if (this.queue.length > 0) {
                const v = this.queue.shift();
                if (v === undefined || v === null)
                    return;
                yield v;
                continue;
            }
            else {
                const it = await new Promise((resolve) => this.resolvers.push(resolve));
                if (it.done)
                    return;
                yield it.value;
            }
        }
    }
}
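/*  Usage sketch (illustrative only, not executed by this module):
    AsyncQueue bridges a push-style producer and a pull-style async
    iterator. Pushing null signals EOF and ends the iteration.

        const q = new AsyncQueue();
        (async () => {
            for await (const item of q)
                console.log(item);  // logs "a", then "b"
        })();
        q.push("a");
        q.push("b");
        q.push(null);               // EOF: the for-await loop terminates
*/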
/* SpeechFlow node for Amazon Transcribe speech-to-text conversion */
class SpeechFlowNodeA2TAmazon extends speechflow_node_1.default {
    /* declare official node name */
    static name = "a2t-amazon";
    /* internal state */
    client = null;
    clientStream = null;
    destroyed = false;
    initTimeout = null;
    connectionTimeout = null;
    queue = null;
    /* construct node */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);
        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY },
            secKey: { type: "string", val: process.env.SPEECHFLOW_AMAZON_KEY_SEC },
            region: { type: "string", val: "eu-central-1" },
            language: { type: "string", val: "en", match: /^(?:de|en)$/ },
            interim: { type: "boolean", val: false }
        });
        /* sanity check parameters */
        if (!this.params.key)
            throw new Error("AWS Access Key not configured");
        if (!this.params.secKey)
            throw new Error("AWS Secret Access Key not configured");
        /* declare node input/output format */
        this.input = "audio";
        this.output = "text";
    }
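    /*  Configuration sketch (shell syntax, placeholder values; the
        environment variable names are taken from the parameter
        declarations above):

            export SPEECHFLOW_AMAZON_KEY="..."
            export SPEECHFLOW_AMAZON_KEY_SEC="..."
    */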
    /* one-time status of node */
    async status() {
        return {};
    }
    /* open node */
    async open() {
        /* sanity check situation */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Amazon Transcribe node currently supports PCM-S16LE audio only");
        /* clear destruction flag */
        this.destroyed = false;
        /* create queue for results */
        this.queue = new util.SingleQueue();
        /* create a store for the meta information */
        const metastore = new util.TimeStore();
        /* connect to Amazon Transcribe API */
        this.client = new client_transcribe_streaming_1.TranscribeStreamingClient({
            region: this.params.region,
            credentials: {
                accessKeyId: this.params.key,
                secretAccessKey: this.params.secKey
            }
        });
        if (this.client === null)
            throw new Error("failed to establish Amazon Transcribe client");
        /* create an AudioStream for Amazon Transcribe */
        const audioQueue = new AsyncQueue();
        const audioStream = (async function* (q) {
            for await (const chunk of q) {
                yield { AudioEvent: { AudioChunk: chunk } };
            }
        })(audioQueue);
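        /*  NB: the AWS SDK consumes the audio input as an async iterable
            of AudioStream union members, each carrying one raw PCM chunk
            as { AudioEvent: { AudioChunk: <Uint8Array> } }; the generator
            above adapts the AsyncQueue to exactly this shape.  */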
        /* start streaming */
        const ensureAudioStreamActive = async () => {
            if (this.clientStream !== null || this.destroyed)
                return;
            const language = this.params.language === "de" ? "de-DE" : "en-US";
            const command = new client_transcribe_streaming_1.StartStreamTranscriptionCommand({
                LanguageCode: language,
                EnablePartialResultsStabilization: this.params.interim,
                ...(this.params.interim ? { PartialResultsStability: "low" } : {}),
                MediaEncoding: "pcm",
                MediaSampleRateHertz: this.config.audioSampleRate,
                AudioStream: audioStream,
            });
            const response = await this.client.send(command);
            const stream = response.TranscriptResultStream;
            if (!stream)
                throw new Error("no TranscriptResultStream returned");
            this.clientStream = stream;
            (async () => {
                for await (const event of stream) {
                    const te = event.TranscriptEvent;
                    if (!te?.Transcript?.Results)
                        continue;
                    for (const result of te.Transcript.Results) {
                        const alt = result.Alternatives?.[0];
                        if (!alt?.Transcript)
                            continue;
                        if (result.IsPartial && !this.params.interim)
                            continue;
                        const text = alt.Transcript ?? "";
                        const kind = result.IsPartial ? "intermediate" : "final";
                        const tsStart = luxon_1.Duration.fromMillis((result.StartTime ?? 0) * 1000).plus(this.timeZeroOffset);
                        const tsEnd = luxon_1.Duration.fromMillis((result.EndTime ?? 0) * 1000).plus(this.timeZeroOffset);
                        const metas = metastore.fetch(tsStart, tsEnd);
                        const meta = metas.reduce((prev, curr) => {
                            curr.forEach((val, key) => { prev.set(key, val); });
                            return prev;
                        }, new Map());
                        if (this.params.interim) {
                            const words = [];
                            for (const item of alt.Items ?? []) {
                                if (item.Type === "pronunciation") {
                                    words.push({
                                        word: item.Content,
                                        start: luxon_1.Duration.fromMillis((item.StartTime ?? 0) * 1000).plus(this.timeZeroOffset),
                                        end: luxon_1.Duration.fromMillis((item.EndTime ?? 0) * 1000).plus(this.timeZeroOffset)
                                    });
                                }
                            }
                            meta.set("words", words);
                        }
                        metastore.prune(tsStart);
                        const chunk = new speechflow_node_1.SpeechFlowChunk(tsStart, tsEnd, kind, "text", text, meta);
                        this.queue?.write(chunk);
                    }
                }
            })().catch((err) => {
                this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${err}`);
            });
        };
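        /*  NB: the Transcribe session is established lazily by
            ensureAudioStreamActive() above, i.e. only once the first
            non-empty audio chunk arrives, so no idle connection is
            held open before any audio actually flows.  */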
        /* remember opening time to derive the time zero offset */
        this.timeOpen = luxon_1.DateTime.now();
        /* provide Duplex stream and internally attach to Amazon Transcribe API */
        const self = this;
        this.stream = new node_stream_1.default.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            write(chunk, encoding, callback) {
                if (self.destroyed || self.client === null) {
                    callback(new Error("stream already destroyed"));
                    return;
                }
                if (chunk.type !== "audio")
                    callback(new Error("expected audio input chunk"));
                else if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("expected Buffer input chunk"));
                else {
                    if (chunk.payload.byteLength > 0) {
                        self.log("debug", `send data (${chunk.payload.byteLength} bytes)`);
                        if (chunk.meta.size > 0)
                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta);
                        audioQueue.push(new Uint8Array(chunk.payload)); /* intentionally discard all time information */
                        ensureAudioStreamActive().catch((error) => {
                            self.log("error", `failed to start audio stream: ${util.ensureError(error).message}`);
                        });
                    }
                    callback();
                }
            },
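            /*  NB: write() above pushes raw audio towards Amazon Transcribe,
                while read() below pulls the recognized text chunks which the
                transcript consumer loop places into the result queue.  */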
            read(size) {
                if (self.destroyed || self.queue === null) {
                    this.push(null);
                    return;
                }
                self.queue.read().then((chunk) => {
                    if (self.destroyed) {
                        this.push(null);
                        return;
                    }
                    if (chunk === null) {
                        self.log("info", "received EOF signal");
                        this.push(null);
                    }
                    else {
                        self.log("debug", `received data (${chunk.payload.length} bytes): "${chunk.payload}"`);
                        this.push(chunk);
                    }
                }).catch((error) => {
                    if (!self.destroyed)
                        self.log("error", `queue read error: ${util.ensureError(error).message}`);
                });
            },
            final(callback) {
                if (self.destroyed || self.client === null) {
                    callback();
                    return;
                }
                util.run(() => self.client.destroy(), (error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`));
                audioQueue.push(null); /* signal EOF to the audio generator only; the readable side is ended separately once Amazon Transcribe finishes */
                audioQueue.destroy();
                callback();
            }
        });
}
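    /*  NB: the Duplex stream above operates in object mode with a
        highWaterMark of 1 on both sides, so SpeechFlow chunks are handed
        through one at a time and backpressure propagates per chunk.  */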
    /* close node */
    async close() {
        /* indicate destruction first to stop all async operations */
        this.destroyed = true;
        /* cleanup all timers */
        if (this.initTimeout !== null) {
            clearTimeout(this.initTimeout);
            this.initTimeout = null;
        }
        if (this.connectionTimeout !== null) {
            clearTimeout(this.connectionTimeout);
            this.connectionTimeout = null;
        }
        /* close queue */
        if (this.queue !== null) {
            this.queue.write(null);
            this.queue = null;
        }
        /* close Amazon Transcribe connection */
        if (this.client !== null) {
            this.client.destroy();
            this.client = null;
        }
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy();
            this.stream = null;
        }
    }
}
exports.default = SpeechFlowNodeA2TAmazon;
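/*  Usage sketch (hypothetical, for illustration only): within a SpeechFlow
    processing graph this node consumes "audio" chunks and produces "text"
    chunks, so it is placed between an audio source and a text sink. A flow
    expression could look roughly like the following (the exact graph
    syntax and node names are defined elsewhere in SpeechFlow):

        mic-device | a2t-amazon(language: "de", interim: true) | file-sink
*/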
//# sourceMappingURL=speechflow-node-a2t-amazon.js.map