"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
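/* NOTE: the helpers above are the standard CommonJS interop shims
   (__createBinding, __setModuleDefault, __importStar, __importDefault)
   that the TypeScript compiler emits -- this file is compiled output
   of a TypeScript source, not hand-written JavaScript */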
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_stream_1 = __importDefault(require("node:stream"));
/* external dependencies */
const Deepgram = __importStar(require("@deepgram/sdk"));
const luxon_1 = require("luxon");
/* internal dependencies */
const speechflow_node_1 = __importStar(require("./speechflow-node"));
const util = __importStar(require("./speechflow-util"));
/* SpeechFlow node for Deepgram speech-to-text conversion */
class SpeechFlowNodeA2TDeepgram extends speechflow_node_1.default {
    /* declare official node name */
    static name = "a2t-deepgram";
    /* internal state */
    dg = null;
    destroyed = false;
    initTimeout = null;
    connectionTimeout = null;
    queue = null;
    /* construct node */
    constructor(id, cfg, opts, args) {
        super(id, cfg, opts, args);
        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
            keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
            model: { type: "string", val: "nova-2", pos: 0 },
            version: { type: "string", val: "latest", pos: 1 },
            language: { type: "string", val: "multi", pos: 2 },
            interim: { type: "boolean", val: false, pos: 3 }
        });
        /* declare node input/output format */
        this.input = "audio";
        this.output = "text";
    }
    /* one-time status of node */
    async status() {
        let balance = 0;
        try {
            const deepgram = Deepgram.createClient(this.params.keyAdm);
            const response = await deepgram.manage.getProjects();
            if (response !== null && response.error === null && response.result?.projects) {
                for (const project of response.result.projects) {
                    const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id);
                    if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances)
                        balance += balanceResponse.result.balances[0]?.amount ?? 0;
                }
            }
            else if (response?.error !== null)
                this.log("warning", `API error fetching projects: ${response?.error}`);
        }
        catch (error) {
            this.log("warning", `failed to fetch balance: ${error}`);
        }
        return { balance: balance.toFixed(2) };
    }
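    /* usage sketch (hypothetical -- nodes are normally driven by the
       SpeechFlow runtime): given a node constructed with a valid
       SPEECHFLOW_DEEPGRAM_KEY_ADM, status() sums the balances of all
       Deepgram projects reachable with that admin key:

           const { balance } = await node.status()
           console.log(balance)   // e.g. "42.00"
    */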
    /* open node */
    async open() {
        /* sanity check situation */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("Deepgram node currently supports PCM-S16LE audio only");
        /* clear destruction flag */
        this.destroyed = false;
        /* create queue for results */
        this.queue = new util.SingleQueue();
        /* create a store for the meta information */
        const metastore = new util.TimeStore();
        /* connect to Deepgram API */
        const deepgram = Deepgram.createClient(this.params.key);
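        /* map the requested language onto what the chosen model accepts:
           nova-2 models take an explicit language code, nova-3 models use
           their generic multilingual mode ("multi"), and any other model
           falls back to English */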
        let language = "en";
        if (this.params.language !== "en") {
            if (this.params.model.match(/^nova-2/))
                language = this.params.language;
            else if (this.params.model.match(/^nova-3/))
                language = "multi";
        }
        this.dg = deepgram.listen.live({
            mip_opt_out: true,
            model: this.params.model,
            version: this.params.version,
            language,
            channels: this.config.audioChannels,
            sample_rate: this.config.audioSampleRate,
            encoding: "linear16",
            multichannel: false,
            endpointing: false,
            interim_results: this.params.interim,
            smart_format: true,
            punctuate: true,
            filler_words: true,
            numerals: true,
            diarize: false,
            profanity_filter: false,
            redact: false
        });
        /* hook onto Deepgram API events */
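        /* transcript flow: Deepgram reports stream-relative start/duration
           times, which are shifted by timeZeroOffset into the node's
           absolute timeline; meta information buffered by the writer side
           (see metastore usage below) is fetched for the covered interval,
           merged, and attached to the emitted text chunk */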
        this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
            if (this.destroyed || this.queue === null)
                return;
            const text = (data.channel?.alternatives[0]?.transcript ?? "");
            const words = (data.channel?.alternatives[0]?.words ?? []);
            const isFinal = (data.is_final ?? false);
            if (text === "")
                this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`);
            else {
                this.log("info", `text received (start: ${data.start}s, ` +
                    `duration: ${data.duration.toFixed(2)}s, ` +
                    `kind: ${isFinal ? "final" : "intermediate"}): ` +
                    `"${text}"`);
                const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset);
                const end = start.plus({ seconds: data.duration });
                const metas = metastore.fetch(start, end);
                const meta = metas.reduce((prev, curr) => {
                    curr.forEach((val, key) => { prev.set(key, val); });
                    return prev;
                }, new Map());
                metastore.prune(start);
                meta.set("words", words.map((word) => {
                    const start = luxon_1.Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset);
                    const end = luxon_1.Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset);
                    return { word: word.punctuated_word ?? word.word, start, end };
                }));
                const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, isFinal ? "final" : "intermediate", "text", text, meta);
                this.queue.write(chunk);
            }
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => {
            this.log("info", "speech started", data);
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => {
            this.log("info", "utterance end received", data);
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
            this.log("info", "metadata received");
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
            this.log("info", "connection close");
            if (!this.destroyed && this.queue !== null)
                this.queue.write(null);
        });
        this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => {
            this.log("error", `error: ${error.message}`);
            if (!this.destroyed && this.queue !== null)
                this.queue.write(null);
            this.emit("error");
        });
        /* wait for Deepgram API to be available */
        await new Promise((resolve, reject) => {
            this.connectionTimeout = setTimeout(() => {
                this.connectionTimeout = null;
                reject(new Error("Deepgram: timeout waiting for connection open"));
            }, 8000);
            this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => {
                this.log("info", "connection open");
                if (this.connectionTimeout !== null) {
                    clearTimeout(this.connectionTimeout);
                    this.connectionTimeout = null;
                }
                resolve(true);
            });
        });
        /* remember opening time to derive the time zero offset */
        this.timeOpen = luxon_1.DateTime.now();
        /* provide Duplex stream and internally attach to Deepgram API */
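        /* design note: the stream operates in object mode with a
           high-water mark of 1, so backpressure is applied per chunk --
           write() forwards raw PCM audio to the Deepgram connection,
           while read() awaits transcribed text chunks from the queue
           filled by the Transcript event handler above */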
        const self = this;
        this.stream = new node_stream_1.default.Duplex({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            write(chunk, encoding, callback) {
                if (self.destroyed || self.dg === null) {
                    callback(new Error("stream already destroyed"));
                    return;
                }
                if (chunk.type !== "audio")
                    callback(new Error("expected audio input chunk"));
                else if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("expected Buffer input chunk"));
                else {
                    if (chunk.payload.byteLength > 0) {
                        self.log("debug", `send data (${chunk.payload.byteLength} bytes)`);
                        if (chunk.meta.size > 0)
                            metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta);
                        try {
                            /* send only the payload's own bytes (a Buffer may be a
                               view into a larger pooled ArrayBuffer, so slice it to
                               its exact extent) and intentionally discard all time
                               information */
                            const payload = chunk.payload.buffer.slice(chunk.payload.byteOffset, chunk.payload.byteOffset + chunk.payload.byteLength);
                            self.dg.send(payload);
                        }
                        catch (error) {
                            callback(error instanceof Error ? error : new Error("failed to send to Deepgram"));
                            return;
                        }
                    }
                    callback();
                }
            },
            read(size) {
                if (self.destroyed || self.queue === null) {
                    this.push(null);
                    return;
                }
                self.queue.read().then((chunk) => {
                    if (self.destroyed) {
                        this.push(null);
                        return;
                    }
                    if (chunk === null) {
                        self.log("info", "received EOF signal");
                        this.push(null);
                    }
                    else {
                        self.log("debug", `received data (${chunk.payload.length} bytes)`);
                        this.push(chunk);
                    }
                }).catch((error) => {
                    if (!self.destroyed)
                        self.log("error", `queue read error: ${util.ensureError(error).message}`);
                });
            },
            final(callback) {
                if (self.destroyed || self.dg === null) {
                    callback();
                    return;
                }
                try {
                    self.dg.requestClose();
                }
                catch (error) {
                    self.log("warning", `error closing Deepgram connection: ${error}`);
                }
                /* NOTICE: do not push null here -- let the Deepgram close event handle it */
                callback();
            }
        });
    }
    /* close node */
    async close() {
        /* indicate destruction first to stop all async operations */
        this.destroyed = true;
        /* cleanup all timers */
        if (this.initTimeout !== null) {
            clearTimeout(this.initTimeout);
            this.initTimeout = null;
        }
        if (this.connectionTimeout !== null) {
            clearTimeout(this.connectionTimeout);
            this.connectionTimeout = null;
        }
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy();
            this.stream = null;
        }
        /* close Deepgram connection and remove listeners */
        if (this.dg !== null) {
            try {
                this.dg.removeAllListeners();
                this.dg.requestClose();
            }
            catch (error) {
                this.log("warning", `error during Deepgram cleanup: ${error}`);
            }
            this.dg = null;
        }
        /* signal EOF to any pending read operations */
        if (this.queue !== null) {
            this.queue.write(null);
            this.queue = null;
        }
    }
}
exports.default = SpeechFlowNodeA2TDeepgram;
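/* usage sketch (hypothetical -- node ids, config fields, and argument
   shapes are assumptions here; in practice the SpeechFlow runtime wires
   nodes into a flow graph and constructs them itself):

       const A2TDeepgram = require("./speechflow-node-a2t-deepgram").default
       const node = new A2TDeepgram("stt", {
           audioBitDepth: 16, audioLittleEndian: true,
           audioChannels: 1, audioSampleRate: 16000
       }, {}, { model: "nova-3", language: "multi", interim: false })
       await node.open()
       node.stream.on("data", (chunk) => console.log(chunk.payload))
       node.stream.write(audioChunk)   // SpeechFlowChunk of type "audio"
       // ... later: await node.close()
*/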
//# sourceMappingURL=speechflow-node-a2t-deepgram.js.map