speechflow
speechflow

Speech Processing Flow Graph

"use strict"; /* ** SpeechFlow - Speech Processing Flow Graph ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com> ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only> */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); /* standard dependencies */ const node_stream_1 = __importDefault(require("node:stream")); /* external dependencies */ const Deepgram = __importStar(require("@deepgram/sdk")); const luxon_1 = require("luxon"); /* internal dependencies */ const speechflow_node_1 = __importStar(require("./speechflow-node")); const util = __importStar(require("./speechflow-util")); /* SpeechFlow node for Deepgram speech-to-text conversion */ class SpeechFlowNodeA2TDeepgram extends speechflow_node_1.default { /* declare official node name */ static name = "a2t-deepgram"; /* internal state */ dg = null; destroyed = false; initTimeout = null; connectionTimeout = null; queue = null; /* construct node */ constructor(id, cfg, opts, args) { super(id, cfg, opts, args); /* declare node configuration parameters */ this.configure({ key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY }, keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM }, model: { type: "string", val: "nova-2", pos: 0 }, version: { type: "string", val: "latest", pos: 1 }, language: { type: "string", val: "multi", pos: 2 }, interim: { type: "boolean", val: false, pos: 3 } }); /* declare node input/output format */ this.input = "audio"; this.output = "text"; } /* one-time status of node */ async status() { let balance = 0; try { const deepgram = Deepgram.createClient(this.params.keyAdm); const response = await deepgram.manage.getProjects(); if (response !== null && response.error === null && response.result?.projects) { for (const project of response.result.projects) { const balanceResponse = await deepgram.manage.getProjectBalances(project.project_id); if (balanceResponse !== null && balanceResponse.error === null && balanceResponse.result?.balances) balance += balanceResponse.result.balances[0]?.amount ?? 
0; } } else if (response?.error !== null) this.log("warning", `API error fetching projects: ${response.error}`); } catch (error) { this.log("warning", `failed to fetch balance: ${error}`); } return { balance: balance.toFixed(2) }; } /* open node */ async open() { /* sanity check situation */ if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian) throw new Error("Deepgram node currently supports PCM-S16LE audio only"); /* clear destruction flag */ this.destroyed = false; /* create queue for results */ this.queue = new util.SingleQueue(); /* create a store for the meta information */ const metastore = new util.TimeStore(); /* connect to Deepgram API */ const deepgram = Deepgram.createClient(this.params.key); let language = "en"; if (this.params.language !== "en") { if (this.params.model.match(/^nova-2/)) language = this.params.language; else if (this.params.model.match(/^nova-3/)) language = "multi"; } this.dg = deepgram.listen.live({ mip_opt_out: true, model: this.params.model, version: this.params.version, language, channels: this.config.audioChannels, sample_rate: this.config.audioSampleRate, encoding: "linear16", multichannel: false, endpointing: false, interim_results: this.params.interim, smart_format: true, punctuate: true, filler_words: true, numerals: true, diarize: false, profanity_filter: false, redact: false }); /* hook onto Deepgram API events */ this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => { if (this.destroyed || this.queue === null) return; const text = (data.channel?.alternatives[0]?.transcript ?? ""); const words = (data.channel?.alternatives[0]?.words ?? []); const isFinal = (data.is_final ?? false); if (text === "") this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`); else { this.log("info", `text received (start: ${data.start}s, ` + `duration: ${data.duration.toFixed(2)}s, ` + `kind: ${isFinal ? "final" : "intermediate"}): ` + `${text}"`); const start = luxon_1.Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset); const end = start.plus({ seconds: data.duration }); const metas = metastore.fetch(start, end); const meta = metas.reduce((prev, curr) => { curr.forEach((val, key) => { prev.set(key, val); }); return prev; }, new Map()); metastore.prune(start); meta.set("words", words.map((word) => { const start = luxon_1.Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset); const end = luxon_1.Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset); return { word: word.punctuated_word ?? word.word, start, end }; })); const chunk = new speechflow_node_1.SpeechFlowChunk(start, end, isFinal ? 
"final" : "intermediate", "text", text, meta); this.queue.write(chunk); } }); this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => { this.log("info", "speech started", data); }); this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => { this.log("info", "utterance end received", data); }); this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => { this.log("info", "metadata received"); }); this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => { this.log("info", "connection close"); if (!this.destroyed && this.queue !== null) this.queue.write(null); }); this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error) => { this.log("error", `error: ${error.message}`); if (!this.destroyed && this.queue !== null) this.queue.write(null); this.emit("error"); }); /* wait for Deepgram API to be available */ await new Promise((resolve, reject) => { this.connectionTimeout = setTimeout(() => { this.connectionTimeout = null; reject(new Error("Deepgram: timeout waiting for connection open")); }, 8000); this.dg.once(Deepgram.LiveTranscriptionEvents.Open, () => { this.log("info", "connection open"); if (this.connectionTimeout !== null) { clearTimeout(this.connectionTimeout); this.connectionTimeout = null; } resolve(true); }); }); /* remember opening time to receive time zero offset */ this.timeOpen = luxon_1.DateTime.now(); /* provide Duplex stream and internally attach to Deepgram API */ const self = this; this.stream = new node_stream_1.default.Duplex({ writableObjectMode: true, readableObjectMode: true, decodeStrings: false, highWaterMark: 1, write(chunk, encoding, callback) { if (self.destroyed || self.dg === null) { callback(new Error("stream already destroyed")); return; } if (chunk.type !== "audio") callback(new Error("expected audio input chunk")); else if (!Buffer.isBuffer(chunk.payload)) callback(new Error("expected Buffer input chunk")); else { if (chunk.payload.byteLength > 0) { self.log("debug", `send data (${chunk.payload.byteLength} bytes)`); if (chunk.meta.size > 0) metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta); try { self.dg.send(chunk.payload.buffer); /* intentionally discard all time information */ } catch (error) { callback(error instanceof Error ? 
error : new Error("failed to send to Deepgram")); return; } } callback(); } }, read(size) { if (self.destroyed || self.queue === null) { this.push(null); return; } self.queue.read().then((chunk) => { if (self.destroyed) { this.push(null); return; } if (chunk === null) { self.log("info", "received EOF signal"); this.push(null); } else { self.log("debug", `received data (${chunk.payload.length} bytes)`); this.push(chunk); } }).catch((error) => { if (!self.destroyed) self.log("error", `queue read error: ${util.ensureError(error).message}`); }); }, final(callback) { if (self.destroyed || self.dg === null) { callback(); return; } try { self.dg.requestClose(); } catch (error) { self.log("warning", `error closing Deepgram connection: ${error}`); } /* NOTICE: do not push null here -- let the Deepgram close event handle it */ callback(); } }); } /* close node */ async close() { /* indicate destruction first to stop all async operations */ this.destroyed = true; /* cleanup all timers */ if (this.initTimeout !== null) { clearTimeout(this.initTimeout); this.initTimeout = null; } if (this.connectionTimeout !== null) { clearTimeout(this.connectionTimeout); this.connectionTimeout = null; } /* close stream */ if (this.stream !== null) { this.stream.destroy(); this.stream = null; } /* close Deepgram connection and remove listeners */ if (this.dg !== null) { try { this.dg.removeAllListeners(); this.dg.requestClose(); } catch (error) { this.log("warning", `error during Deepgram cleanup: ${error}`); } this.dg = null; } /* signal EOF to any pending read operations */ if (this.queue !== null) { this.queue.write(null); this.queue = null; } } } exports.default = SpeechFlowNodeA2TDeepgram; //# sourceMappingURL=speechflow-node-a2t-deepgram.js.map
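
For orientation, a minimal usage sketch of this node follows. It is not part of the package: the construction signature (id, cfg, opts, args), the cfg audio fields, the positional order of args, and the SpeechFlowChunk constructor (start, end, kind, type, payload, meta) are all inferred from the code above, and using kind "final" for audio chunks is an assumption. SPEECHFLOW_DEEPGRAM_KEY must be set in the environment.

/*  usage-sketch.js -- hypothetical example, not shipped with the package  */
const { Duration } = require("luxon");
const { SpeechFlowChunk } = require("./speechflow-node");
const SpeechFlowNodeA2TDeepgram = require("./speechflow-node-a2t-deepgram").default;

async function transcribe(pcmBuffers /* array of PCM-S16LE mono 16 kHz Buffers */) {
    /* cfg must satisfy the PCM-S16LE sanity check performed in open() */
    const node = new SpeechFlowNodeA2TDeepgram("stt", {
        audioBitDepth:     16,
        audioLittleEndian: true,
        audioChannels:     1,
        audioSampleRate:   16000
    }, {}, [ "nova-2", "latest", "multi", false ]); /* model, version, language, interim */
    await node.open();

    /* transcription results arrive as "text" chunks on the Duplex stream */
    node.stream.on("data", (chunk) => {
        console.log(`[${chunk.kind}] ${chunk.payload}`);
    });
    node.stream.on("end", () => { node.close(); });

    /* feed audio as "audio" chunks with Buffer payloads; at 16-bit mono
       16 kHz each buffer covers byteLength / (2 * 16000) seconds */
    let t = Duration.fromMillis(0);
    for (const buf of pcmBuffers) {
        const dt = Duration.fromObject({ seconds: buf.byteLength / (2 * 16000) });
        node.stream.write(new SpeechFlowChunk(t, t.plus(dt), "final", "audio", buf, new Map()));
        t = t.plus(dt);
    }
    node.stream.end();
}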