speechflow
Version:
Speech Processing Flow Graph
273 lines (255 loc) • 11.1 kB
text/typescript
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* standard dependencies */
import path from "node:path"
import http from "node:http"
import Stream from "node:stream"
/* external dependencies */
import { Duration } from "luxon"
import * as HAPI from "@hapi/hapi"
import Inert from "@hapi/inert"
import WebSocket from "ws"
import HAPIWebSocket from "hapi-plugin-websocket"
/* internal dependencies */
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
import * as util from "./speechflow-util"
/* information kept per connected WebSocket peer (stored in the peer map in "render" mode) */
type WSPeerInfo = {
/* per-connection context object; the connect hook stores the peer id in "ctx.peer" */
ctx: Record<string, any>
/* the WebSocket connection used to send serialized chunks to the peer */
ws: WebSocket
/* the incoming HTTP request of the peer; its socket yields the remote address and port */
req: http.IncomingMessage
}
/*
**  SpeechFlow node for subtitle (text-to-text) "translations".
**
**  The node operates in one of two modes (parameter "mode"):
**  - "export": each incoming text chunk is converted into SRT or WebVTT
**    cue text (parameter "format") and passed on as a text chunk.
**    With "words" enabled, an additional cue is generated per word found
**    in the chunk meta entry "words", with that word wrapped in <b>...</b>.
**  - "render": a HAPI HTTP service is started on "addr":"port" which serves
**    a static directory and sends every non-empty chunk, serialized as
**    JSON, to all connected WebSocket peers. The node has no output then.
*/
export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2t-subtitle"

    /* internal state */
    private sequenceNo = 1                      /* running SRT cue number */
    private hapi: HAPI.Server | null = null     /* network service ("render" mode only) */

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            format: { type: "string",  pos: 0, val: "srt",    match: /^(?:srt|vtt)$/ },
            words:  { type: "boolean",         val: false },
            mode:   { type: "string",          val: "export", match: /^(?:export|render)$/ },
            addr:   { type: "string",          val: "127.0.0.1" },
            port:   { type: "number",          val: 8585 }
        })

        /* declare node input/output format
           (only "export" mode produces output chunks) */
        this.input  = "text"
        this.output = this.params.mode === "export" ? "text" : "none"
    }

    /* open node: establish the stream (and network service) of the configured mode */
    async open (): Promise<void> {
        if (this.params.mode === "export") {
            this.sequenceNo = 1

            /* format a single subtitle cue, optionally highlighting
               the n-th ("occurrence") appearance of "word" in "text" */
            const convertSingle = (
                start:       Duration,
                end:         Duration,
                text:        string,
                word?:       string,
                occurrence?: number
            ): string => {
                let cue = text
                if (word) {
                    const wanted = occurrence ?? 1
                    let seen = 0
                    const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")

                    /* use Unicode-aware boundaries instead of "\b": "\b" only knows
                       ASCII word characters and hence never matches words starting
                       or ending with a non-ASCII letter or with punctuation */
                    const pattern = new RegExp(
                        `(?<![\\p{L}\\p{M}\\p{N}_])${escaped}(?![\\p{L}\\p{M}\\p{N}_])`, "gu")
                    cue = cue.replaceAll(pattern, (m) =>
                        (++seen === wanted ? `<b>${m}</b>` : m))
                }
                if (this.params.format === "srt") {
                    /* SRT: numbered cue, comma as millisecond separator */
                    const startFmt = start.toFormat("hh:mm:ss,SSS")
                    const endFmt   = end.toFormat("hh:mm:ss,SSS")
                    return `${this.sequenceNo++}\n` +
                        `${startFmt} --> ${endFmt}\n` +
                        `${cue}\n\n`
                }
                if (this.params.format === "vtt") {
                    /* WebVTT: unnumbered cue, dot as millisecond separator */
                    const startFmt = start.toFormat("hh:mm:ss.SSS")
                    const endFmt   = end.toFormat("hh:mm:ss.SSS")
                    return `${startFmt} --> ${endFmt}\n` +
                        `${cue}\n\n`
                }
                return cue
            }

            /* provide text-to-subtitle conversion
               (rejects if the chunk payload is not a string) */
            const convert = async (chunk: SpeechFlowChunk): Promise<string> => {
                if (typeof chunk.payload !== "string")
                    throw new Error("chunk payload type must be string")

                /* always emit one cue for the entire chunk */
                let output = convertSingle(chunk.timestampStart, chunk.timestampEnd, chunk.payload)

                /* optionally emit one additional cue per word */
                if (this.params.words) {
                    const words = (chunk.meta.get("words") ?? []) as
                        { word: string, start: Duration, end: Duration }[]

                    /* track how often each word was seen so far, so that
                       repeated words get the correct appearance highlighted */
                    const occurrences = new Map<string, number>()
                    for (const word of words) {
                        const occurrence = (occurrences.get(word.word) ?? 0) + 1
                        occurrences.set(word.word, occurrence)
                        output += convertSingle(word.start, word.end, chunk.payload, word.word, occurrence)
                    }
                }
                return output
            }

            /* establish a duplex stream */
            const self = this
            let firstChunk = true
            this.stream = new Stream.Transform({
                readableObjectMode: true,
                writableObjectMode: true,
                decodeStrings:      false,
                highWaterMark:      1,
                transform (chunk: SpeechFlowChunk, encoding, callback) {
                    /* WebVTT output starts with a file header before the first cue */
                    if (firstChunk) {
                        firstChunk = false
                        if (self.params.format === "vtt") {
                            this.push(new SpeechFlowChunk(
                                Duration.fromMillis(0), Duration.fromMillis(0),
                                "final", "text",
                                "WEBVTT\n\n"
                            ))
                        }
                    }

                    /* only textual payloads are supported */
                    if (Buffer.isBuffer(chunk.payload)) {
                        callback(new Error("invalid chunk payload type"))
                        return
                    }

                    /* pass through empty chunks unchanged */
                    if (chunk.payload === "") {
                        this.push(chunk)
                        callback()
                        return
                    }

                    /* replace the payload of a chunk copy with the subtitle text */
                    convert(chunk).then((payload) => {
                        const chunkNew = chunk.clone()
                        chunkNew.payload = payload
                        this.push(chunkNew)
                        callback()
                    }).catch((error: unknown) => {
                        callback(util.ensureError(error))
                    })
                },
                final (callback) {
                    this.push(null)
                    callback()
                }
            })
        }
        else if (this.params.mode === "render") {
            /* establish REST/WebSocket API */
            const wsPeers = new Map<string, WSPeerInfo>()
            const hapi = new HAPI.Server({
                address: this.params.addr,
                port:    this.params.port
            })
            this.hapi = hapi
            await hapi.register({ plugin: Inert })
            await hapi.register({ plugin: HAPIWebSocket })

            /* forward HAPI errors to the node log */
            hapi.events.on({ name: "request", channels: [ "error" ] }, (_request: HAPI.Request, event: HAPI.RequestEvent) => {
                if (event.error instanceof Error)
                    this.log("error", `HAPI: request-error: ${event.error.message}`)
                else
                    this.log("error", `HAPI: request-error: ${event.error}`)
            })
            hapi.events.on("log", (event: HAPI.LogEvent, tags: { [key: string]: true }) => {
                if (!tags.error)
                    return
                const err = event.error
                if (err instanceof Error)
                    this.log("error", `HAPI: log: ${err.message}`)
                else
                    this.log("error", `HAPI: log: ${err}`)
            })

            /* serve the static files of the user interface */
            hapi.route({
                method: "GET",
                path: "/{param*}",
                handler: {
                    directory: {
                        path: path.join(__dirname, "../../speechflow-ui-st/dst"),
                        redirectToSlash: true,
                        index: true
                    }
                }
            })

            /* serve the WebSocket endpoint and keep track of connected peers */
            hapi.route({
                method: "POST",
                path: "/api",
                options: {
                    payload: {
                        output: "data",
                        parse:  true,
                        allow:  "application/json"
                    },
                    plugins: {
                        websocket: {
                            autoping: 30 * 1000,
                            connect: ({ ctx, ws, req }) => {
                                /* identify peer by its remote address and port */
                                const peer = `${req.socket.remoteAddress}:${req.socket.remotePort}`
                                ctx.peer = peer
                                wsPeers.set(peer, { ctx, ws, req })
                                this.log("info", `HAPI: WebSocket: connect: peer ${peer}`)
                            },
                            disconnect: ({ ctx, ws }) => {
                                const peer = ctx.peer
                                wsPeers.delete(peer)
                                ws.removeAllListeners()
                                if (ws.readyState === WebSocket.OPEN)
                                    ws.close()
                                this.log("info", `HAPI: WebSocket: disconnect: peer ${peer}`)
                            }
                        }
                    }
                },
                handler: (_request: HAPI.Request, h: HAPI.ResponseToolkit) =>
                    h.response({}).code(204)
            })
            await hapi.start()
            this.log("info", `HAPI: started REST/WebSocket network service: http://${this.params.addr}:${this.params.port}`)

            /* send a chunk (serialized as JSON) to all connected peers */
            const emit = (chunk: SpeechFlowChunk): void => {
                const data = JSON.stringify(chunk)
                for (const info of wsPeers.values()) {
                    /* skip peers whose connection is not (or no longer) open,
                       as they are still registered until the disconnect hook ran */
                    if (info.ws.readyState !== WebSocket.OPEN)
                        continue
                    info.ws.send(data)
                }
            }

            /* establish a writable stream which consumes all chunks */
            this.stream = new Stream.Writable({
                objectMode:    true,
                decodeStrings: false,
                highWaterMark: 1,
                write (chunk: SpeechFlowChunk, encoding, callback) {
                    /* only textual payloads are supported */
                    if (Buffer.isBuffer(chunk.payload)) {
                        callback(new Error("invalid chunk payload type"))
                        return
                    }

                    /* empty chunks are silently dropped */
                    if (chunk.payload !== "")
                        emit(chunk)
                    callback()
                },
                final (callback) {
                    callback()
                }
            })
        }
    }

    /* close node: destroy the stream and shut down the network service (if any) */
    async close (): Promise<void> {
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }

        /* shutdown HAPI */
        if (this.hapi !== null) {
            await this.hapi.stop()
            this.hapi = null
        }
    }
}