speechflow
Version:
Speech Processing Flow Graph
249 lines (227 loc) • 10.6 kB
text/typescript
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* standard dependencies */
import Stream from "node:stream"
/* external dependencies */
import OpenAI from "openai"
/* internal dependencies */
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
import * as util from "./speechflow-util"
/* internal utility types */

/* a single LLM setup: the system prompt plus the few-shot example dialog sent ahead of the user text */
interface ConfigEntry {
    systemPrompt: string
    chat: OpenAI.ChatCompletionMessageParam[]
}

/* table of LLM setups, indexed by a string key */
type Config = Record<string, ConfigEntry>
/*
**  SpeechFlow node for OpenAI/GPT text-to-text translation.
**
**  The node takes text chunks on its input, sends each non-empty payload
**  through an OpenAI chat completion and emits a cloned chunk carrying the
**  returned text. If source and destination language are equal, the
**  spellchecking setup is used, otherwise the translation setup.
*/
export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2t-openai"

    /* internal state: the OpenAI client (only set between open() and close()) */
    private openai: OpenAI | null = null

    /* internal LLM setup: system prompt and few-shot examples,
       indexed by "<src>-<dst>" (the key is built this way in open()) */
    private readonly setup: Config = {
        /* English (EN) spellchecking only */
        "en-en": {
            systemPrompt:
                "You are a proofreader and spellchecker for English.\n" +
                "Output only the corrected text.\n" +
                "Do NOT use markdown.\n" +
                "Do NOT give any explanations.\n" +
                "Do NOT give any introduction.\n" +
                "Do NOT give any comments.\n" +
                "Do NOT give any preamble.\n" +
                "Do NOT give any prolog.\n" +
                "Do NOT give any epilog.\n" +
                "Do NOT change the grammar.\n" +
                "Do NOT use synonyms for words.\n" +
                "Keep all words.\n" +
                "Fill in missing commas.\n" +
                "Fill in missing points.\n" +
                "Fill in missing question marks.\n" +
                "Fill in missing hyphens.\n" +
                "Focus ONLY on the word spelling.\n" +
                "The text you have to correct is:\n",
            chat: [
                { role: "user",      content: "I luve my wyfe" },
                { role: "assistant", content: "I love my wife." },
                { role: "user",      content: "The weether is wunderfull!" },
                { role: "assistant", content: "The weather is wonderful!" },
                { role: "user",      content: "The life awesome but I'm hungry." },
                { role: "assistant", content: "The life is awesome, but I'm hungry." }
            ]
        },

        /* German (DE) spellchecking only */
        "de-de": {
            systemPrompt:
                "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
                "Gib nur den korrigierten Text aus.\n" +
                "Benutze KEIN Markdown.\n" +
                "Gib KEINE Erklärungen.\n" +
                "Gib KEINE Einleitung.\n" +
                "Gib KEINE Kommentare.\n" +
                "Gib KEINE Preamble.\n" +
                "Gib KEINEN Prolog.\n" +
                "Gib KEINEN Epilog.\n" +
                "Ändere NICHT die Grammatik.\n" +
                "Verwende KEINE Synonyme für Wörter.\n" +
                "Behalte alle Wörter bei.\n" +
                "Füge fehlende Kommas ein.\n" +
                "Füge fehlende Punkte ein.\n" +
                "Füge fehlende Fragezeichen ein.\n" +
                "Füge fehlende Bindestriche ein.\n" +
                "Füge fehlende Gedankenstriche ein.\n" +
                "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
                "Der von dir zu korrigierende Text ist:\n",
            chat: [
                { role: "user",      content: "Ich ljebe meine Frao" },
                { role: "assistant", content: "Ich liebe meine Frau." },
                { role: "user",      content: "Die Wedter ist wunderschoen." },
                { role: "assistant", content: "Das Wetter ist wunderschön." },
                { role: "user",      content: "Das Leben einfach großartig aber ich bin hungrig." },
                { role: "assistant", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
            ]
        },

        /* English (EN) to German (DE) translation */
        "en-de": {
            systemPrompt:
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
            chat: [
                { role: "user",      content: "I love my wife." },
                { role: "assistant", content: "Ich liebe meine Frau." },
                { role: "user",      content: "The weather is wonderful." },
                { role: "assistant", content: "Das Wetter ist wunderschön." },
                { role: "user",      content: "The life is awesome." },
                { role: "assistant", content: "Das Leben ist einfach großartig." }
            ]
        },

        /* German (DE) to English (EN) translation */
        "de-en": {
            systemPrompt:
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
            chat: [
                { role: "user",      content: "Ich liebe meine Frau." },
                { role: "assistant", content: "I love my wife." },
                { role: "user",      content: "Das Wetter ist wunderschön." },
                { role: "assistant", content: "The weather is wonderful." },
                { role: "user",      content: "Das Leben ist einfach großartig." },
                { role: "assistant", content: "The life is awesome." }
            ]
        }
    }

    /*
    **  Construct node.
    **
    **  All four arguments are forwarded unchanged to the SpeechFlowNode
    **  constructor. Afterwards the node parameters are declared, the
    **  effective operation mode is logged and the input/output formats
    **  are both set to "text".
    */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters
           (src/dst are restricted to "de" or "en", so all four
           combinations have an entry in the setup table above) */
        this.configure({
            src:   { type: "string", pos: 0, val: "de",                                  match: /^(?:de|en)$/ },
            dst:   { type: "string", pos: 1, val: "en",                                  match: /^(?:de|en)$/ },
            key:   { type: "string",         val: process.env.SPEECHFLOW_OPENAI_KEY,     match: /^.+$/ },
            api:   { type: "string",         val: "https://api.openai.com/v1",           match: /^https?:\/\/.+/ },
            model: { type: "string",         val: "gpt-5-mini",                          match: /^.+$/ }
        })

        /* tell effective mode */
        if (this.params.src === this.params.dst)
            this.log("info", `OpenAI: operation mode: spellchecking for language "${this.params.src}"`)
        else
            this.log("info", `OpenAI: operation mode: translation from language "${this.params.src}"` +
                ` to language "${this.params.dst}"`)

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /*
    **  Open node.
    **
    **  Instantiates the OpenAI client and establishes the object-mode
    **  transform stream which performs the per-chunk translation.
    **
    **  Throws an Error if no API key is configured or if no LLM setup
    **  exists for the configured language pair.
    */
    async open (): Promise<void> {
        /* validate API key */
        if (!this.params.key)
            throw new Error("OpenAI API key is required")

        /* resolve the LLM setup for the configured language pair and
           raise a descriptive error (instead of failing later with an
           opaque TypeError on "cfg.systemPrompt") if there is none */
        const resolveSetup = (): ConfigEntry => {
            const key = `${this.params.src}-${this.params.dst}`
            const entry: ConfigEntry | undefined = this.setup[key]
            if (entry === undefined)
                throw new Error(`OpenAI: unsupported language pair "${key}"`)
            return entry
        }

        /* fail fast: detect an unsupported language pair at open time
           and not just when the first chunk arrives */
        resolveSetup()

        /* instantiate OpenAI API */
        this.openai = new OpenAI({
            baseURL: this.params.api,
            apiKey:  this.params.key,
            timeout: 30000 /* presumably milliseconds -- confirm against the openai client documentation */
        })

        /* provide text-to-text translation:
           send system prompt, few-shot examples and the text as a single
           chat completion and return the content of the first choice */
        const translate = async (text: string): Promise<string> => {
            const cfg = resolveSetup()

            /* the client is reset to null in close() */
            if (!this.openai)
                throw new Error("OpenAI client not available")

            /* NOTE(review): the temperature depends on the model name suffix;
               presumably the "-mini" models only accept the default value of
               1.0 -- confirm against the OpenAI API reference */
            const completion = await this.openai.chat.completions.create({
                model:       this.params.model,
                temperature: this.params.model.endsWith("-mini") ? 1.0 : 0.7,
                messages: [
                    { role: "system", content: cfg.systemPrompt },
                    ...cfg.chat,
                    { role: "user", content: text }
                ]
            })

            /* an absent or empty answer is treated as an error */
            const content = completion?.choices?.[0]?.message?.content
            if (!content)
                throw new Error("OpenAI API returned empty content")
            return content
        }

        /* establish a duplex stream and connect it to OpenAI */
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, _encoding, callback) {
                if (Buffer.isBuffer(chunk.payload))
                    /* this node handles text only */
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "") {
                    /* pass empty text through without an API round-trip */
                    this.push(chunk)
                    callback()
                }
                else {
                    /* emit a clone which carries the translated text;
                       the incoming chunk itself is left untouched */
                    translate(chunk.payload).then((payload) => {
                        const chunkNew = chunk.clone()
                        chunkNew.payload = payload
                        this.push(chunkNew)
                        callback()
                    }).catch((error: unknown) => {
                        callback(util.ensureError(error))
                    })
                }
            },
            final (callback) {
                /* signal end-of-stream on the readable side */
                this.push(null)
                callback()
            }
        })
    }

    /*
    **  Close node.
    **
    **  Destroys the stream (if any) and drops the OpenAI client.
    */
    async close (): Promise<void> {
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }

        /* shutdown OpenAI */
        this.openai = null
    }
}