echogarden
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
import * as API from '../api/API.js'
import { parseCLIArguments } from './CLIParser.js'
import { parseJSONAndGetType, getWithDefault, logToStderr, parseJson, setupUnhandledExceptionListeners, splitFilenameOnExtendedExtension, stringifyAndFormatJson } from '../utilities/Utilities.js'
import { getOptionTypeFromSchema, SchemaTypeDefinition } from './CLIOptionsSchema.js'
import { ParsedConfigFile, parseConfigFile, parseJSONConfigFile } from './CLIConfigFile.js'
import chalk from 'chalk'
import { RawAudio, applyGainDecibels, encodeRawAudioToWave, getEmptyRawAudio, getRawAudioDuration, normalizeAudioLevel, sliceRawAudioByTime } from '../audio/AudioUtilities.js'
import { SubtitlesConfig, subtitlesToText, timelineToSubtitles } from '../subtitles/Subtitles.js'
import { Logger, resetActiveLogger } from '../utilities/Logger.js'
import { isMainThread, parentPort } from 'node:worker_threads'
import { encodeFromChannels, getDefaultFFMpegOptionsForSpeech } from '../codecs/FFMpegTranscoder.js'
import { splitToParagraphs, splitToWords } from '../nlp/Segmentation.js'
import { playAudioSamplesWithKeyboardControls, playAudioWithWordTimeline } from '../audio/AudioPlayer.js'
import { extendDeep } from '../utilities/ObjectUtilities.js'
import { Timeline, TimelineEntry, addTimeOffsetToTimeline, addWordTextOffsetsToTimelineInPlace, roundTimelineProperties } from '../utilities/Timeline.js'
import { ensureDir, existsSync, readAndParseJsonFile, readdir, readFileAsUtf8, writeFileSafe } from '../utilities/FileSystem.js'
import { formatLanguageCodeWithName, getShortLanguageCode } from '../utilities/Locale.js'
import { APIOptions } from '../api/APIOptions.js'
import { ensureAndGetPackagesDir, getVersionTagFromPackageName, loadPackage, resolveVersionTagForUnversionedPackageName } from '../utilities/PackageManager.js'
import { removePackage } from '../utilities/PackageManager.js'
import { appName } from '../api/Common.js'
import { ServerOptions, startServer } from '../server/Server.js'
import { OpenPromise } from '../utilities/OpenPromise.js'
import { getDirName, getFileNameWithoutExtension, getLowercaseFileExtension, joinPath, parsePath, resolveToModuleRootDir } from '../utilities/PathUtilities.js'
import { CLIOptions, CLIOptionsKeys } from './CLIOptions.js'
import { convertHtmlToText, formatIntegerWithLeadingZeros, formatListWithQuotedElements } from '../utilities/StringUtilities.js'
//const log = logToStderr
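// When this module is loaded inside a worker thread, wait for an 'init' message
// from the parent, mirror the parent's TTY and color capabilities on stderr,
// proxy stderr writes back to the parent via postMessage, and then run the CLI
// with this process's arguments.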
async function startIfInWorkerThread() {
if (isMainThread || !parentPort) {
return
}
setupUnhandledExceptionListeners()
const initOpenPromise = new OpenPromise<void>()
parentPort.once('message', (message) => {
if (message.name == 'init') {
process.stderr.isTTY = message.stdErrIsTTY
process.stderr.hasColors = () => message.hasColors
process.stderr.write = (text) => {
parentPort!.postMessage({ name: 'writeToStdErr', text })
return true
}
initOpenPromise.resolve()
}
})
await initOpenPromise.promise
start(process.argv.slice(2))
}
type CLIOperationData = {
operation: string
operationArgs: string[]
globalOptions: API.GlobalOptions
cliOptions: CLIOptions
operationOptionsLookup: Map<string, string>
}
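// Main CLI entry point: parses the operation name and its arguments, loads an
// optional config file, splits options into global, CLI and operation-specific
// groups, and dispatches to startWithArgs. Called as start(process.argv.slice(2)).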
export async function start(processArgs: string[]) {
const logger = new Logger()
const operationData: CLIOperationData = {
operation: '',
operationArgs: [],
globalOptions: {},
cliOptions: {},
operationOptionsLookup: new Map<string, string>(),
}
try {
const packageData = await readAndParseJsonFile(resolveToModuleRootDir('package.json'))
logger.log(chalk.magentaBright(`Echogarden v${packageData.version}\n`))
const operation = processArgs[0]
if (!operation || operation == 'help') {
logger.log(`Supported operations:\n\n${help.join('\n')}`)
process.exit(0)
}
if (operation == '--help' || operation == '-h') {
logger.log(`There's no operation called '${operation}'. Did you mean to run 'echogarden help'?`)
process.exit(1)
}
if (operation.startsWith('-')) {
logger.log(`Operation name '${operation}' is invalid. It cannot start with a hyphen.`)
process.exit(1)
}
const { operationArgs, parsedArgumentsLookup } = parseCLIArguments(processArgs.slice(1))
const globalOptionsLookup = new Map<string, string>()
const cliOptionsLookup = new Map<string, string>()
const operationsOptionsLookup = new Map<string, string>()
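// When no --config argument is given, fall back to './echogarden.config' or
// './echogarden.config.json' in the working directory, if either exists.
// A config file groups default options into sections named 'global', 'cli',
// or an operation name ('speak-*' operations share the 'speak' section).
// A JSON config might look like this (a sketch; the option keys shown are
// illustrative assumptions, not confirmed option names):
//
//   {
//     "cli": { "play": false },
//     "speak": { "engine": "...", "voice": "..." }
//   }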
if (!parsedArgumentsLookup.has('config')) {
const defaultConfigFile = `./${appName}.config`
const defaultJsonConfigFile = defaultConfigFile + '.json'
if (existsSync(defaultConfigFile)) {
parsedArgumentsLookup.set('config', defaultConfigFile)
} else if (existsSync(defaultJsonConfigFile)) {
parsedArgumentsLookup.set('config', defaultJsonConfigFile)
}
}
if (parsedArgumentsLookup.has('config')) {
const configFilePath = parsedArgumentsLookup.get('config')!
parsedArgumentsLookup.delete('config')
let parsedConfigFile: ParsedConfigFile
if (configFilePath.endsWith('.config')) {
parsedConfigFile = await parseConfigFile(configFilePath)
} else if (configFilePath.endsWith('.config.json')) {
parsedConfigFile = await parseJSONConfigFile(configFilePath)
} else {
throw new Error(`Specified config file '${configFilePath}' doesn't have a supported extension. Should be either '.config' or '.config.json'`)
}
let sectionName = operation
if (sectionName.startsWith('speak-')) {
sectionName = 'speak'
}
if (parsedConfigFile.has('global')) {
for (const [key, value] of parsedConfigFile.get('global')!) {
globalOptionsLookup.set(key, value)
}
}
if (parsedConfigFile.has('cli')) {
for (const [key, value] of parsedConfigFile.get('cli')!) {
cliOptionsLookup.set(key, value)
}
}
if (parsedConfigFile.has(sectionName)) {
for (const [key, value] of parsedConfigFile.get(sectionName)!) {
operationsOptionsLookup.set(key, value)
}
}
}
const globalOptionsKeys = API.listGlobalOptions()
const cliOptionsKeys = CLIOptionsKeys
for (const [key, value] of parsedArgumentsLookup) {
if (globalOptionsKeys.includes(key)) {
globalOptionsLookup.set(key, value)
} else if (cliOptionsKeys.includes(key as any)) {
cliOptionsLookup.set(key, value)
} else {
operationsOptionsLookup.set(key, value)
}
}
operationData.operation = operation
operationData.operationArgs = operationArgs
operationData.globalOptions = await optionsLookupToTypedObject(globalOptionsLookup, 'GlobalOptions')
operationData.cliOptions = await optionsLookupToTypedObject(cliOptionsLookup, 'CLIOptions')
operationData.operationOptionsLookup = operationsOptionsLookup
} catch (e: any) {
resetActiveLogger()
logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error')
process.exit(1)
}
for (const key in operationData.globalOptions) {
const value = (operationData.globalOptions as any)[key]
API.setGlobalOption(key as any, value)
}
const debugMode = operationData.cliOptions.debug || false
try {
await startWithArgs(operationData)
} catch (e: any) {
resetActiveLogger()
if (debugMode) {
logger.log(e, 'error')
} else {
logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error')
}
process.exit(1)
}
process.exit(0)
}
const executableName = chalk.cyanBright('echogarden')
const help = [
`${executableName} ${chalk.magentaBright('speak')} text [output files...] [options...]`,
` Speak the given text\n`,
`${executableName} ${chalk.magentaBright('speak-file')} inputFile [output files...] [options...]`,
` Speak the given text file\n`,
`${executableName} ${chalk.magentaBright('speak-url')} url [output files...] [options...]`,
` Speak the HTML document at the given URL\n`,
`${executableName} ${chalk.magentaBright('speak-wikipedia')} articleName [output files...] [options...]`,
` Speak the given Wikipedia article. Language edition can be specified by --language=<langCode>\n`,
`${executableName} ${chalk.magentaBright('transcribe')} audioFile [output files...] [options...]`,
` Transcribe a spoken audio file\n`,
`${executableName} ${chalk.magentaBright('align')} audioFile transcriptFile [output files...] [options...]`,
` Align a spoken audio file to its transcript\n`,
`${executableName} ${chalk.magentaBright('translate-text')} inputFile [output files...] [options...]`,
` Translate text to a different language\n`,
`${executableName} ${chalk.magentaBright('translate-speech')} audioFile [output files...] [options...]`,
` Transcribe a spoken audio file directly to a different language\n`,
`${executableName} ${chalk.magentaBright('align-translation')} audioFile translatedTranscriptFile [output files...] [options...]`,
` Align a spoken audio file to its translated transcript\n`,
`${executableName} ${chalk.magentaBright('align-transcript-and-translation')} audioFile transcriptFile translatedTranscriptFile [output files...] [options...]`,
` Align a spoken audio file to both its transcript and its translated transcript using a two-stage approach\n`,
`${executableName} ${chalk.magentaBright('align-timeline-translation')} timelineFile translatedFile [output files...] [options...]`,
` Align a given timeline file to its translated text\n`,
`${executableName} ${chalk.magentaBright('detect-text-language')} inputFile [output files...] [options...]`,
` Detect the language of a text file\n`,
`${executableName} ${chalk.magentaBright('detect-speech-language')} audioFile [output files...] [options...]`,
` Detect the language of a spoken audio file\n`,
`${executableName} ${chalk.magentaBright('detect-voice-activity')} audioFile [output files...] [options...]`,
` Detect voice activity in an audio file\n`,
`${executableName} ${chalk.magentaBright('denoise')} audioFile [output files...] [options...]`,
` Apply speech denoising to an audio file\n`,
`${executableName} ${chalk.magentaBright('isolate')} audioFile [output files...] [options...]`,
` Extract an isolated voice track from an audio file\n`,
`${executableName} ${chalk.magentaBright('list-engines')} operation`,
` List available engines for the specified operation\n`,
`${executableName} ${chalk.magentaBright('list-voices')} tts-engine [output files...] [options...]`,
` List available voices for the specified TTS engine\n`,
`${executableName} ${chalk.magentaBright('install')} [package names...] [options...]`,
` Install one or more Echogarden packages\n`,
`${executableName} ${chalk.magentaBright('uninstall')} [package names...] [options...]`,
` Uninstall one or more Echogarden packages\n`,
`${executableName} ${chalk.magentaBright('list-packages')} [options...]`,
` List installed Echogarden packages\n`,
`${executableName} ${chalk.magentaBright('serve')} [options...]`,
` Start a server\n`,
`Options reference: ${chalk.blueBright('https://bit.ly/echogarden-options')}`
]
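// Dispatches the parsed CLI operation to its handler.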
async function startWithArgs(operationData: CLIOperationData) {
const logger = new Logger()
switch (operationData.operation) {
case 'speak':
case 'speak-file':
case 'speak-url':
case 'speak-wikipedia': {
await speak(operationData)
break
}
case 'transcribe': {
await transcribe(operationData)
break
}
case 'align': {
await align(operationData)
break
}
case 'translate-text': {
await translateText(operationData)
break
}
case 'translate-speech': {
await translateSpeech(operationData)
break
}
case 'align-translation': {
await alignTranslation(operationData)
break
}
case 'align-transcript-and-translation': {
await alignTranscriptAndTranslation(operationData)
break
}
case 'align-timeline-translation': {
await alignTimelineTranslation(operationData)
break
}
case 'detect-language': {
await detectLanguage(operationData, 'auto')
break
}
case 'detect-speech-language': {
await detectLanguage(operationData, 'speech')
break
}
case 'detect-text-language': {
await detectLanguage(operationData, 'text')
break
}
case 'detect-voice-activity': {
await detectVoiceActivity(operationData)
break
}
case 'denoise': {
await denoise(operationData)
break
}
case 'isolate': {
await isolate(operationData)
break
}
case 'list-engines': {
await listEngines(operationData)
break
}
case 'list-voices': {
await listTTSVoices(operationData)
break
}
case 'install': {
await installPackages(operationData)
break
}
case 'uninstall': {
await uninstallPackages(operationData)
break
}
case 'list-packages': {
await listPackages(operationData)
break
}
case 'serve': {
await serve(operationData)
break
}
default: {
logger.logTitledMessage(`Unknown operation`, operationData.operation, chalk.redBright, 'error')
process.exit(1)
}
}
}
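// Handles the 'speak', 'speak-file', 'speak-url' and 'speak-wikipedia' operations:
// reads the input text, synthesizes it segment by segment, writes any requested
// output files, and plays the result when no output files were given.
//
// Example invocations (following the help text above):
//   echogarden speak "Hello world" output.mp3 output.srt
//   echogarden speak-wikipedia "Solar System" --language=en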
export async function speak(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operation, operationOptionsLookup, cliOptions } = operationData
const mainArg = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (mainArg == undefined) {
if (operation == 'speak') {
throw new Error(`'speak' requires an argument containing the text to speak.`)
} else if (operation == 'speak-file') {
throw new Error(`'speak-file' requires an argument containing the file to speak.`)
} else if (operation == 'speak-url') {
throw new Error(`'speak-url' requires an argument containing the URL to speak.`)
} else if (operation == 'speak-wikipedia') {
throw new Error(`'speak-wikipedia' requires an argument containing the name of the Wikipedia article to speak.`)
}
return
}
const additionalOptionsSchema = new Map<string, SchemaTypeDefinition>()
additionalOptionsSchema.set('play', { type: 'boolean' })
additionalOptionsSchema.set('overwrite', { type: 'boolean' })
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SynthesisOptions', additionalOptionsSchema)
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true)
let plainText: string | undefined = undefined
let textSegments: string[]
const plainTextParagraphBreaks = options.plainText?.paragraphBreaks || API.defaultSynthesisOptions.plainText!.paragraphBreaks!
const plainTextWhitespace = options.plainText?.whitespace || API.defaultSynthesisOptions.plainText!.whitespace!
if (operation == 'speak') {
if (options.ssml) {
textSegments = [mainArg]
} else {
textSegments = splitToParagraphs(mainArg, plainTextParagraphBreaks, plainTextWhitespace)
}
plainText = mainArg
} else if (operation == 'speak-file') {
const sourceFile = mainArg
if (!existsSync(sourceFile)) {
throw new Error(`The given source file '${sourceFile}' was not found.`)
}
const sourceFileExtension = getLowercaseFileExtension(sourceFile)
const fileContent = await readFileAsUtf8(sourceFile)
if (options.ssml && sourceFileExtension != 'xml' && sourceFileExtension != 'ssml') {
throw new Error(`SSML option is set, but source file doesn't have an 'xml' or 'ssml' extension.`)
}
if (sourceFileExtension == 'txt') {
textSegments = splitToParagraphs(fileContent, plainTextParagraphBreaks, plainTextWhitespace)
plainText = fileContent
} else if (sourceFileExtension == 'html' || sourceFileExtension == 'htm') {
const textContent = await convertHtmlToText(fileContent)
textSegments = splitToParagraphs(textContent, 'single', 'preserve')
} else if (sourceFileExtension == 'srt' || sourceFileExtension == 'vtt') {
textSegments = [subtitlesToText(fileContent)]
} else if (sourceFileExtension == 'xml' || sourceFileExtension == 'ssml') {
options.ssml = true
textSegments = [fileContent]
} else {
throw new Error(`'speak-file' only supports inputs with extensions 'txt', 'html', 'htm', 'xml', 'ssml', 'srt', 'vtt'`)
}
} else if (operation == 'speak-url') {
if (options.ssml) {
throw new Error(`speak-url doesn't accept SSML inputs`)
}
const url = mainArg
if (!url.startsWith('http://') && !url.startsWith('https://')) {
throw new Error(`'${url}' is not a valid URL. Only 'http://' and 'https://' protocols are supported`)
}
const { fetchDocumentText } = await import('../utilities/WebReader.js')
const textContent = await fetchDocumentText(url)
textSegments = splitToParagraphs(textContent, 'single', 'preserve')
} else if (operation == 'speak-wikipedia') {
if (options.ssml) {
throw new Error(`speak-wikipedia doesn't accept SSML inputs`)
}
const { parseWikipediaArticle } = await import('../utilities/WikipediaReader.js')
if (!options.language) {
options.language = 'en'
}
textSegments = await parseWikipediaArticle(mainArg, getShortLanguageCode(options.language))
} else {
throw new Error(`Invalid operation specified: '${operation}'`)
}
async function onSegment(segmentData: API.SynthesisSegmentEventData) {
if (includesPlaceholderPattern) {
logger.start('Write output files for segment')
}
await writeOutputFilesForSegment(outputFilenames, segmentData.index, segmentData.total, segmentData.audio as RawAudio, segmentData.timeline, segmentData.transcript, segmentData.language, allowOverwrite)
logger.end()
if (cliOptions.play) {
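// Apply gain so the playback peak level observed so far is normalized to about -3 dBFS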
let gainAmount = -3 - segmentData.peakDecibelsSoFar
//gainAmount = Math.min(gainAmount, 0)
const audioWithAddedGain = applyGainDecibels(segmentData.audio as RawAudio, gainAmount)
const segmentWordTimeline = segmentData.timeline.flatMap(sentenceTimeline => sentenceTimeline.timeline!)
await playAudioWithWordTimeline(audioWithAddedGain, segmentWordTimeline, segmentData.transcript, cliOptions.player)
}
}
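// Any codec requested in outputAudioFormat is cleared before synthesis; encoding
// to the requested format is presumably applied later when the output files are
// written (an assumption based on the surrounding code).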
if (options.outputAudioFormat?.codec) {
options.outputAudioFormat!.codec = undefined
}
const { audio: synthesizedAudio, timeline } = await API.synthesize(textSegments, options, onSegment, undefined)
if (plainText) {
addWordTextOffsetsToTimelineInPlace(timeline, plainText)
}
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(synthesizedAudio as RawAudio, timeline, textSegments.join('\n\n'), options.subtitles)
}
logger.end()
}
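// Handles the 'transcribe' operation: runs speech recognition on the given audio
// file and writes transcript, timeline and/or subtitle outputs.
//
// Example (following the help text above):
//   echogarden transcribe recording.mp3 transcript.txt transcript.srt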
export async function transcribe(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const sourceFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (sourceFilename == undefined) {
throw new Error(`'transcribe' requires an argument containing the source file name.`)
}
if (!existsSync(sourceFilename)) {
throw new Error(`The given source audio file '${sourceFilename}' was not found.`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'RecognitionOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true)
const { transcript, timeline, wordTimeline, language, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.recognize(sourceFilename, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, transcript, options.subtitles)
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true)
}
logger.end()
if (cliOptions.play) {
let audioToPlay: RawAudio
if (isolatedRawAudio) {
audioToPlay = isolatedRawAudio
} else {
audioToPlay = inputRawAudio
}
const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay)
await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player)
}
}
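// Handles the 'align' operation: aligns a spoken audio file to a reference
// transcript given as a 'txt', 'html', 'htm', 'srt' or 'vtt' file.
//
// Example (following the help text above):
//   echogarden align speech.mp3 transcript.txt aligned.srt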
export async function align(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(2)
if (audioFilename == undefined) {
throw new Error(`align requires an argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source file '${audioFilename}' was not found.`)
}
const alignmentReferenceFile = operationArgs[1]
if (alignmentReferenceFile == undefined) {
throw new Error(`align requires a second argument containing the alignment reference file path.`)
}
if (!existsSync(alignmentReferenceFile)) {
throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`)
}
const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile)
const fileContent = await readFileAsUtf8(alignmentReferenceFile)
let text: string
if (referenceFileExtension == 'txt') {
text = fileContent
} else if (referenceFileExtension == 'html' || referenceFileExtension == 'htm') {
text = await convertHtmlToText(fileContent)
} else if (referenceFileExtension == 'srt' || referenceFileExtension == 'vtt') {
text = subtitlesToText(fileContent)
} else {
throw new Error(`align only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'AlignmentOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true)
const { timeline, wordTimeline, transcript, language, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.align(audioFilename, text, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
if (includesPlaceholderPattern) {
for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) {
const segmentEntry = timeline[segmentIndex]
const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime)
const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline!, -segmentEntry.startTime)
await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, language, allowOverwrite)
}
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, transcript, options.subtitles)
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true)
}
logger.end()
if (cliOptions.play) {
let audioToPlay: RawAudio
if (isolatedRawAudio) {
audioToPlay = isolatedRawAudio
} else {
audioToPlay = inputRawAudio
}
const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay)
await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player)
}
}
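// Handles the 'align-translation' operation: aligns a spoken audio file to a
// transcript translated into a different language.
//
// Example (following the help text above):
//   echogarden align-translation speech.mp3 translated.txt aligned.srt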
export async function alignTranslation(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(2)
if (audioFilename == undefined) {
throw new Error(`align-translation requires a first argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source file '${audioFilename}' was not found.`)
}
const alignmentReferenceFile = operationArgs[1]
if (alignmentReferenceFile == undefined) {
throw new Error(`align-translation requires a second argument containing the translated reference file path.`)
}
if (!existsSync(alignmentReferenceFile)) {
throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`)
}
const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile)
const fileContent = await readFileAsUtf8(alignmentReferenceFile)
let text: string
if (referenceFileExtension == 'txt') {
text = fileContent
} else if (referenceFileExtension == 'html' || referenceFileExtension == 'htm') {
text = await convertHtmlToText(fileContent)
} else if (referenceFileExtension == 'srt' || referenceFileExtension == 'vtt') {
text = subtitlesToText(fileContent)
} else {
throw new Error(`align-translation only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranslationAlignmentOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true)
const {
timeline,
wordTimeline,
translatedTranscript,
sourceLanguage,
targetLanguage,
inputRawAudio,
isolatedRawAudio,
backgroundRawAudio } = await API.alignTranslation(audioFilename, text, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
if (includesPlaceholderPattern) {
for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) {
const segmentEntry = timeline[segmentIndex]
const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime)
const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline!, -segmentEntry.startTime)
await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, targetLanguage, allowOverwrite)
}
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, translatedTranscript, options.subtitles)
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true)
}
logger.end()
if (cliOptions.play) {
let audioToPlay: RawAudio
if (isolatedRawAudio) {
audioToPlay = isolatedRawAudio
} else {
audioToPlay = inputRawAudio
}
const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay)
await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, translatedTranscript, cliOptions.player)
}
}
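// Handles the 'align-transcript-and-translation' operation: aligns a spoken audio
// file to both its native-language transcript and its translated transcript using
// a two-stage approach. For 'json', 'txt', 'srt' and 'vtt' outputs, a parallel
// '<name>.translated.<extension>' file is also written from the translated timeline.
//
// Example (following the help text above):
//   echogarden align-transcript-and-translation speech.mp3 transcript.txt translated.txt out.srt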
export async function alignTranscriptAndTranslation(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(3)
if (audioFilename == undefined) {
throw new Error(`align-transcript-and-translation requires a first argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source file '${audioFilename}' was not found.`)
}
const nativeTranscriptFilePath = operationArgs[1]
if (nativeTranscriptFilePath == undefined) {
throw new Error(`align-transcript-and-translation requires a second argument containing the native language transcript file path.`)
}
if (!existsSync(nativeTranscriptFilePath)) {
throw new Error(`The given transcript file '${nativeTranscriptFilePath}' was not found.`)
}
const translatedTranscriptFilePath = operationArgs[2]
if (translatedTranscriptFilePath == undefined) {
throw new Error(`align-transcript-and-translation requires a third argument containing the translated language transcript file path.`)
}
if (!existsSync(translatedTranscriptFilePath)) {
throw new Error(`The given translated transcript file '${translatedTranscriptFilePath}' was not found.`)
}
let transcript: string
{
const nativeTranscriptFileExtension = getLowercaseFileExtension(nativeTranscriptFilePath)
const fileContent = await readFileAsUtf8(nativeTranscriptFilePath)
if (nativeTranscriptFileExtension == 'txt') {
transcript = fileContent
} else if (nativeTranscriptFileExtension == 'html' || nativeTranscriptFileExtension == 'htm') {
transcript = await convertHtmlToText(fileContent)
} else if (nativeTranscriptFileExtension == 'srt' || nativeTranscriptFileExtension == 'vtt') {
transcript = subtitlesToText(fileContent)
} else {
throw new Error(`align-transcript-and-translation only supports transcript files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
}
let translatedTranscript: string
{
const translatedTranscriptFileExtension = getLowercaseFileExtension(translatedTranscriptFilePath)
const fileContent = await readFileAsUtf8(translatedTranscriptFilePath)
if (translatedTranscriptFileExtension == 'txt') {
translatedTranscript = fileContent
} else if (translatedTranscriptFileExtension == 'html' || translatedTranscriptFileExtension == 'htm') {
translatedTranscript = await convertHtmlToText(fileContent)
} else if (translatedTranscriptFileExtension == 'srt' || translatedTranscriptFileExtension == 'vtt') {
translatedTranscript = subtitlesToText(fileContent)
} else {
throw new Error(`align-transcript-and-translation only supports transcript files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranscriptAndTranslationAlignmentOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true)
const {
timeline,
wordTimeline,
translatedTimeline,
translatedWordTimeline,
sourceLanguage,
targetLanguage,
inputRawAudio,
isolatedRawAudio,
backgroundRawAudio } = await API.alignTranscriptAndTranslation(audioFilename, transcript, translatedTranscript, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, transcript, options.subtitles)
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true)
const fileExtension = getLowercaseFileExtension(outputFilename)
if (['json', 'txt', 'srt', 'vtt'].includes(fileExtension)) {
const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.'))
const translatedOutputPath = `${pathWithoutExtension}.translated.${fileExtension}`
const translatedFileSaver = getFileSaver(translatedOutputPath, allowOverwrite)
await translatedFileSaver(inputRawAudio, translatedTimeline, translatedTranscript, options.subtitles)
}
}
logger.end()
if (cliOptions.play) {
let audioToPlay: RawAudio
if (isolatedRawAudio) {
audioToPlay = isolatedRawAudio
} else {
audioToPlay = inputRawAudio
}
const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay)
await playAudioWithWordTimeline(normalizedAudioToPlay, translatedWordTimeline, translatedTranscript, cliOptions.player)
}
}
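// Handles the 'align-timeline-translation' operation: aligns an existing timeline
// ('json' file) to its translated text.
//
// Example (following the help text above):
//   echogarden align-timeline-translation timeline.json translated.txt aligned.srt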
export async function alignTimelineTranslation(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const timelineFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(2)
if (timelineFilename == undefined) {
throw new Error(`align-timeline-translation requires a first argument containing the timeline file path.`)
}
if (getLowercaseFileExtension(timelineFilename) != 'json') {
throw new Error(`align-timeline-translation only supports timeline files with extension 'json'`)
}
if (!existsSync(timelineFilename)) {
throw new Error(`The given timeline file '${timelineFilename}' was not found.`)
}
const timeline = await readAndParseJsonFile(timelineFilename) as Timeline
const translationFilePath = operationArgs[1]
if (translationFilePath == undefined) {
throw new Error(`align-timeline-translation requires a second argument containing the translated reference file path.`)
}
if (!existsSync(translationFilePath)) {
throw new Error(`The given reference file '${translationFilePath}' was not found.`)
}
const translationFileExtension = getLowercaseFileExtension(translationFilePath)
const translationFileContent = await readFileAsUtf8(translationFilePath)
let translationText: string
if (translationFileExtension == 'txt') {
translationText = translationFileContent
} else if (translationFileExtension == 'html' || translationFileExtension == 'htm') {
translationText = await convertHtmlToText(translationFileContent)
} else if (translationFileExtension == 'srt' || translationFileExtension == 'vtt') {
translationText = subtitlesToText(translationFileContent)
} else {
throw new Error(`align-timeline-translation only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TimelineTranslationAlignmentOptions')
const {
timeline: translationTimeline,
wordTimeline: translationWordTimeline,
rawAudio
} = await API.alignTimelineTranslation(timeline, translationText, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(getEmptyRawAudio(1, 16000), translationTimeline, translationText, options.subtitles)
}
logger.end()
if (cliOptions.play && rawAudio) {
const normalizedAudioToPlay = normalizeAudioLevel(rawAudio)
const transcriptToPlay = translationText
const timelineToPlay = translationWordTimeline
await playAudioWithWordTimeline(normalizedAudioToPlay, timelineToPlay, transcriptToPlay, cliOptions.player)
}
}
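// Handles the 'translate-text' operation: translates a text file to a different
// language. When no output files are given, the translated text is printed instead.
//
// Example (following the help text above):
//   echogarden translate-text article.txt translated.txt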
export async function translateText(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const inputFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (inputFilename == undefined) {
throw new Error(`translate-text requires an argument containing the input file path.`)
}
if (!existsSync(inputFilename)) {
throw new Error(`The given input file '${inputFilename}' was not found.`)
}
const inputFileExtension = getLowercaseFileExtension(inputFilename)
const inputFileContent = await readFileAsUtf8(inputFilename)
let inputText: string
if (inputFileExtension === 'txt') {
inputText = inputFileContent
} else if (inputFileExtension === 'html' || inputFileExtension === 'htm') {
inputText = await convertHtmlToText(inputFileContent)
} else if (inputFileExtension == 'srt' || inputFileExtension == 'vtt') {
inputText = subtitlesToText(inputFileContent)
} else {
throw new Error(`translate-text only supports input files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`)
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextTranslationOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, false, true, true)
const {
text,
translatedText,
translationPairs,
sourceLanguage,
targetLanguage,
} = await API.translateText(inputText, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(getEmptyRawAudio(1, 16000), translationPairs as any as Timeline, translatedText, undefined)
}
logger.end()
} else {
logger.log(``)
logger.log(translatedText)
}
}
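// Handles the 'translate-speech' operation: transcribes a spoken audio file
// directly into a different language.
//
// Example (following the help text above):
//   echogarden translate-speech speech.mp3 translated-transcript.txt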
export async function translateSpeech(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const inputFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (inputFilename == undefined) {
throw new Error(`translate-speech requires an argument containing the input file path.`)
}
if (!existsSync(inputFilename)) {
throw new Error(`The given input file '${inputFilename}' was not found.`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechTranslationOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, true, true, true)
const {
transcript,
timeline,
wordTimeline,
sourceLanguage,
targetLanguage,
inputRawAudio,
isolatedRawAudio,
backgroundRawAudio
} = await API.translateSpeech(inputFilename, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, transcript, options.subtitles)
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true)
}
logger.end()
if (cliOptions.play) {
let audioToPlay: RawAudio
if (isolatedRawAudio) {
audioToPlay = isolatedRawAudio
} else {
audioToPlay = inputRawAudio
}
const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay)
let transcriptToPlay: string
let timelineToPlay: Timeline
if (wordTimeline) {
transcriptToPlay = transcript
timelineToPlay = wordTimeline
} else {
timelineToPlay = timeline.map(entry => ({
type: 'word',
text: entry.text.trim(),
startTime: entry.startTime,
endTime: entry.endTime
}))
transcriptToPlay = ''
for (const entry of timelineToPlay) {
transcriptToPlay += entry.text
transcriptToPlay += ' '
}
transcriptToPlay = transcriptToPlay.trim()
}
await playAudioWithWordTimeline(normalizedAudioToPlay, timelineToPlay, transcriptToPlay, cliOptions.player)
}
}
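// Handles 'detect-language', 'detect-text-language' and 'detect-speech-language'.
// In 'auto' mode, inputs with a 'txt', 'srt' or 'vtt' extension are treated as
// text, and anything else is treated as audio.
//
// Example (following the help text above):
//   echogarden detect-text-language article.txt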
export async function detectLanguage(operationData: CLIOperationData, mode: 'speech' | 'text' | 'auto') {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const inputFilePath = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (inputFilePath == undefined) {
throw new Error(`detect-language requires an argument containing the input file path.`)
}
if (!existsSync(inputFilePath)) {
throw new Error(`The given input file '${inputFilePath}' was not found.`)
}
const inputFileExtension = getLowercaseFileExtension(inputFilePath)
const supportedInputTextFormats = ['txt', 'srt', 'vtt']
let results: API.LanguageDetectionResults
let allowOverwrite: boolean
if (mode == 'text' || (mode == 'auto' && supportedInputTextFormats.includes(inputFileExtension))) {
if (!supportedInputTextFormats.includes(inputFileExtension)) {
throw new Error(`'detect-text-language' doesn't support input file extension '${inputFileExtension}'`)
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextLanguageDetectionOptions')
allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, false, true, false)
let text = await readFileAsUtf8(inputFilePath)
if (inputFileExtension == 'srt' || inputFileExtension == 'vtt') {
text = subtitlesToText(text)
}
const { detectedLanguage, detectedLanguageProbabilities } = await API.detectTextLanguage(text, options)
results = detectedLanguageProbabilities
} else {
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechLanguageDetectionOptions')
allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, false, true, false)
const { detectedLanguage, detectedLanguageProbabilities } = await API.detectSpeechLanguage(inputFilePath, options)
results = detectedLanguageProbabilities
}
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
const resultsAsText = results.map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n')
for (const outputFilename of outputFilenames) {
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(getEmptyRawAudio(0, 0), results as any, resultsAsText)
}
} else {
const resultsAsText = results.slice(0, 10).map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n')
logger.log('', 'output')
logger.log(resultsAsText, 'output')
}
logger.end()
}
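// Handles the 'detect-voice-activity' operation. For each media file output, an
// additional '<name>.cropped.<extension>' file is written from the cropped audio
// returned by the API.
//
// Example (following the help text above):
//   echogarden detect-voice-activity recording.mp3 activity.json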
export async function detectVoiceActivity(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (audioFilename == undefined) {
throw new Error(`detect-voice-activity requires an argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source audio file '${audioFilename}' was not found.`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'VADOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, true, true, true)
const { timeline, verboseTimeline, inputRawAudio, croppedRawAudio } = await API.detectVoiceActivity(audioFilename, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
if (isPlaceholderFilePath(outputFilename)) {
continue
}
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(inputRawAudio, timeline, '', { maxAddedDuration: 0 })
const fileExtension = getLowercaseFileExtension(outputFilename)
if (supportedOutputMediaFileExtensions.includes(fileExtension)) {
const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.'))
const isolatedOutputFilePath = `${pathWithoutExtension}.cropped.${fileExtension}`
const fileSaver = getFileSaver(isolatedOutputFilePath, allowOverwrite)
await fileSaver(croppedRawAudio, [], '')
}
}
logger.end()
if (cliOptions.play) {
const normalizedAudio = normalizeAudioLevel(inputRawAudio)
const timelineToPlay = verboseTimeline.map(entry => {
return { ...entry, type: 'word' } as TimelineEntry
})
// Build a transcript string from the timeline entries for display during playback
const transcriptToPlay = timelineToPlay.map(entry => entry.text).join(' ')
await playAudioWithWordTimeline(normalizedAudio, timelineToPlay, transcriptToPlay, cliOptions.player)
}
}
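// Handles the 'denoise' operation: applies speech denoising to an audio file.
//
// Example (following the help text above):
//   echogarden denoise noisy.mp3 denoised.mp3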
export async function denoise(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (audioFilename == undefined) {
throw new Error(`'denoise' requires an argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source audio file '${audioFilename}' was not found.`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'DenoisingOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, true, false, false)
const { denoisedAudio } = await API.denoise(audioFilename, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
const fileSaver = getFileSaver(outputFilename, allowOverwrite)
await fileSaver(denoisedAudio, [], '')
}
logger.end()
if (cliOptions.play) {
await playAudioSamplesWithKeyboardControls(denoisedAudio, cliOptions.player)
}
}
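// Handles the 'isolate' operation: separates an audio file into an isolated voice
// track and its background, and writes them via writeSourceSeparationOutputIfNeeded.
//
// Example (following the help text above):
//   echogarden isolate recording.mp3 output.mp3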
export async function isolate(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const audioFilename = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (audioFilename == undefined) {
throw new Error(`'isolate' requires an argument containing the audio file path.`)
}
if (!existsSync(audioFilename)) {
throw new Error(`The given source audio file '${audioFilename}' was not found.`)
}
if (cliOptions.play == null) {
cliOptions.play = outputFilenames.length === 0
}
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SourceSeparationOptions')
const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault)
await checkOutputFilenames(outputFilenames, true, false, false)
const { inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.isolate(audioFilename, options)
if (outputFilenames.length > 0) {
logger.start('\nWrite output files')
}
for (const outputFilename of outputFilenames) {
await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, false)
}
logger.end()
if (cliOptions.play) {
await playAudioSamplesWithKeyboardControls(isolatedRawAudio, cliOptions.player)
}
}
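// Handles the 'list-engines' operation: prints the available engines for a given
// operation.
//
// Example:
//   echogarden list-engines transcribe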
export async function listEngines(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs } = operationData
const targetOperation = operationArgs[0]
if (!targetOperation) {
throw new Error(`The 'list-engines' operation requires an argument specifying the operation to list engines for, like 'echogarden list-engines transcribe'.`)
}
let engines: API.EngineMetadata[]
switch (targetOperation) {
case 'speak':
case 'speak-file':
case 'speak-url':
case 'speak-wikipedia': {
engines = API.synthesisEngines
break
}
case 'list-voices': {
engines = API.synthesisEngines
break
}
case 'transcribe': {
engines = API.recognitionEngines
break
}
case 'align': {
engines = API.alignmentEngines
break
}
case 'align-translation': {
engines = API.translationAlignmentEngines
break
}
case 'translate-text': {
engines = API.textTranslationEngines
break
}
case 'translate-speech': {
engines = API.speechTranslationEngines
break
}
case 'detect-language': {
engines = [...API.speechLanguageDetectionEngines, ...API.textLanguageDetectionEngines]
break
}
case 'detect-speech-language': {
engines = API.speechLanguageDetectionEngines
break
}
case 'detect-text-language': {
engines = API.textLanguageDetectionEngines
break
}
case 'detect-voice-activity': {
engines = API.vadEngines
break
}
case 'denoise': {
engines = API.denoisingEngines
break
}
case 'isolate': {
engines = API.sourceSeparationEngines
break
}
case 'list-engines':
case 'install':
case 'uninstall':
case 'list-packages': {
throw new Error(`The operation '${targetOperation}' is not associated with a list of engines.`)
}
default: {
throw new Error(`Unrecognized operation name: '${targetOperation}'`)
}
}
for (const [index, engine] of engines.entries()) {
logger.logTitledMessage('Identifier', chalk.magentaBright(engine.id), undefined, 'output')
logger.logTitledMessage('Name', engine.name, undefined, 'output')
logger.logTitledMessage('Description', engine.description, undefined, 'output')
logger.logTitledMessage('Type', engine.type, undefined, 'output')
if (index < engines.length - 1) {
logger.log('', 'output')
}
}
}
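// Handles the 'list-voices' operation: lists the available voices for a given TTS
// engine.
//
// Example (following the help text above; replace <engine> with a supported engine identifier):
//   echogarden list-voices <engine>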
export async function listTTSVoices(operationData: CLIOperationData) {
const logger = new Logger()
const { operationArgs, operationOptionsLookup, cliOptions } = operationData
const targetEngine = operationArgs[0]
const outputFilenames = operationArgs.slice(1)
if (!targetEngine) {
const optionsSchema = await getOptionsSchema()
const { enum: ttsEnginesEnum } = getOptionTypeFromSchema(['VoiceListRequestOptions', 'engine'], optionsSchema)
throw new Error(`list-voices requires an argument specifying one of these supported engines:\n${ttsEnginesEnum!.join(', ')}`)
}
const additionalOptionsSchema = new Map<string, SchemaTypeDefinition>()
additionalOptionsSchema.set('overwrite', { type: 'boolean' })
operationOptionsLookup.set('engine', targetEngine)
const options = await optionsLookupToTypedObject(operationOptionsLookup, 'VoiceListRequestOptions', additionalOptionsSchema)