UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

1,016 lines 71.2 kB
import * as API from '../api/API.js'; import { parseCLIArguments } from './CLIParser.js'; import { parseJSONAndGetType, getWithDefault, parseJson, setupUnhandledExceptionListeners, splitFilenameOnExtendedExtension, stringifyAndFormatJson } from '../utilities/Utilities.js'; import { getOptionTypeFromSchema } from './CLIOptionsSchema.js'; import { parseConfigFile, parseJSONConfigFile } from './CLIConfigFile.js'; import chalk from 'chalk'; import { applyGainDecibels, encodeRawAudioToWave, getEmptyRawAudio, getRawAudioDuration, normalizeAudioLevel, sliceRawAudioByTime } from '../audio/AudioUtilities.js'; import { subtitlesToText, timelineToSubtitles } from '../subtitles/Subtitles.js'; import { Logger, resetActiveLogger } from '../utilities/Logger.js'; import { isMainThread, parentPort } from 'node:worker_threads'; import { encodeFromChannels, getDefaultFFMpegOptionsForSpeech } from '../codecs/FFMpegTranscoder.js'; import { splitToParagraphs, splitToWords } from '../nlp/Segmentation.js'; import { playAudioSamplesWithKeyboardControls, playAudioWithWordTimeline } from '../audio/AudioPlayer.js'; import { extendDeep } from '../utilities/ObjectUtilities.js'; import { addTimeOffsetToTimeline, addWordTextOffsetsToTimelineInPlace, roundTimelineProperties } from '../utilities/Timeline.js'; import { ensureDir, existsSync, readAndParseJsonFile, readdir, readFileAsUtf8, writeFileSafe } from '../utilities/FileSystem.js'; import { formatLanguageCodeWithName, getShortLanguageCode } from '../utilities/Locale.js'; import { ensureAndGetPackagesDir, getVersionTagFromPackageName, loadPackage, resolveVersionTagForUnversionedPackageName } from '../utilities/PackageManager.js'; import { removePackage } from '../utilities/PackageManager.js'; import { appName } from '../api/Common.js'; import { startServer } from '../server/Server.js'; import { OpenPromise } from '../utilities/OpenPromise.js'; import { getDirName, getFileNameWithoutExtension, getLowercaseFileExtension, joinPath, parsePath, resolveToModuleRootDir } from '../utilities/PathUtilities.js'; import { CLIOptionsKeys } from './CLIOptions.js'; import { convertHtmlToText, formatIntegerWithLeadingZeros, formatListWithQuotedElements } from '../utilities/StringUtilities.js'; //const log = logToStderr async function startIfInWorkerThread() { if (isMainThread || !parentPort) { return; } setupUnhandledExceptionListeners(); const initOpenPromise = new OpenPromise(); parentPort.once('message', (message) => { if (message.name == 'init') { process.stderr.isTTY = message.stdErrIsTTY; process.stderr.hasColors = () => message.hasColors; process.stderr.write = (text) => { parentPort.postMessage({ name: 'writeToStdErr', text }); return true; }; initOpenPromise.resolve(); } }); await initOpenPromise.promise; start(process.argv.slice(2)); } export async function start(processArgs) { const logger = new Logger(); const operationData = { operation: '', operationArgs: [], globalOptions: {}, cliOptions: {}, operationOptionsLookup: new Map(), }; try { const packageData = await readAndParseJsonFile(resolveToModuleRootDir('package.json')); logger.log(chalk.magentaBright(`Echogarden v${packageData.version}\n`)); const operation = processArgs[0]; if (!operation || operation == 'help') { logger.log(`Supported operations:\n\n${help.join('\n')}`); process.exit(0); } if (operation == '--help' || operation == '-h') { logger.log(`There's no operation called '${operation}'. Did you mean to run 'echogarden help'?`); process.exit(1); } if (operation.startsWith('-')) { logger.log(`Operation name '${operation}' is invalid. It cannot start with a hyphen.`); process.exit(1); } const { operationArgs, parsedArgumentsLookup } = parseCLIArguments(processArgs.slice(1)); const globalOptionsLookup = new Map(); const cliOptionsLookup = new Map(); const operationsOptionsLookup = new Map(); if (!parsedArgumentsLookup.has('config')) { const defaultConfigFile = `./${appName}.config`; const defaultJsonConfigFile = defaultConfigFile + '.json'; if (existsSync(defaultConfigFile)) { parsedArgumentsLookup.set('config', defaultConfigFile); } else if (existsSync(defaultJsonConfigFile)) { parsedArgumentsLookup.set('config', defaultJsonConfigFile); } } if (parsedArgumentsLookup.has('config')) { const configFilePath = parsedArgumentsLookup.get('config'); parsedArgumentsLookup.delete('config'); let parsedConfigFile; if (configFilePath.endsWith('.config')) { parsedConfigFile = await parseConfigFile(configFilePath); } else if (configFilePath.endsWith('.config.json')) { parsedConfigFile = await parseJSONConfigFile(configFilePath); } else { throw new Error(`Specified config file '${configFilePath}' doesn't have a supported extension. Should be either '.config' or '.config.json'`); } let sectionName = operation; if (sectionName.startsWith('speak-')) { sectionName = 'speak'; } if (parsedConfigFile.has('global')) { for (const [key, value] of parsedConfigFile.get('global')) { globalOptionsLookup.set(key, value); } } if (parsedConfigFile.has('cli')) { for (const [key, value] of parsedConfigFile.get('cli')) { cliOptionsLookup.set(key, value); } } if (parsedConfigFile.has(sectionName)) { for (const [key, value] of parsedConfigFile.get(sectionName)) { operationsOptionsLookup.set(key, value); } } } const globalOptionsKeys = API.listGlobalOptions(); const cliOptionsKeys = CLIOptionsKeys; for (const [key, value] of parsedArgumentsLookup) { if (globalOptionsKeys.includes(key)) { globalOptionsLookup.set(key, value); } else if (cliOptionsKeys.includes(key)) { cliOptionsLookup.set(key, value); } else { operationsOptionsLookup.set(key, value); } } operationData.operation = operation; operationData.operationArgs = operationArgs; operationData.globalOptions = await optionsLookupToTypedObject(globalOptionsLookup, 'GlobalOptions'); operationData.cliOptions = await optionsLookupToTypedObject(cliOptionsLookup, 'CLIOptions'); operationData.operationOptionsLookup = operationsOptionsLookup; } catch (e) { resetActiveLogger(); logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error'); process.exit(1); } for (const key in operationData.globalOptions) { const value = operationData.globalOptions[key]; API.setGlobalOption(key, value); } const debugMode = operationData.cliOptions.debug || false; try { await startWithArgs(operationData); } catch (e) { resetActiveLogger(); if (debugMode) { logger.log(e, 'error'); } else { logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error'); } process.exit(1); } process.exit(0); } const executableName = `${chalk.cyanBright('echogarden')}`; const help = [ `${executableName} ${chalk.magentaBright('speak')} text [output files...] [options...]`, ` Speak the given text\n`, `${executableName} ${chalk.magentaBright('speak-file')} inputFile [output files...] [options...]`, ` Speak the given text file\n`, `${executableName} ${chalk.magentaBright('speak-url')} url [output files...] [options...]`, ` Speak the HTML document on the given URL\n`, `${executableName} ${chalk.magentaBright('speak-wikipedia')} articleName [output files...] [options...]`, ` Speak the given Wikipedia article. Language edition can be specified by --language=<langCode>\n`, `${executableName} ${chalk.magentaBright('transcribe')} audioFile [output files...] [options...]`, ` Transcribe a spoken audio file\n`, `${executableName} ${chalk.magentaBright('align')} audioFile transcriptFile [output files...] [options...]`, ` Align spoken audio file to its transcript\n`, `${executableName} ${chalk.magentaBright('translate-text')} inputFile [output files...] [options...]`, ` Translate text to a different language\n`, `${executableName} ${chalk.magentaBright('translate-speech')} audioFile [output files...] [options...]`, ` Transcribe spoken audio file directly to a different language\n`, `${executableName} ${chalk.magentaBright('align-translation')} audioFile translatedTranscriptFile [output files...] [options...]`, ` Align spoken audio file to its translated transcript\n`, `${executableName} ${chalk.magentaBright('align-transcript-and-translation')} audioFile transcriptFile translatedTranscriptFile [output files...] [options...]`, ` Align spoken audio file to both its transcript and its translated transcript using a two-stage approach.\n`, `${executableName} ${chalk.magentaBright('align-timeline-translation')} timelineFile translatedFile [output files...] [options...]`, ` Align a given timeline file to its translated text\n`, `${executableName} ${chalk.magentaBright('detect-text-language')} inputFile [output files...] [options...]`, ` Detect language of textual file\n`, `${executableName} ${chalk.magentaBright('detect-speech-language')} audioFile [output files...] [options...]`, ` Detect language of spoken audio file\n`, `${executableName} ${chalk.magentaBright('detect-voice-activity')} audioFile [output files...] [options...]`, ` Detect voice activity in audio file\n`, `${executableName} ${chalk.magentaBright('denoise')} audioFile [output files...] [options...]`, ` Apply speech denoising to audio file\n`, `${executableName} ${chalk.magentaBright('isolate')} audioFile [output files...] [options...]`, ` Extract isolated voice track from an audio file\n`, `${executableName} ${chalk.magentaBright('list-engines')} operation`, ` List available engines for the specified operation\n`, `${executableName} ${chalk.magentaBright('list-voices')} tts-engine [output files...] [options...]`, ` List available voices for the specified TTS engine\n`, `${executableName} ${chalk.magentaBright('install')} [package names...] [options...]`, ` Install one or more Echogarden packages\n`, `${executableName} ${chalk.magentaBright('uninstall')} [package names...] [options...]`, ` Uninstall one or more Echogarden packages\n`, `${executableName} ${chalk.magentaBright('list-packages')} [options...]`, ` List installed Echogarden packages\n`, `${executableName} ${chalk.magentaBright('serve')} [options...]`, ` Start a server\n`, `Options reference: ${chalk.blueBright('https://bit.ly/echogarden-options')}` ]; async function startWithArgs(operationData) { const logger = new Logger(); switch (operationData.operation) { case 'speak': case 'speak-file': case 'speak-url': case 'speak-wikipedia': { await speak(operationData); break; } case 'transcribe': { await transcribe(operationData); break; } case 'align': { await align(operationData); break; } case 'translate-text': { await translateText(operationData); break; } case 'translate-speech': { await translateSpeech(operationData); break; } case 'align-translation': { await alignTranslation(operationData); break; } case 'align-transcript-and-translation': { await alignTranscriptAndTranslation(operationData); break; } case 'align-timeline-translation': { await alignTimelineTranslation(operationData); break; } case 'detect-language': { await detectLanguage(operationData, 'auto'); break; } case 'detect-speech-language': { await detectLanguage(operationData, 'speech'); break; } case 'detect-text-language': { await detectLanguage(operationData, 'text'); break; } case 'detect-voice-activity': { await detectVoiceActivity(operationData); break; } case 'denoise': { await denoise(operationData); break; } case 'isolate': { await isolate(operationData); break; } case 'list-engines': { await listEngines(operationData); break; } case 'list-voices': { await listTTSVoices(operationData); break; } case 'install': { await installPackages(operationData); break; } case 'uninstall': { await uninstallPackages(operationData); break; } case 'list-packages': { await listPackages(operationData); break; } case 'serve': { await serve(operationData); break; } default: { logger.logTitledMessage(`Unknown operation`, operationData.operation, chalk.redBright, 'error'); process.exit(1); } } } export async function speak(operationData) { const logger = new Logger(); const { operationArgs, operation, operationOptionsLookup, cliOptions } = operationData; const mainArg = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (mainArg == undefined) { if (operation == 'speak') { throw new Error(`'speak' requires an argument containing the text to speak.`); } else if (operation == 'speak-file') { throw new Error(`'speak-file' requires an argument containing the file to speak.`); } else if (operation == 'speak-url') { throw new Error(`'speak-url' requires an argument containing the url to speak.`); } else if (operation == 'speak-wikipedia') { throw new Error(`'speak-wikipedia' requires an argument containing the name of the Wikipedia article to speak.`); } return; } const additionalOptionsSchema = new Map(); additionalOptionsSchema.set('play', { type: 'boolean' }); additionalOptionsSchema.set('overwrite', { type: 'boolean' }); if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SynthesisOptions', additionalOptionsSchema); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true); let plainText = undefined; let textSegments; const plainTextParagraphBreaks = options.plainText?.paragraphBreaks || API.defaultSynthesisOptions.plainText.paragraphBreaks; const plainTextWhitespace = options.plainText?.whitespace || API.defaultSynthesisOptions.plainText.whitespace; if (operation == 'speak') { if (options.ssml) { textSegments = [mainArg]; } else { textSegments = splitToParagraphs(mainArg, plainTextParagraphBreaks, plainTextWhitespace); } plainText = mainArg; } else if (operation == 'speak-file') { const sourceFile = mainArg; if (!existsSync(sourceFile)) { throw new Error(`The given source file '${sourceFile}' was not found.`); } const sourceFileExtension = getLowercaseFileExtension(sourceFile); const fileContent = await readFileAsUtf8(sourceFile); if (options.ssml && sourceFileExtension != 'xml' && sourceFileExtension != 'ssml') { throw new Error(`SSML option is set, but source file doesn't have an 'xml' or 'ssml' extension.`); } if (sourceFileExtension == 'txt') { textSegments = splitToParagraphs(fileContent, plainTextParagraphBreaks, plainTextWhitespace); plainText = fileContent; } else if (sourceFileExtension == 'html' || sourceFileExtension == 'htm') { const textContent = await convertHtmlToText(fileContent); textSegments = splitToParagraphs(textContent, 'single', 'preserve'); } else if (sourceFileExtension == 'srt' || sourceFileExtension == 'vtt') { const fileContent = await readFileAsUtf8(sourceFile); //textSegments = subtitlesToTimeline(fileContent).map(entry => entry.text) textSegments = [subtitlesToText(fileContent)]; } else if (sourceFileExtension == 'xml' || sourceFileExtension == 'ssml') { options.ssml = true; textSegments = [fileContent]; } else { throw new Error(`'speak-file' only supports inputs with extensions 'txt', 'html', 'htm', 'xml', 'ssml', 'srt', 'vtt'`); } } else if (operation == 'speak-url') { if (options.ssml) { throw new Error(`speak-url doesn't accept SSML inputs`); } const url = mainArg; if (!url.startsWith('http://') && !url.startsWith('https://')) { throw new Error(`'${url}' is not a valid URL. Only 'http://' and 'https://' protocols are supported`); } const { fetchDocumentText } = await import('../utilities/WebReader.js'); const textContent = await fetchDocumentText(url); textSegments = splitToParagraphs(textContent, 'single', 'preserve'); } else if (operation == 'speak-wikipedia') { if (options.ssml) { throw new Error(`speak-wikipedia doesn't provide SSML inputs`); } const { parseWikipediaArticle } = await import('../utilities/WikipediaReader.js'); if (!options.language) { options.language = 'en'; } textSegments = await parseWikipediaArticle(mainArg, getShortLanguageCode(options.language)); } else { throw new Error(`Invalid operation specified: '${operation}'`); } async function onSegment(segmentData) { if (includesPlaceholderPattern) { logger.start('Write output files for segment'); } await writeOutputFilesForSegment(outputFilenames, segmentData.index, segmentData.total, segmentData.audio, segmentData.timeline, segmentData.transcript, segmentData.language, allowOverwrite); logger.end(); if (cliOptions.play) { let gainAmount = -3 - segmentData.peakDecibelsSoFar; //gainAmount = Math.min(gainAmount, 0) const audioWithAddedGain = applyGainDecibels(segmentData.audio, gainAmount); const segmentWordTimeline = segmentData.timeline.flatMap(sentenceTimeline => sentenceTimeline.timeline); await playAudioWithWordTimeline(audioWithAddedGain, segmentWordTimeline, segmentData.transcript, cliOptions.player); } } if (options.outputAudioFormat?.codec) { options.outputAudioFormat.codec = undefined; } const { audio: synthesizedAudio, timeline } = await API.synthesize(textSegments, options, onSegment, undefined); if (plainText) { addWordTextOffsetsToTimelineInPlace(timeline, plainText); } if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(synthesizedAudio, timeline, textSegments.join('\n\n'), options.subtitles); } logger.end(); } export async function transcribe(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const sourceFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (sourceFilename == undefined) { throw new Error(`'transcribe' requires an argument containing the source file name.`); } if (!existsSync(sourceFilename)) { throw new Error(`The given source audio file '${sourceFilename}' was not found.`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'RecognitionOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true); const { transcript, timeline, wordTimeline, language, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.recognize(sourceFilename, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, transcript, options.subtitles); await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true); } logger.end(); if (cliOptions.play) { let audioToPlay; if (isolatedRawAudio) { audioToPlay = isolatedRawAudio; } else { audioToPlay = inputRawAudio; } const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay); await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player); } } export async function align(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(2); if (audioFilename == undefined) { throw new Error(`align requires an argument containing the audio file path.`); } if (!existsSync(audioFilename)) { throw new Error(`The given source file '${audioFilename}' was not found.`); } const alignmentReferenceFile = operationArgs[1]; if (alignmentReferenceFile == undefined) { throw new Error(`align requires a second argument containing the alignment reference file path.`); } if (!existsSync(alignmentReferenceFile)) { throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`); } const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile); const fileContent = await readFileAsUtf8(alignmentReferenceFile); let text; if (referenceFileExtension == 'txt') { text = fileContent; } else if (referenceFileExtension == 'html' || referenceFileExtension == 'htm') { text = await convertHtmlToText(fileContent); } else if (referenceFileExtension == 'srt' || referenceFileExtension == 'vtt') { text = subtitlesToText(fileContent); } else { throw new Error(`align only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'AlignmentOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true); const { timeline, wordTimeline, transcript, language, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.align(audioFilename, text, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } if (includesPlaceholderPattern) { for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) { const segmentEntry = timeline[segmentIndex]; const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime); const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline, -segmentEntry.startTime); await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, language, allowOverwrite); } } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, transcript, options.subtitles); await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true); } logger.end(); if (cliOptions.play) { let audioToPlay; if (isolatedRawAudio) { audioToPlay = isolatedRawAudio; } else { audioToPlay = inputRawAudio; } const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay); await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player); } } export async function alignTranslation(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(2); if (audioFilename == undefined) { throw new Error(`align-translation requires a first argument containing the audio file path.`); } if (!existsSync(audioFilename)) { throw new Error(`The given source file '${audioFilename}' was not found.`); } const alignmentReferenceFile = operationArgs[1]; if (alignmentReferenceFile == undefined) { throw new Error(`align-translation requires a second argument containing the translated reference file path.`); } if (!existsSync(alignmentReferenceFile)) { throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`); } const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile); const fileContent = await readFileAsUtf8(alignmentReferenceFile); let text; if (referenceFileExtension == 'txt') { text = fileContent; } else if (referenceFileExtension == 'html' || referenceFileExtension == 'htm') { text = await convertHtmlToText(fileContent); } else if (referenceFileExtension == 'srt' || referenceFileExtension == 'vtt') { text = subtitlesToText(fileContent); } else { throw new Error(`align-translation only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranslationAlignmentOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true); const { timeline, wordTimeline, translatedTranscript, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.alignTranslation(audioFilename, text, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } if (includesPlaceholderPattern) { for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) { const segmentEntry = timeline[segmentIndex]; const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime); const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline, -segmentEntry.startTime); await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, targetLanguage, allowOverwrite); } } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, translatedTranscript, options.subtitles); await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true); } logger.end(); if (cliOptions.play) { let audioToPlay; if (isolatedRawAudio) { audioToPlay = isolatedRawAudio; } else { audioToPlay = inputRawAudio; } const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay); await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, translatedTranscript, cliOptions.player); } } export async function alignTranscriptAndTranslation(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(3); if (audioFilename == undefined) { throw new Error(`align-transcript-and-translation requires a first argument containing the audio file path.`); } if (!existsSync(audioFilename)) { throw new Error(`The given source file '${audioFilename}' was not found.`); } const nativeTranscriptFilePath = operationArgs[1]; if (nativeTranscriptFilePath == undefined) { throw new Error(`align-transcript-and-translation requires a second argument containing the native language transcript file path.`); } if (!existsSync(nativeTranscriptFilePath)) { throw new Error(`The given transcript file '${nativeTranscriptFilePath}' was not found.`); } const translatedTranscriptFilePath = operationArgs[2]; if (translatedTranscriptFilePath == undefined) { throw new Error(`align-transcript-and-translation requires a third argument containing the translated language transcript file path.`); } if (!existsSync(translatedTranscriptFilePath)) { throw new Error(`The given translated transcript file '${nativeTranscriptFilePath}' was not found.`); } let transcript; { const nativeTranscriptFileExtension = getLowercaseFileExtension(nativeTranscriptFilePath); const fileContent = await readFileAsUtf8(nativeTranscriptFilePath); if (nativeTranscriptFileExtension == 'txt') { transcript = fileContent; } else if (nativeTranscriptFileExtension == 'html' || nativeTranscriptFileExtension == 'htm') { transcript = await convertHtmlToText(fileContent); } else if (nativeTranscriptFileExtension == 'srt' || nativeTranscriptFileExtension == 'vtt') { transcript = subtitlesToText(fileContent); } else { throw new Error(`align-transcript-and-translation only supports transcript files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } } let translatedTranscript; { const translatedTranscriptFileExtension = getLowercaseFileExtension(translatedTranscriptFilePath); const fileContent = await readFileAsUtf8(translatedTranscriptFilePath); if (translatedTranscriptFileExtension == 'txt') { translatedTranscript = fileContent; } else if (translatedTranscriptFileExtension == 'html' || translatedTranscriptFileExtension == 'htm') { translatedTranscript = await convertHtmlToText(fileContent); } else if (translatedTranscriptFileExtension == 'srt' || translatedTranscriptFileExtension == 'vtt') { translatedTranscript = subtitlesToText(fileContent); } else { throw new Error(`align-transcript-and-translation only supports transcript files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranscriptAndTranslationAlignmentOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true); const { timeline, wordTimeline, translatedTimeline, translatedWordTimeline, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.alignTranscriptAndTranslation(audioFilename, transcript, translatedTranscript, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, transcript, options.subtitles); await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true); const fileExtension = getLowercaseFileExtension(outputFilename); if (['json', 'txt', 'srt', 'vtt'].includes(fileExtension)) { const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.')); const translatedOutputPath = `${pathWithoutExtension}.translated.${fileExtension}`; const translatedFileSaver = getFileSaver(translatedOutputPath, allowOverwrite); await translatedFileSaver(inputRawAudio, translatedTimeline, translatedTranscript, options.subtitles); } } logger.end(); if (cliOptions.play) { let audioToPlay; if (isolatedRawAudio) { audioToPlay = isolatedRawAudio; } else { audioToPlay = inputRawAudio; } const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay); await playAudioWithWordTimeline(normalizedAudioToPlay, translatedWordTimeline, translatedTranscript, cliOptions.player); } } export async function alignTimelineTranslation(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const timelineFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(2); if (timelineFilename == undefined) { throw new Error(`align-timeline-translation requires a first argument containing the timeline file path.`); } if (getLowercaseFileExtension(timelineFilename) != 'json') { throw new Error(`align-timeline-translation only supports timeline files with extension 'json'`); } if (!existsSync(timelineFilename)) { throw new Error(`The given timeline file '${timelineFilename}' was not found.`); } const timeline = await readAndParseJsonFile(timelineFilename); const translationFilePath = operationArgs[1]; if (translationFilePath == undefined) { throw new Error(`align-timeline-translation requires a second argument containing the translated reference file path.`); } if (!existsSync(translationFilePath)) { throw new Error(`The given reference file '${translationFilePath}' was not found.`); } const translationFileExtension = getLowercaseFileExtension(translationFilePath); const translationFileContent = await readFileAsUtf8(translationFilePath); let translationText; if (translationFileExtension == 'txt') { translationText = translationFileContent; } else if (translationFileExtension == 'html' || translationFileExtension == 'htm') { translationText = await convertHtmlToText(translationFileContent); } else if (translationFileExtension == 'srt' || translationFileExtension == 'vtt') { translationText = subtitlesToText(translationFileContent); } else { throw new Error(`align only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TimelineTranslationAlignmentOptions'); const { timeline: translationTimeline, wordTimeline: translationWordTimeline, rawAudio } = await API.alignTimelineTranslation(timeline, translationText, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(getEmptyRawAudio(1, 16000), translationTimeline, translationText, options.subtitles); } logger.end(); if (cliOptions.play && rawAudio) { const normalizedAudioToPlay = normalizeAudioLevel(rawAudio); let transcriptToPlay; let timelineToPlay; transcriptToPlay = translationText; timelineToPlay = translationWordTimeline; await playAudioWithWordTimeline(normalizedAudioToPlay, timelineToPlay, transcriptToPlay, cliOptions.player); } } export async function translateText(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const inputFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (inputFilename == undefined) { throw new Error(`translate-text requires an argument containing the input file path.`); } if (!existsSync(inputFilename)) { throw new Error(`The given input file '${inputFilename}' was not found.`); } const inputFileExtension = getLowercaseFileExtension(inputFilename); const inputFileContent = await readFileAsUtf8(inputFilename); let inputText; if (inputFileExtension === 'txt') { inputText = inputFileContent; } else if (inputFileExtension === 'html' || inputFileExtension === 'htm') { inputText = await convertHtmlToText(inputFileContent); } else if (inputFileExtension == 'srt' || inputFileExtension == 'vtt') { inputText = subtitlesToText(inputFileContent); } else { throw new Error(`translate-text only supports input files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`); } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextTranslationOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, false, true, true); const { text, translatedText, translationPairs, sourceLanguage, targetLanguage, } = await API.translateText(inputText, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(getEmptyRawAudio(1, 16000), translationPairs, translatedText, undefined); } logger.end(); } else { logger.log(``); logger.log(translatedText); } } export async function translateSpeech(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const inputFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (inputFilename == undefined) { throw new Error(`translate-speech requires an argument containing the input file path.`); } if (!existsSync(inputFilename)) { throw new Error(`The given input file '${inputFilename}' was not found.`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechTranslationOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, true, true, true); const { transcript, timeline, wordTimeline, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.translateSpeech(inputFilename, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, transcript, options.subtitles); await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true); } logger.end(); if (cliOptions.play) { let audioToPlay; if (isolatedRawAudio) { audioToPlay = isolatedRawAudio; } else { audioToPlay = inputRawAudio; } const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay); let transcriptToPlay; let timelineToPlay; if (wordTimeline) { transcriptToPlay = transcript; timelineToPlay = wordTimeline; } else { timelineToPlay = timeline.map(entry => ({ type: 'word', text: entry.text.trim(), startTime: entry.startTime, endTime: entry.endTime })); transcriptToPlay = ''; for (const entry of timelineToPlay) { transcriptToPlay += entry.text; transcriptToPlay += ' '; } transcriptToPlay = transcriptToPlay.trim(); } await playAudioWithWordTimeline(normalizedAudioToPlay, timelineToPlay, transcriptToPlay, cliOptions.player); } } export async function detectLanguage(operationData, mode) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const inputFilePath = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (!existsSync(inputFilePath)) { throw new Error(`The given input file '${inputFilePath}' was not found.`); } const inputFileExtension = getLowercaseFileExtension(inputFilePath); const supportedInputTextFormats = ['txt', 'srt', 'vtt']; let results; let allowOverwrite; if (mode == 'text' || (mode == 'auto' && supportedInputTextFormats.includes(inputFileExtension))) { if (inputFilePath == undefined) { throw new Error(`detect-text-language requires an argument containing the input file path.`); } if (!supportedInputTextFormats.includes(inputFileExtension)) { throw new Error(`'detect-text-language' doesn't support input file extension '${inputFileExtension}'`); } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextLanguageDetectionOptions'); allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, false, true, false); let text = await readFileAsUtf8(inputFilePath); if (inputFileExtension == 'srt' || inputFileExtension == 'vtt') { text = subtitlesToText(text); } const { detectedLanguage, detectedLanguageProbabilities } = await API.detectTextLanguage(text, options); results = detectedLanguageProbabilities; } else { if (inputFilePath == undefined) { throw new Error(`detect-speech-language requires an argument containing the input audio file path.`); } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechLanguageDetectionOptions'); allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, false, true, false); const { detectedLanguage, detectedLanguageProbabilities } = await API.detectSpeechLanguage(inputFilePath, options); results = detectedLanguageProbabilities; } if (outputFilenames.length > 0) { logger.start('\nWrite output files'); const resultsAsText = results.map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n'); for (const outputFilename of outputFilenames) { const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(getEmptyRawAudio(0, 0), results, resultsAsText); } } else { const resultsAsText = results.slice(0, 10).map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n'); logger.log('', 'output'); logger.log(resultsAsText, 'output'); } logger.end(); } export async function detectVoiceActivity(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (audioFilename == undefined) { throw new Error(`detect-voice-activity requires an argument containing the audio file path.`); } if (!existsSync(audioFilename)) { throw new Error(`The given source audio file '${audioFilename}' was not found.`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'VADOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, true, true, true); let { timeline, verboseTimeline, inputRawAudio, croppedRawAudio } = await API.detectVoiceActivity(audioFilename, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { if (isPlaceholderFilePath(outputFilename)) { continue; } const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(inputRawAudio, timeline, '', { maxAddedDuration: 0 }); const fileExtension = getLowercaseFileExtension(outputFilename); if (supportedOutputMediaFileExtensions.includes(fileExtension)) { const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.')); const isolatedOutputFilePath = `${pathWithoutExtension}.cropped.${fileExtension}`; const fileSaver = getFileSaver(isolatedOutputFilePath, allowOverwrite); await fileSaver(croppedRawAudio, [], ''); } } logger.end(); if (cliOptions.play) { const normalizedAudio = normalizeAudioLevel(inputRawAudio); const timelineToPlay = verboseTimeline.map(entry => { return { ...entry, type: 'word' }; }); await playAudioWithWordTimeline(normalizedAudio, timelineToPlay, cliOptions.player); } } export async function denoise(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs[0]; const outputFilenames = operationArgs.slice(1); if (audioFilename == undefined) { throw new Error(`'denoise' requires an argument containing the audio file path.`); } if (!existsSync(audioFilename)) { throw new Error(`The given source audio file '${audioFilename}' was not found.`); } if (cliOptions.play == null) { cliOptions.play = outputFilenames.length === 0; } const options = await optionsLookupToTypedObject(operationOptionsLookup, 'DenoisingOptions'); const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault); await checkOutputFilenames(outputFilenames, true, false, false); const { denoisedAudio } = await API.denoise(audioFilename, options); if (outputFilenames.length > 0) { logger.start('\nWrite output files'); } for (const outputFilename of outputFilenames) { const fileSaver = getFileSaver(outputFilename, allowOverwrite); await fileSaver(denoisedAudio, [], ''); } logger.end(); if (cliOptions.play) { await playAudioSamplesWithKeyboardControls(denoisedAudio, cliOptions.player); } } export async function isolate(operationData) { const logger = new Logger(); const { operationArgs, operationOptionsLookup, cliOptions } = operationData; const audioFilename = operationArgs