// echogarden
// An easy-to-use speech toolset. Includes tools for synthesis, recognition,
// alignment, speech translation, language detection, source separation and more.
import * as API from '../api/API.js';
import { parseCLIArguments } from './CLIParser.js';
import { parseJSONAndGetType, getWithDefault, parseJson, setupUnhandledExceptionListeners, splitFilenameOnExtendedExtension, stringifyAndFormatJson } from '../utilities/Utilities.js';
import { getOptionTypeFromSchema } from './CLIOptionsSchema.js';
import { parseConfigFile, parseJSONConfigFile } from './CLIConfigFile.js';
import chalk from 'chalk';
import { applyGainDecibels, encodeRawAudioToWave, getEmptyRawAudio, getRawAudioDuration, normalizeAudioLevel, sliceRawAudioByTime } from '../audio/AudioUtilities.js';
import { subtitlesToText, timelineToSubtitles } from '../subtitles/Subtitles.js';
import { Logger, resetActiveLogger } from '../utilities/Logger.js';
import { isMainThread, parentPort } from 'node:worker_threads';
import { encodeFromChannels, getDefaultFFMpegOptionsForSpeech } from '../codecs/FFMpegTranscoder.js';
import { splitToParagraphs, splitToWords } from '../nlp/Segmentation.js';
import { playAudioSamplesWithKeyboardControls, playAudioWithWordTimeline } from '../audio/AudioPlayer.js';
import { extendDeep } from '../utilities/ObjectUtilities.js';
import { addTimeOffsetToTimeline, addWordTextOffsetsToTimelineInPlace, roundTimelineProperties } from '../utilities/Timeline.js';
import { ensureDir, existsSync, readAndParseJsonFile, readdir, readFileAsUtf8, writeFileSafe } from '../utilities/FileSystem.js';
import { formatLanguageCodeWithName, getShortLanguageCode } from '../utilities/Locale.js';
import { ensureAndGetPackagesDir, getVersionTagFromPackageName, loadPackage, resolveVersionTagForUnversionedPackageName } from '../utilities/PackageManager.js';
import { removePackage } from '../utilities/PackageManager.js';
import { appName } from '../api/Common.js';
import { startServer } from '../server/Server.js';
import { OpenPromise } from '../utilities/OpenPromise.js';
import { getDirName, getFileNameWithoutExtension, getLowercaseFileExtension, joinPath, parsePath, resolveToModuleRootDir } from '../utilities/PathUtilities.js';
import { CLIOptionsKeys } from './CLIOptions.js';
import { convertHtmlToText, formatIntegerWithLeadingZeros, formatListWithQuotedElements } from '../utilities/StringUtilities.js';
//const log = logToStderr
// When running inside a worker thread, wait for an 'init' message from the
// parent, redirect stderr writes through the parent port, then start the CLI
// with the process arguments. Does nothing on the main thread.
async function startIfInWorkerThread() {
    // Only applicable inside a worker thread that has a parent port.
    if (isMainThread || !parentPort) {
        return;
    }

    setupUnhandledExceptionListeners();

    const initOpenPromise = new OpenPromise();

    const onParentMessage = (message) => {
        if (message.name !== 'init') {
            return;
        }

        // Mirror the parent's terminal capabilities, and forward all stderr
        // output to the parent thread instead of writing it directly.
        process.stderr.isTTY = message.stdErrIsTTY;
        process.stderr.hasColors = () => message.hasColors;

        process.stderr.write = (text) => {
            parentPort.postMessage({ name: 'writeToStdErr', text });

            return true;
        };

        initOpenPromise.resolve();
    };

    parentPort.once('message', onParentMessage);

    await initOpenPromise.promise;

    start(process.argv.slice(2));
}
// CLI entry point: parses arguments and config files, splits options into
// global / CLI / operation categories, applies global options, dispatches the
// operation via startWithArgs, and exits the process with 0 on success or 1
// on any error.
export async function start(processArgs) {
    const logger = new Logger();

    // Parsed CLI state handed to startWithArgs: the operation name, its
    // positional arguments, and options split into the three categories.
    const operationData = {
        operation: '',
        operationArgs: [],
        globalOptions: {},
        cliOptions: {},
        operationOptionsLookup: new Map(),
    };

    try {
        // Print the version banner, read from the module's own package.json.
        const packageData = await readAndParseJsonFile(resolveToModuleRootDir('package.json'));
        logger.log(chalk.magentaBright(`Echogarden v${packageData.version}\n`));

        const operation = processArgs[0];

        // No operation, or 'help': print the operations list and exit successfully.
        if (!operation || operation == 'help') {
            logger.log(`Supported operations:\n\n${help.join('\n')}`);
            process.exit(0);
        }

        // '--help' / '-h' are not operations; point the user at 'echogarden help'.
        if (operation == '--help' || operation == '-h') {
            logger.log(`There's no operation called '${operation}'. Did you mean to run 'echogarden help'?`);
            process.exit(1);
        }

        // Any other leading-hyphen token is an option, not an operation name.
        if (operation.startsWith('-')) {
            logger.log(`Operation name '${operation}' is invalid. It cannot start with a hyphen.`);
            process.exit(1);
        }

        const { operationArgs, parsedArgumentsLookup } = parseCLIArguments(processArgs.slice(1));

        const globalOptionsLookup = new Map();
        const cliOptionsLookup = new Map();
        const operationsOptionsLookup = new Map();

        // If no --config was given, fall back to './echogarden.config' or
        // './echogarden.config.json' when one exists in the working directory
        // (the plain '.config' file takes precedence).
        if (!parsedArgumentsLookup.has('config')) {
            const defaultConfigFile = `./${appName}.config`;
            const defaultJsonConfigFile = defaultConfigFile + '.json';

            if (existsSync(defaultConfigFile)) {
                parsedArgumentsLookup.set('config', defaultConfigFile);
            }
            else if (existsSync(defaultJsonConfigFile)) {
                parsedArgumentsLookup.set('config', defaultJsonConfigFile);
            }
        }

        if (parsedArgumentsLookup.has('config')) {
            const configFilePath = parsedArgumentsLookup.get('config');
            // Remove 'config' so it isn't routed as an option below.
            parsedArgumentsLookup.delete('config');

            let parsedConfigFile;

            if (configFilePath.endsWith('.config')) {
                parsedConfigFile = await parseConfigFile(configFilePath);
            }
            else if (configFilePath.endsWith('.config.json')) {
                parsedConfigFile = await parseJSONConfigFile(configFilePath);
            }
            else {
                throw new Error(`Specified config file '${configFilePath}' doesn't have a supported extension. Should be either '.config' or '.config.json'`);
            }

            // All 'speak-*' variants share the 'speak' config section.
            let sectionName = operation;

            if (sectionName.startsWith('speak-')) {
                sectionName = 'speak';
            }

            // Merge the 'global', 'cli' and per-operation config sections into
            // their lookups. Command-line arguments are merged afterwards, so
            // they take precedence over config file values.
            if (parsedConfigFile.has('global')) {
                for (const [key, value] of parsedConfigFile.get('global')) {
                    globalOptionsLookup.set(key, value);
                }
            }

            if (parsedConfigFile.has('cli')) {
                for (const [key, value] of parsedConfigFile.get('cli')) {
                    cliOptionsLookup.set(key, value);
                }
            }

            if (parsedConfigFile.has(sectionName)) {
                for (const [key, value] of parsedConfigFile.get(sectionName)) {
                    operationsOptionsLookup.set(key, value);
                }
            }
        }

        const globalOptionsKeys = API.listGlobalOptions();
        const cliOptionsKeys = CLIOptionsKeys;

        // Route each parsed argument to the global, CLI, or operation-specific
        // lookup, based on which known key set it belongs to.
        for (const [key, value] of parsedArgumentsLookup) {
            if (globalOptionsKeys.includes(key)) {
                globalOptionsLookup.set(key, value);
            }
            else if (cliOptionsKeys.includes(key)) {
                cliOptionsLookup.set(key, value);
            }
            else {
                operationsOptionsLookup.set(key, value);
            }
        }

        operationData.operation = operation;
        operationData.operationArgs = operationArgs;
        // Global and CLI options are validated/coerced against their schemas;
        // operation options stay as a raw lookup for per-operation handling.
        operationData.globalOptions = await optionsLookupToTypedObject(globalOptionsLookup, 'GlobalOptions');
        operationData.cliOptions = await optionsLookupToTypedObject(cliOptionsLookup, 'CLIOptions');
        operationData.operationOptionsLookup = operationsOptionsLookup;
    }
    catch (e) {
        resetActiveLogger();

        logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error');
        process.exit(1);
    }

    // Apply global options to the API before dispatching the operation.
    for (const key in operationData.globalOptions) {
        const value = operationData.globalOptions[key];

        API.setGlobalOption(key, value);
    }

    const debugMode = operationData.cliOptions.debug || false;

    try {
        await startWithArgs(operationData);
    }
    catch (e) {
        resetActiveLogger();

        // In debug mode, log the full error object (including stack trace);
        // otherwise show only the message.
        if (debugMode) {
            logger.log(e, 'error');
        }
        else {
            logger.logTitledMessage(`Error`, e.message, chalk.redBright, 'error');
        }

        process.exit(1);
    }

    process.exit(0);
}
// Styled executable name used as the prefix of every help entry.
const executableName = `${chalk.cyanBright('echogarden')}`;

// Help text printed by the 'help' operation (or when no operation is given).
// Each entry is a usage line followed by an indented one-line description.
// NOTE(review): the 'detect-language' operation handled by startWithArgs has
// no entry here — confirm whether that omission is intentional.
const help = [
    `${executableName} ${chalk.magentaBright('speak')} text [output files...] [options...]`,
    ` Speak the given text\n`,
    `${executableName} ${chalk.magentaBright('speak-file')} inputFile [output files...] [options...]`,
    ` Speak the given text file\n`,
    `${executableName} ${chalk.magentaBright('speak-url')} url [output files...] [options...]`,
    ` Speak the HTML document on the given URL\n`,
    `${executableName} ${chalk.magentaBright('speak-wikipedia')} articleName [output files...] [options...]`,
    ` Speak the given Wikipedia article. Language edition can be specified by --language=<langCode>\n`,
    `${executableName} ${chalk.magentaBright('transcribe')} audioFile [output files...] [options...]`,
    ` Transcribe a spoken audio file\n`,
    `${executableName} ${chalk.magentaBright('align')} audioFile transcriptFile [output files...] [options...]`,
    ` Align spoken audio file to its transcript\n`,
    `${executableName} ${chalk.magentaBright('translate-text')} inputFile [output files...] [options...]`,
    ` Translate text to a different language\n`,
    `${executableName} ${chalk.magentaBright('translate-speech')} audioFile [output files...] [options...]`,
    ` Transcribe spoken audio file directly to a different language\n`,
    `${executableName} ${chalk.magentaBright('align-translation')} audioFile translatedTranscriptFile [output files...] [options...]`,
    ` Align spoken audio file to its translated transcript\n`,
    `${executableName} ${chalk.magentaBright('align-transcript-and-translation')} audioFile transcriptFile translatedTranscriptFile [output files...] [options...]`,
    ` Align spoken audio file to both its transcript and its translated transcript using a two-stage approach.\n`,
    `${executableName} ${chalk.magentaBright('align-timeline-translation')} timelineFile translatedFile [output files...] [options...]`,
    ` Align a given timeline file to its translated text\n`,
    `${executableName} ${chalk.magentaBright('detect-text-language')} inputFile [output files...] [options...]`,
    ` Detect language of textual file\n`,
    `${executableName} ${chalk.magentaBright('detect-speech-language')} audioFile [output files...] [options...]`,
    ` Detect language of spoken audio file\n`,
    `${executableName} ${chalk.magentaBright('detect-voice-activity')} audioFile [output files...] [options...]`,
    ` Detect voice activity in audio file\n`,
    `${executableName} ${chalk.magentaBright('denoise')} audioFile [output files...] [options...]`,
    ` Apply speech denoising to audio file\n`,
    `${executableName} ${chalk.magentaBright('isolate')} audioFile [output files...] [options...]`,
    ` Extract isolated voice track from an audio file\n`,
    `${executableName} ${chalk.magentaBright('list-engines')} operation`,
    ` List available engines for the specified operation\n`,
    `${executableName} ${chalk.magentaBright('list-voices')} tts-engine [output files...] [options...]`,
    ` List available voices for the specified TTS engine\n`,
    `${executableName} ${chalk.magentaBright('install')} [package names...] [options...]`,
    ` Install one or more Echogarden packages\n`,
    `${executableName} ${chalk.magentaBright('uninstall')} [package names...] [options...]`,
    ` Uninstall one or more Echogarden packages\n`,
    `${executableName} ${chalk.magentaBright('list-packages')} [options...]`,
    ` List installed Echogarden packages\n`,
    `${executableName} ${chalk.magentaBright('serve')} [options...]`,
    ` Start a server\n`,
    `Options reference: ${chalk.blueBright('https://bit.ly/echogarden-options')}`
];
// Dispatches the parsed operation to its handler. Exits the process with
// code 1 if the operation name is not recognized.
async function startWithArgs(operationData) {
    const logger = new Logger();

    // Dispatch table mapping each operation name to its handler. All four
    // 'speak-*' variants share one handler, and the three language-detection
    // operations share a single handler parameterized by detection mode.
    const operationHandlers = new Map([
        ['speak', speak],
        ['speak-file', speak],
        ['speak-url', speak],
        ['speak-wikipedia', speak],
        ['transcribe', transcribe],
        ['align', align],
        ['translate-text', translateText],
        ['translate-speech', translateSpeech],
        ['align-translation', alignTranslation],
        ['align-transcript-and-translation', alignTranscriptAndTranslation],
        ['align-timeline-translation', alignTimelineTranslation],
        ['detect-language', (data) => detectLanguage(data, 'auto')],
        ['detect-speech-language', (data) => detectLanguage(data, 'speech')],
        ['detect-text-language', (data) => detectLanguage(data, 'text')],
        ['detect-voice-activity', detectVoiceActivity],
        ['denoise', denoise],
        ['isolate', isolate],
        ['list-engines', listEngines],
        ['list-voices', listTTSVoices],
        ['install', installPackages],
        ['uninstall', uninstallPackages],
        ['list-packages', listPackages],
        ['serve', serve],
    ]);

    const handler = operationHandlers.get(operationData.operation);

    if (!handler) {
        logger.logTitledMessage(`Unknown operation`, operationData.operation, chalk.redBright, 'error');
        process.exit(1);
    }

    await handler(operationData);
}
// Handles all speak variants ('speak', 'speak-file', 'speak-url',
// 'speak-wikipedia'): resolves the input into text segments, synthesizes them,
// optionally plays each segment as it completes, and writes the requested
// output files.
export async function speak(operationData) {
    const logger = new Logger();
    const { operationArgs, operation, operationOptionsLookup, cliOptions } = operationData;
    const mainArg = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);

    // Each variant requires a first argument; the error message names the
    // variant that was invoked.
    if (mainArg == undefined) {
        if (operation == 'speak') {
            throw new Error(`'speak' requires an argument containing the text to speak.`);
        }
        else if (operation == 'speak-file') {
            throw new Error(`'speak-file' requires an argument containing the file to speak.`);
        }
        else if (operation == 'speak-url') {
            throw new Error(`'speak-url' requires an argument containing the url to speak.`);
        }
        else if (operation == 'speak-wikipedia') {
            throw new Error(`'speak-wikipedia' requires an argument containing the name of the Wikipedia article to speak.`);
        }

        return;
    }

    // 'play' and 'overwrite' are CLI-level booleans accepted in addition to
    // the SynthesisOptions schema.
    const additionalOptionsSchema = new Map();
    additionalOptionsSchema.set('play', { type: 'boolean' });
    additionalOptionsSchema.set('overwrite', { type: 'boolean' });

    // Default to playback when no output files were requested.
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SynthesisOptions', additionalOptionsSchema);
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true);

    // plainText is only set for inputs whose original text is available verbatim
    // ('speak' and txt files); it's used to add word text offsets to the timeline.
    let plainText = undefined;
    let textSegments;

    const plainTextParagraphBreaks = options.plainText?.paragraphBreaks || API.defaultSynthesisOptions.plainText.paragraphBreaks;
    const plainTextWhitespace = options.plainText?.whitespace || API.defaultSynthesisOptions.plainText.whitespace;

    if (operation == 'speak') {
        // SSML input is synthesized as one segment; plain text is split to paragraphs.
        if (options.ssml) {
            textSegments = [mainArg];
        }
        else {
            textSegments = splitToParagraphs(mainArg, plainTextParagraphBreaks, plainTextWhitespace);
        }

        plainText = mainArg;
    }
    else if (operation == 'speak-file') {
        const sourceFile = mainArg;

        if (!existsSync(sourceFile)) {
            throw new Error(`The given source file '${sourceFile}' was not found.`);
        }

        const sourceFileExtension = getLowercaseFileExtension(sourceFile);
        const fileContent = await readFileAsUtf8(sourceFile);

        if (options.ssml && sourceFileExtension != 'xml' && sourceFileExtension != 'ssml') {
            throw new Error(`SSML option is set, but source file doesn't have an 'xml' or 'ssml' extension.`);
        }

        // Convert the file content to text segments based on its extension.
        if (sourceFileExtension == 'txt') {
            textSegments = splitToParagraphs(fileContent, plainTextParagraphBreaks, plainTextWhitespace);

            plainText = fileContent;
        }
        else if (sourceFileExtension == 'html' || sourceFileExtension == 'htm') {
            const textContent = await convertHtmlToText(fileContent);

            textSegments = splitToParagraphs(textContent, 'single', 'preserve');
        }
        else if (sourceFileExtension == 'srt' || sourceFileExtension == 'vtt') {
            const fileContent = await readFileAsUtf8(sourceFile);

            //textSegments = subtitlesToTimeline(fileContent).map(entry => entry.text)
            textSegments = [subtitlesToText(fileContent)];
        }
        else if (sourceFileExtension == 'xml' || sourceFileExtension == 'ssml') {
            // XML/SSML files imply SSML mode even if the option wasn't set.
            options.ssml = true;

            textSegments = [fileContent];
        }
        else {
            throw new Error(`'speak-file' only supports inputs with extensions 'txt', 'html', 'htm', 'xml', 'ssml', 'srt', 'vtt'`);
        }
    }
    else if (operation == 'speak-url') {
        if (options.ssml) {
            throw new Error(`speak-url doesn't accept SSML inputs`);
        }

        const url = mainArg;

        if (!url.startsWith('http://') && !url.startsWith('https://')) {
            throw new Error(`'${url}' is not a valid URL. Only 'http://' and 'https://' protocols are supported`);
        }

        // Lazily import the web reader only when this variant is used.
        const { fetchDocumentText } = await import('../utilities/WebReader.js');
        const textContent = await fetchDocumentText(url);

        textSegments = splitToParagraphs(textContent, 'single', 'preserve');
    }
    else if (operation == 'speak-wikipedia') {
        if (options.ssml) {
            throw new Error(`speak-wikipedia doesn't provide SSML inputs`);
        }

        // Lazily import the Wikipedia reader only when this variant is used.
        const { parseWikipediaArticle } = await import('../utilities/WikipediaReader.js');

        // Default to the English Wikipedia edition.
        if (!options.language) {
            options.language = 'en';
        }

        textSegments = await parseWikipediaArticle(mainArg, getShortLanguageCode(options.language));
    }
    else {
        throw new Error(`Invalid operation specified: '${operation}'`);
    }

    // Called after each synthesized segment: writes per-segment output files
    // (when placeholder-patterned filenames were given) and optionally plays
    // the segment back.
    async function onSegment(segmentData) {
        if (includesPlaceholderPattern) {
            logger.start('Write output files for segment');
        }

        await writeOutputFilesForSegment(outputFilenames, segmentData.index, segmentData.total, segmentData.audio, segmentData.timeline, segmentData.transcript, segmentData.language, allowOverwrite);

        logger.end();

        if (cliOptions.play) {
            // Adjusts playback gain relative to the loudest peak seen so far,
            // apparently targeting about -3 dB — TODO confirm intent.
            let gainAmount = -3 - segmentData.peakDecibelsSoFar;

            //gainAmount = Math.min(gainAmount, 0)

            const audioWithAddedGain = applyGainDecibels(segmentData.audio, gainAmount);
            // Flatten the sentence-level timeline into a single word timeline.
            const segmentWordTimeline = segmentData.timeline.flatMap(sentenceTimeline => sentenceTimeline.timeline);

            await playAudioWithWordTimeline(audioWithAddedGain, segmentWordTimeline, segmentData.transcript, cliOptions.player);
        }
    }

    // NOTE(review): any requested output codec is discarded before synthesis —
    // presumably so synthesis produces raw audio for playback and per-file
    // encoding; confirm.
    if (options.outputAudioFormat?.codec) {
        options.outputAudioFormat.codec = undefined;
    }

    const { audio: synthesizedAudio, timeline } = await API.synthesize(textSegments, options, onSegment, undefined);

    // When the full source text is available, annotate the timeline's word
    // entries in place with their character offsets in that text.
    if (plainText) {
        addWordTextOffsetsToTimelineInPlace(timeline, plainText);
    }

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    for (const outputFilename of outputFilenames) {
        // Placeholder-patterned paths were already written per segment in onSegment.
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const fileSaver = getFileSaver(outputFilename, allowOverwrite);

        await fileSaver(synthesizedAudio, timeline, textSegments.join('\n\n'), options.subtitles);
    }

    logger.end();
}
// Handles the 'transcribe' operation: recognizes speech in the given audio
// file, writes the requested output files, and optionally plays back the
// audio with the recognized word timeline.
export async function transcribe(operationData) {
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const [sourceFilename, ...outputFilenames] = operationArgs;

    if (sourceFilename == undefined) {
        throw new Error(`'transcribe' requires an argument containing the source file name.`);
    }

    if (!existsSync(sourceFilename)) {
        throw new Error(`The given source audio file '${sourceFilename}' was not found.`);
    }

    // Default to playback when no output files were requested.
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'RecognitionOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    await checkOutputFilenames(outputFilenames, true, true, true);

    const recognitionResult = await API.recognize(sourceFilename, options);
    const { transcript, timeline, wordTimeline, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = recognitionResult;

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const saveToFile = getFileSaver(outputFilename, allowOverwrite);
        await saveToFile(inputRawAudio, timeline, transcript, options.subtitles);

        await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true);
    }

    logger.end();

    if (cliOptions.play) {
        // Prefer the isolated voice track when source separation produced one.
        const audioToPlay = isolatedRawAudio ? isolatedRawAudio : inputRawAudio;
        const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay);

        await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player);
    }
}
// Handles the 'align' operation: aligns the given audio file to a transcript
// reference file, writes output files (including per-segment files when a
// placeholder pattern is present), and optionally plays back the result.
export async function align(operationData) {
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const [audioFilename, alignmentReferenceFile, ...outputFilenames] = operationArgs;

    if (audioFilename == undefined) {
        throw new Error(`align requires an argument containing the audio file path.`);
    }

    if (!existsSync(audioFilename)) {
        throw new Error(`The given source file '${audioFilename}' was not found.`);
    }

    if (alignmentReferenceFile == undefined) {
        throw new Error(`align requires a second argument containing the alignment reference file path.`);
    }

    if (!existsSync(alignmentReferenceFile)) {
        throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`);
    }

    const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile);
    const referenceFileContent = await readFileAsUtf8(alignmentReferenceFile);

    // Derive plain reference text from the file, based on its extension.
    let text;

    switch (referenceFileExtension) {
        case 'txt': {
            text = referenceFileContent;
            break;
        }

        case 'html':
        case 'htm': {
            text = await convertHtmlToText(referenceFileContent);
            break;
        }

        case 'srt':
        case 'vtt': {
            text = subtitlesToText(referenceFileContent);
            break;
        }

        default: {
            throw new Error(`align only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`);
        }
    }

    // Default to playback when no output files were requested.
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'AlignmentOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true);

    const { timeline, wordTimeline, transcript, language, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.align(audioFilename, text, options);

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    // When a placeholder pattern is present, write one set of files per
    // timeline segment, with the segment timeline shifted to start at zero.
    if (includesPlaceholderPattern) {
        for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) {
            const segmentEntry = timeline[segmentIndex];

            const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime);
            const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline, -segmentEntry.startTime);

            await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, language, allowOverwrite);
        }
    }

    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const saveToFile = getFileSaver(outputFilename, allowOverwrite);
        await saveToFile(inputRawAudio, timeline, transcript, options.subtitles);

        await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true);
    }

    logger.end();

    if (cliOptions.play) {
        // Prefer the isolated voice track when source separation produced one.
        const audioToPlay = isolatedRawAudio ? isolatedRawAudio : inputRawAudio;
        const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay);

        await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, transcript, cliOptions.player);
    }
}
// Handles the 'align-translation' operation: aligns the given audio file to a
// translated transcript reference file, writes output files (including
// per-segment files when a placeholder pattern is present), and optionally
// plays back the result.
export async function alignTranslation(operationData) {
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const audioFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(2);

    if (audioFilename == undefined) {
        throw new Error(`align-translation requires a first argument containing the audio file path.`);
    }

    if (!existsSync(audioFilename)) {
        throw new Error(`The given source file '${audioFilename}' was not found.`);
    }

    const alignmentReferenceFile = operationArgs[1];

    if (alignmentReferenceFile == undefined) {
        throw new Error(`align-translation requires a second argument containing the translated reference file path.`);
    }

    if (!existsSync(alignmentReferenceFile)) {
        throw new Error(`The given reference file '${alignmentReferenceFile}' was not found.`);
    }

    const referenceFileExtension = getLowercaseFileExtension(alignmentReferenceFile);
    const fileContent = await readFileAsUtf8(alignmentReferenceFile);

    // Derive plain reference text from the file, based on its extension.
    let text;

    if (referenceFileExtension == 'txt') {
        text = fileContent;
    }
    else if (referenceFileExtension == 'html' || referenceFileExtension == 'htm') {
        text = await convertHtmlToText(fileContent);
    }
    else if (referenceFileExtension == 'srt' || referenceFileExtension == 'vtt') {
        text = subtitlesToText(fileContent);
    }
    else {
        throw new Error(`align-translation only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`);
    }

    // Default to playback when no output files were requested.
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranslationAlignmentOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true);

    const { timeline, wordTimeline, translatedTranscript, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.alignTranslation(audioFilename, text, options);

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    // When a placeholder pattern is present, write one set of files per
    // timeline segment, with the segment timeline shifted to start at zero.
    if (includesPlaceholderPattern) {
        for (let segmentIndex = 0; segmentIndex < timeline.length; segmentIndex++) {
            const segmentEntry = timeline[segmentIndex];

            const segmentAudio = sliceRawAudioByTime(inputRawAudio, segmentEntry.startTime, segmentEntry.endTime);
            const sentenceTimeline = addTimeOffsetToTimeline(segmentEntry.timeline, -segmentEntry.startTime);

            await writeOutputFilesForSegment(outputFilenames, segmentIndex, timeline.length, segmentAudio, sentenceTimeline, segmentEntry.text, targetLanguage, allowOverwrite);
        }
    }

    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const fileSaver = getFileSaver(outputFilename, allowOverwrite);

        await fileSaver(inputRawAudio, timeline, translatedTranscript, options.subtitles);

        await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true);
    }

    logger.end();

    if (cliOptions.play) {
        // Prefer the isolated voice track when source separation produced one.
        let audioToPlay;

        if (isolatedRawAudio) {
            audioToPlay = isolatedRawAudio;
        }
        else {
            audioToPlay = inputRawAudio;
        }

        const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay);

        await playAudioWithWordTimeline(normalizedAudioToPlay, wordTimeline, translatedTranscript, cliOptions.player);
    }
}
// Reads a transcript reference file and converts it to plain text.
// Supports 'txt', 'html'/'htm', and 'srt'/'vtt' extensions.
async function readTranscriptFileAsPlainText(filePath) {
    const fileExtension = getLowercaseFileExtension(filePath);
    const fileContent = await readFileAsUtf8(filePath);

    if (fileExtension == 'txt') {
        return fileContent;
    }
    else if (fileExtension == 'html' || fileExtension == 'htm') {
        return await convertHtmlToText(fileContent);
    }
    else if (fileExtension == 'srt' || fileExtension == 'vtt') {
        return subtitlesToText(fileContent);
    }
    else {
        throw new Error(`align-transcript-and-translation only supports transcript files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`);
    }
}

// Handles the 'align-transcript-and-translation' operation: aligns the given
// audio file to both its native-language transcript and its translated
// transcript, writes output files (adding '.translated.<ext>' companions for
// json/txt/srt/vtt outputs), and optionally plays back the result with the
// translated word timeline.
export async function alignTranscriptAndTranslation(operationData) {
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const audioFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(3);

    if (audioFilename == undefined) {
        throw new Error(`align-transcript-and-translation requires a first argument containing the audio file path.`);
    }

    if (!existsSync(audioFilename)) {
        throw new Error(`The given source file '${audioFilename}' was not found.`);
    }

    const nativeTranscriptFilePath = operationArgs[1];

    if (nativeTranscriptFilePath == undefined) {
        throw new Error(`align-transcript-and-translation requires a second argument containing the native language transcript file path.`);
    }

    if (!existsSync(nativeTranscriptFilePath)) {
        throw new Error(`The given transcript file '${nativeTranscriptFilePath}' was not found.`);
    }

    const translatedTranscriptFilePath = operationArgs[2];

    if (translatedTranscriptFilePath == undefined) {
        throw new Error(`align-transcript-and-translation requires a third argument containing the translated language transcript file path.`);
    }

    if (!existsSync(translatedTranscriptFilePath)) {
        // Fixed: this message previously interpolated the native transcript path.
        throw new Error(`The given translated transcript file '${translatedTranscriptFilePath}' was not found.`);
    }

    // Both transcript files are converted to plain text the same way.
    const transcript = await readTranscriptFileAsPlainText(nativeTranscriptFilePath);
    const translatedTranscript = await readTranscriptFileAsPlainText(translatedTranscriptFilePath);

    // Default to playback when no output files were requested.
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TranscriptAndTranslationAlignmentOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    const { includesPlaceholderPattern } = await checkOutputFilenames(outputFilenames, true, true, true);

    const { timeline, wordTimeline, translatedTimeline, translatedWordTimeline, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.alignTranscriptAndTranslation(audioFilename, transcript, translatedTranscript, options);

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const fileSaver = getFileSaver(outputFilename, allowOverwrite);

        await fileSaver(inputRawAudio, timeline, transcript, options.subtitles);

        await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true);

        // For textual/timeline outputs, also write a companion file containing
        // the translated timeline and transcript.
        const fileExtension = getLowercaseFileExtension(outputFilename);

        if (['json', 'txt', 'srt', 'vtt'].includes(fileExtension)) {
            const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.'));
            const translatedOutputPath = `${pathWithoutExtension}.translated.${fileExtension}`;

            const translatedFileSaver = getFileSaver(translatedOutputPath, allowOverwrite);

            await translatedFileSaver(inputRawAudio, translatedTimeline, translatedTranscript, options.subtitles);
        }
    }

    logger.end();

    if (cliOptions.play) {
        // Prefer the isolated voice track when source separation produced one.
        let audioToPlay;

        if (isolatedRawAudio) {
            audioToPlay = isolatedRawAudio;
        }
        else {
            audioToPlay = inputRawAudio;
        }

        const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay);

        await playAudioWithWordTimeline(normalizedAudioToPlay, translatedWordTimeline, translatedTranscript, cliOptions.player);
    }
}
// Handles the 'align-timeline-translation' operation: aligns an existing JSON
// timeline file to a translated reference text, writes the requested output
// files, and optionally plays back the aligned translation when the API
// returned audio.
export async function alignTimelineTranslation(operationData) {
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const timelineFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(2);

    if (timelineFilename == undefined) {
        throw new Error(`align-timeline-translation requires a first argument containing the timeline file path.`);
    }

    if (getLowercaseFileExtension(timelineFilename) != 'json') {
        throw new Error(`align-timeline-translation only supports timeline files with extension 'json'`);
    }

    if (!existsSync(timelineFilename)) {
        throw new Error(`The given timeline file '${timelineFilename}' was not found.`);
    }

    const timeline = await readAndParseJsonFile(timelineFilename);

    const translationFilePath = operationArgs[1];

    if (translationFilePath == undefined) {
        throw new Error(`align-timeline-translation requires a second argument containing the translated reference file path.`);
    }

    if (!existsSync(translationFilePath)) {
        throw new Error(`The given reference file '${translationFilePath}' was not found.`);
    }

    const translationFileExtension = getLowercaseFileExtension(translationFilePath);
    const translationFileContent = await readFileAsUtf8(translationFilePath);

    // Derive plain translated text from the reference file, based on its extension.
    let translationText;

    if (translationFileExtension == 'txt') {
        translationText = translationFileContent;
    }
    else if (translationFileExtension == 'html' || translationFileExtension == 'htm') {
        translationText = await convertHtmlToText(translationFileContent);
    }
    else if (translationFileExtension == 'srt' || translationFileExtension == 'vtt') {
        translationText = subtitlesToText(translationFileContent);
    }
    else {
        // Fixed: this message previously named the 'align' operation.
        throw new Error(`align-timeline-translation only supports reference files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`);
    }

    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TimelineTranslationAlignmentOptions');

    const { timeline: translationTimeline, wordTimeline: translationWordTimeline, rawAudio } = await API.alignTimelineTranslation(timeline, translationText, options);

    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }

    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);

    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }

        const fileSaver = getFileSaver(outputFilename, allowOverwrite);

        // Saved with an empty 16 kHz mono placeholder track — presumably because
        // timeline alignment has no single source audio file; confirm.
        await fileSaver(getEmptyRawAudio(1, 16000), translationTimeline, translationText, options.subtitles);
    }

    logger.end();

    // NOTE(review): unlike other operations, cliOptions.play is not defaulted
    // to true here when no output files are given — confirm whether intentional.
    if (cliOptions.play && rawAudio) {
        const normalizedAudioToPlay = normalizeAudioLevel(rawAudio);

        await playAudioWithWordTimeline(normalizedAudioToPlay, translationWordTimeline, translationText, cliOptions.player);
    }
}
export async function translateText(operationData) {
    // Translates the textual content of an input file ('txt', 'html', 'htm',
    // 'srt' or 'vtt'). Writes the translation to the given output files, or
    // prints it to the log when no output files were specified.
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const inputFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);
    if (inputFilename == undefined) {
        throw new Error(`translate-text requires an argument containing the input file path.`);
    }
    if (!existsSync(inputFilename)) {
        throw new Error(`The given input file '${inputFilename}' was not found.`);
    }
    const inputFileExtension = getLowercaseFileExtension(inputFilename);
    const inputFileContent = await readFileAsUtf8(inputFilename);
    // Convert the input file's content to plain text, based on its extension
    let inputText;
    switch (inputFileExtension) {
        case 'txt': {
            inputText = inputFileContent;
            break;
        }
        case 'html':
        case 'htm': {
            inputText = await convertHtmlToText(inputFileContent);
            break;
        }
        case 'srt':
        case 'vtt': {
            inputText = subtitlesToText(inputFileContent);
            break;
        }
        default: {
            throw new Error(`translate-text only supports input files with extensions 'txt', 'html', 'htm', 'srt' or 'vtt'`);
        }
    }
    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextTranslationOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
    await checkOutputFilenames(outputFilenames, false, true, true);
    const { text, translatedText, translationPairs, sourceLanguage, targetLanguage, } = await API.translateText(inputText, options);
    if (outputFilenames.length === 0) {
        // No output files: print the translation instead
        logger.log(``);
        logger.log(translatedText);
    }
    else {
        logger.start('\nWrite output files');
        for (const outputFilename of outputFilenames) {
            if (isPlaceholderFilePath(outputFilename)) {
                continue;
            }
            const saveToFile = getFileSaver(outputFilename, allowOverwrite);
            // Text translation yields no audio, so an empty raw audio is passed
            await saveToFile(getEmptyRawAudio(1, 16000), translationPairs, translatedText, undefined);
        }
        logger.end();
    }
}
export async function translateSpeech(operationData) {
    // Translates speech in an input media file. Writes the translated
    // transcript and timelines to the given output files and optionally plays
    // back the audio with word-level highlighting.
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const inputFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);
    if (inputFilename == undefined) {
        throw new Error(`translate-speech requires an argument containing the input file path.`);
    }
    if (!existsSync(inputFilename)) {
        throw new Error(`The given input file '${inputFilename}' was not found.`);
    }
    // When playback wasn't explicitly set, default to playing only if no
    // output files were requested
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }
    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechTranslationOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
    await checkOutputFilenames(outputFilenames, true, true, true);
    const { transcript, timeline, wordTimeline, sourceLanguage, targetLanguage, inputRawAudio, isolatedRawAudio, backgroundRawAudio } = await API.translateSpeech(inputFilename, options);
    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }
    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }
        const saveToFile = getFileSaver(outputFilename, allowOverwrite);
        await saveToFile(inputRawAudio, timeline, transcript, options.subtitles);
        // Also write isolated/background tracks when source separation was applied
        await writeSourceSeparationOutputIfNeeded(outputFilename, isolatedRawAudio, backgroundRawAudio, allowOverwrite, true);
    }
    logger.end();
    if (!cliOptions.play) {
        return;
    }
    // Prefer the isolated track for playback when source separation produced one
    const audioToPlay = isolatedRawAudio ?? inputRawAudio;
    const normalizedAudioToPlay = normalizeAudioLevel(audioToPlay);
    let transcriptToPlay;
    let timelineToPlay;
    if (wordTimeline) {
        transcriptToPlay = transcript;
        timelineToPlay = wordTimeline;
    }
    else {
        // No word-level timeline available: synthesize word entries from the
        // segment timeline, and derive a transcript from them
        timelineToPlay = timeline.map(segmentEntry => ({
            type: 'word',
            text: segmentEntry.text.trim(),
            startTime: segmentEntry.startTime,
            endTime: segmentEntry.endTime
        }));
        transcriptToPlay = timelineToPlay.map(wordEntry => wordEntry.text).join(' ').trim();
    }
    await playAudioWithWordTimeline(normalizedAudioToPlay, timelineToPlay, transcriptToPlay, cliOptions.player);
}
export async function detectLanguage(operationData, mode) {
    // Detects the language of an input file. Depending on `mode` ('text',
    // 'speech' or 'auto') and the file extension, dispatches to either text
    // or speech language detection, then writes or prints the per-language
    // probability results.
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const inputFilePath = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);
    // Check for a missing argument before touching the filesystem. Previously
    // this check came after existsSync, so a missing argument produced the
    // misleading message "The given input file 'undefined' was not found."
    // and the operation-specific errors below were unreachable.
    if (inputFilePath == undefined) {
        if (mode == 'text') {
            throw new Error(`detect-text-language requires an argument containing the input file path.`);
        }
        else {
            throw new Error(`detect-speech-language requires an argument containing the input audio file path.`);
        }
    }
    if (!existsSync(inputFilePath)) {
        throw new Error(`The given input file '${inputFilePath}' was not found.`);
    }
    const inputFileExtension = getLowercaseFileExtension(inputFilePath);
    const supportedInputTextFormats = ['txt', 'srt', 'vtt'];
    let results;
    let allowOverwrite;
    if (mode == 'text' || (mode == 'auto' && supportedInputTextFormats.includes(inputFileExtension))) {
        if (!supportedInputTextFormats.includes(inputFileExtension)) {
            throw new Error(`'detect-text-language' doesn't support input file extension '${inputFileExtension}'`);
        }
        const options = await optionsLookupToTypedObject(operationOptionsLookup, 'TextLanguageDetectionOptions');
        allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
        await checkOutputFilenames(outputFilenames, false, true, false);
        let text = await readFileAsUtf8(inputFilePath);
        // Strip subtitle cue formatting down to plain text
        if (inputFileExtension == 'srt' || inputFileExtension == 'vtt') {
            text = subtitlesToText(text);
        }
        const { detectedLanguage, detectedLanguageProbabilities } = await API.detectTextLanguage(text, options);
        results = detectedLanguageProbabilities;
    }
    else {
        const options = await optionsLookupToTypedObject(operationOptionsLookup, 'SpeechLanguageDetectionOptions');
        allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
        await checkOutputFilenames(outputFilenames, false, true, false);
        const { detectedLanguage, detectedLanguageProbabilities } = await API.detectSpeechLanguage(inputFilePath, options);
        results = detectedLanguageProbabilities;
    }
    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
        const resultsAsText = results.map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n');
        for (const outputFilename of outputFilenames) {
            const fileSaver = getFileSaver(outputFilename, allowOverwrite);
            await fileSaver(getEmptyRawAudio(0, 0), results, resultsAsText);
        }
    }
    else {
        // No output files: log only the 10 most likely languages
        const resultsAsText = results.slice(0, 10).map(result => `${formatLanguageCodeWithName(result.language)}: ${result.probability.toFixed(5)}`).join('\n');
        logger.log('', 'output');
        logger.log(resultsAsText, 'output');
    }
    logger.end();
}
export async function detectVoiceActivity(operationData) {
    // Detects voice activity in an input audio file. Writes the activity
    // timeline (and a cropped audio file per media output) and optionally
    // plays back the audio with the detected activity segments highlighted.
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const audioFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);
    if (audioFilename == undefined) {
        throw new Error(`detect-voice-activity requires an argument containing the audio file path.`);
    }
    if (!existsSync(audioFilename)) {
        throw new Error(`The given source audio file '${audioFilename}' was not found.`);
    }
    // When playback wasn't explicitly set, default to playing only if no
    // output files were requested
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }
    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'VADOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
    await checkOutputFilenames(outputFilenames, true, true, true);
    let { timeline, verboseTimeline, inputRawAudio, croppedRawAudio } = await API.detectVoiceActivity(audioFilename, options);
    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }
    for (const outputFilename of outputFilenames) {
        if (isPlaceholderFilePath(outputFilename)) {
            continue;
        }
        const fileSaver = getFileSaver(outputFilename, allowOverwrite);
        await fileSaver(inputRawAudio, timeline, '', { maxAddedDuration: 0 });
        // For media outputs, also write a '.cropped' file containing only the
        // audio in active segments
        const fileExtension = getLowercaseFileExtension(outputFilename);
        if (supportedOutputMediaFileExtensions.includes(fileExtension)) {
            const pathWithoutExtension = outputFilename.substring(0, outputFilename.lastIndexOf('.'));
            const isolatedOutputFilePath = `${pathWithoutExtension}.cropped.${fileExtension}`;
            const fileSaver = getFileSaver(isolatedOutputFilePath, allowOverwrite);
            await fileSaver(croppedRawAudio, [], '');
        }
    }
    logger.end();
    if (cliOptions.play) {
        const normalizedAudio = normalizeAudioLevel(inputRawAudio);
        const timelineToPlay = verboseTimeline.map(entry => {
            return { ...entry, type: 'word' };
        });
        // Derive a transcript from the timeline entries. Previously the player
        // options were passed as the third argument, where every other call
        // site in this file passes (audio, wordTimeline, transcript, player),
        // so the player options landed in the transcript slot.
        const transcriptToPlay = timelineToPlay.map(entry => entry.text).join(' ').trim();
        await playAudioWithWordTimeline(normalizedAudio, timelineToPlay, transcriptToPlay, cliOptions.player);
    }
}
export async function denoise(operationData) {
    // Denoises an input audio file, writes the denoised audio to the given
    // output files, and optionally plays back the result.
    const logger = new Logger();
    const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
    const audioFilename = operationArgs[0];
    const outputFilenames = operationArgs.slice(1);
    if (audioFilename == undefined) {
        throw new Error(`'denoise' requires an argument containing the audio file path.`);
    }
    if (!existsSync(audioFilename)) {
        throw new Error(`The given source audio file '${audioFilename}' was not found.`);
    }
    // When playback wasn't explicitly set, default to playing only if no
    // output files were requested
    if (cliOptions.play == null) {
        cliOptions.play = outputFilenames.length === 0;
    }
    const options = await optionsLookupToTypedObject(operationOptionsLookup, 'DenoisingOptions');
    const allowOverwrite = getWithDefault(cliOptions.overwrite, overwriteByDefault);
    await checkOutputFilenames(outputFilenames, true, false, false);
    const { denoisedAudio } = await API.denoise(audioFilename, options);
    if (outputFilenames.length > 0) {
        logger.start('\nWrite output files');
    }
    for (const targetFilename of outputFilenames) {
        const saveToFile = getFileSaver(targetFilename, allowOverwrite);
        await saveToFile(denoisedAudio, [], '');
    }
    logger.end();
    if (cliOptions.play) {
        await playAudioSamplesWithKeyboardControls(denoisedAudio, cliOptions.player);
    }
}
export async function isolate(operationData) {
const logger = new Logger();
const { operationArgs, operationOptionsLookup, cliOptions } = operationData;
const audioFilename = operationArgs