// echogarden
// An easy-to-use speech toolset. Includes tools for synthesis, recognition,
// alignment, speech translation, language detection, source separation and more.
import chalk from 'chalk';
import { ensureRawAudio } from '../audio/AudioUtilities.js';
import { formatLanguageCodeWithName, getShortLanguageCode, parseLangIdentifier } from '../utilities/Locale.js';
import { Logger } from '../utilities/Logger.js';
import { extendDeep } from '../utilities/ObjectUtilities.js';
import { addWordTextOffsetsToTimelineInPlace, wordTimelineToSegmentSentenceTimeline } from '../utilities/Timeline.js';
import * as API from './API.js';
/**
 * Aligns an existing word timeline to a translated transcript, producing a
 * segment/sentence timeline and a word timeline over the translated text.
 *
 * When `options.sourceLanguage` / `options.targetLanguage` are absent, they are
 * auto-detected from the timeline's text and the translated transcript,
 * respectively.
 *
 * Returns `{ timeline, wordTimeline, sourceLanguage, targetLanguage, rawAudio }`.
 */
export async function alignTimelineTranslation(inputTimeline, translatedTranscript, options) {
    const logger = new Logger();
    const startTimestamp = logger.getTimestamp();
    options = extendDeep(defaultTimelineTranslationAlignmentOptions, options);
    // Decode caller-provided audio (if any) so it can be passed through in the result
    let rawAudio;
    if (options.audio) {
        rawAudio = await ensureRawAudio(options.audio);
    }
    // Resolves a language from an explicit identifier, or detects it from text when absent.
    // `messages` supplies the log strings; `getDetectionText` lazily provides the text to detect on.
    const resolveLanguage = async (explicitLanguage, messages, getDetectionText) => {
        if (explicitLanguage) {
            const languageData = await parseLangIdentifier(explicitLanguage);
            logger.end();
            logger.logTitledMessage(messages.specifiedTitle, formatLanguageCodeWithName(languageData.Name));
            return languageData.Name;
        }
        logger.start(messages.detectionStart);
        const { detectedLanguage } = await API.detectTextLanguage(getDetectionText(), options.languageDetection || {});
        logger.end();
        logger.logTitledMessage(messages.detectedTitle, formatLanguageCodeWithName(detectedLanguage));
        return detectedLanguage;
    };
    const sourceLanguage = await resolveLanguage(options.sourceLanguage, {
        specifiedTitle: 'Source language specified',
        detectionStart: 'No source language specified. Detect source language',
        detectedTitle: 'Source language detected',
    }, () => inputTimeline.map(entry => entry.text).join(' '));
    const targetLanguage = await resolveLanguage(options.targetLanguage, {
        specifiedTitle: 'Target language specified',
        detectionStart: 'No target language specified. Detect target language',
        detectedTitle: 'Target language detected',
    }, () => translatedTranscript);
    logger.start(`Load ${options.engine} module`);
    let mappedWordTimeline;
    if (options.engine === 'e5') {
        const { alignTimelineToTextSemantically, e5SupportedLanguages } = await import('../alignment/SemanticTextAlignment.js');
        // The E5 embedding model covers a fixed language set; reject unsupported languages early
        for (const [role, language] of [['Source', sourceLanguage], ['Target', targetLanguage]]) {
            if (!e5SupportedLanguages.includes(getShortLanguageCode(language))) {
                throw new Error(`${role} language ${formatLanguageCodeWithName(language)} is not supported by the E5 embedding model.`);
            }
        }
        logger.end();
        mappedWordTimeline = await alignTimelineToTextSemantically(inputTimeline, translatedTranscript, targetLanguage);
    }
    else {
        throw new Error(`Unsupported engine: ${options.engine}`);
    }
    logger.start(`Postprocess timeline`);
    // Attach character offsets of each word into the translated transcript,
    // then group the words into segment/sentence structure
    addWordTextOffsetsToTimelineInPlace(mappedWordTimeline, translatedTranscript);
    const { segmentTimeline: mappedTimeline } = await wordTimelineToSegmentSentenceTimeline(mappedWordTimeline, translatedTranscript, targetLanguage);
    logger.end();
    logger.logDuration(`Total timeline translation alignment time`, startTimestamp, chalk.magentaBright);
    logger.end();
    return {
        timeline: mappedTimeline,
        wordTimeline: mappedWordTimeline,
        sourceLanguage,
        targetLanguage,
        rawAudio,
    };
}
// Constants

// Default options for `alignTimelineTranslation`; caller-supplied options are
// deep-merged on top of these via `extendDeep`.
const defaultTimelineTranslationAlignmentOptions = {
    // Alignment engine. 'e5' is currently the only value accepted by the engine switch.
    engine: 'e5',
    // Language of the input timeline; when undefined it is auto-detected from the timeline's text
    sourceLanguage: undefined,
    // Language of the translated transcript; when undefined it is auto-detected from the transcript
    targetLanguage: undefined,
    // Optional audio input; when provided it is decoded via `ensureRawAudio` and returned in the result
    audio: undefined,
    // Options forwarded to `API.detectTextLanguage` (falls back to `{}` when undefined)
    languageDetection: undefined,
    // NOTE(review): not referenced anywhere in this file — presumably consumed by a caller; verify
    subtitles: undefined,
    e5: {
        // assumes this selects the E5 embedding model variant — TODO confirm against SemanticTextAlignment.js
        model: 'small-fp16',
    }
};
//# sourceMappingURL=TimelineTranslationAlignment.js.map