echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
212 lines • 9.52 kB
JavaScript
import { addMissingPunctuationWordsToWordSequence, segmentWordSequence, WordSequence } from '@echogarden/text-segmentation';
import { applyWhitespaceProcessing, isWord, isWordOrSymbolWord, splitToParagraphs } from '../nlp/Segmentation.js';
import { deepClone } from './ObjectUtilities.js';
import { getUTF32Chars } from './StringUtilities.js';
import { roundToDigits } from './Utilities.js';
/**
 * Returns a copy of a timeline (made with `deepClone`) in which `timeOffset`
 * has been added to the start and end time of every entry, including entries
 * of nested timelines. Shifted times are clamped so they never drop below 0.
 *
 * @param targetTimeline Timeline to shift. A falsy value is returned unchanged.
 * @param timeOffset Amount added to each time value (may be negative).
 * @returns The shifted copy; the input timeline is not modified.
 */
export function addTimeOffsetToTimeline(targetTimeline, timeOffset) {
    if (targetTimeline) {
        const shiftedTimeline = deepClone(targetTimeline);
        // Negative offsets may push a time before the origin; clamp to 0.
        const shiftAndClamp = (time) => Math.max(time + timeOffset, 0);
        for (const entry of shiftedTimeline) {
            entry.startTime = shiftAndClamp(entry.startTime);
            entry.endTime = shiftAndClamp(entry.endTime);
            if (!entry.timeline) {
                continue;
            }
            entry.timeline = addTimeOffsetToTimeline(entry.timeline, timeOffset);
        }
        return shiftedTimeline;
    }
    return targetTimeline;
}
/**
 * Returns a copy of a timeline (made with `deepClone`) in which the start and
 * end time of every entry, including entries of nested timelines, has been
 * multiplied by `factor`.
 *
 * @param targetTimeline Timeline to scale. A falsy value is returned unchanged.
 * @param factor Multiplier applied to each time value.
 * @returns The scaled copy; the input timeline is not modified.
 */
export function multiplyTimelineByFactor(targetTimeline, factor) {
    // Same contract as addTimeOffsetToTimeline: a missing timeline is passed
    // through instead of failing when it is iterated.
    if (!targetTimeline) {
        return targetTimeline;
    }
    const newTimeline = deepClone(targetTimeline);
    for (const segmentTimelineEntry of newTimeline) {
        segmentTimelineEntry.startTime *= factor;
        segmentTimelineEntry.endTime *= factor;
        if (segmentTimelineEntry.timeline) {
            segmentTimelineEntry.timeline = multiplyTimelineByFactor(segmentTimelineEntry.timeline, factor);
        }
    }
    return newTimeline;
}
/**
 * Returns a copy of a timeline (made with `deepClone`) in which `startTime`,
 * `endTime` and `confidence` of every entry have been passed through
 * `roundToDigits` with the requested number of decimal digits. Nested
 * timelines are rounded with the same `decimalDigits`.
 *
 * @param targetTimeline Timeline to round. A falsy value is returned unchanged.
 * @param decimalDigits Number of decimal digits to keep (default 2).
 * @returns The rounded copy; the input timeline is not modified.
 */
export function roundTimelineProperties(targetTimeline, decimalDigits = 2) {
    if (!targetTimeline) {
        return targetTimeline;
    }
    const roundedTimeline = deepClone(targetTimeline);
    for (const entry of roundedTimeline) {
        // Falsy values (missing, 0, NaN) are left exactly as they are.
        if (entry.startTime) {
            entry.startTime = roundToDigits(entry.startTime, decimalDigits);
        }
        if (entry.endTime) {
            entry.endTime = roundToDigits(entry.endTime, decimalDigits);
        }
        if (entry.confidence) {
            entry.confidence = roundToDigits(entry.confidence, decimalDigits);
        }
        if (entry.timeline) {
            // Forward decimalDigits; otherwise nested entries would silently
            // fall back to the default precision.
            entry.timeline = roundTimelineProperties(entry.timeline, decimalDigits);
        }
    }
    return roundedTimeline;
}
/**
 * Groups a flat word timeline into a segment -> sentence -> word timeline.
 *
 * The words are loaded into a `WordSequence` (text and UTF-16 start offset of
 * each entry), completed by `addMissingPunctuationWordsToWordSequence` and
 * split into sentences by `segmentWordSequence`. The transcript is split into
 * paragraphs by `splitToParagraphs`, and every sentence is attached to the
 * first paragraph whose end offset lies beyond the sentence's start offset.
 *
 * @param wordTimelineWithOffsets Word entries carrying `text`, `startOffsetUtf16`, `startTime` and `endTime`.
 * @param transcript Text the word offsets refer to.
 * @param language Accepted for interface compatibility; not read by this function.
 * @param paragraphBreaks Forwarded to `splitToParagraphs` (default 'double').
 * @param whitespace Forwarded to `applyWhitespaceProcessing` for the segment text (default 'collapse').
 * @returns `{ segmentTimeline }` with one 'segment' entry per paragraph. Sentences
 *          or segments that contain no timed word get `undefined` start/end times.
 */
export async function wordTimelineToSegmentSentenceTimeline(wordTimelineWithOffsets, transcript, language, paragraphBreaks = 'double', whitespace = 'collapse') {
    const wordSequence = new WordSequence();
    for (const { text, startOffsetUtf16 } of wordTimelineWithOffsets) {
        wordSequence.addWord(text, startOffsetUtf16, !isWordOrSymbolWord(text));
    }

    const { wordSequenceWithPunctuation, originalWordsReverseMapping } = addMissingPunctuationWordsToWordSequence(wordSequence, transcript);
    const { sentences } = await segmentWordSequence(wordSequenceWithPunctuation);
    const paragraphs = splitToParagraphs(transcript, paragraphBreaks, 'preserve');

    // Assign sentence indexes to paragraphs. Paragraph end offsets are obtained
    // by accumulating paragraph lengths, which assumes the 'preserve' split
    // yields paragraphs that are contiguous in the transcript.
    const sentenceIndexesForParagraph = paragraphs.map(() => []);
    let nextSentenceIndex = 0;
    let paragraphEndOffset = 0;
    for (let paragraphIndex = 0; paragraphIndex < paragraphs.length && nextSentenceIndex < sentences.length; paragraphIndex++) {
        paragraphEndOffset += paragraphs[paragraphIndex].length;
        while (nextSentenceIndex < sentences.length &&
            sentences[nextSentenceIndex].charRange.start < paragraphEndOffset) {
            sentenceIndexesForParagraph[paragraphIndex].push(nextSentenceIndex);
            nextSentenceIndex++;
        }
    }

    // Running index into the punctuation-completed word sequence; the reverse
    // mapping translates it back to an index of the original word timeline.
    let sequenceWordIndex = 0;
    const segmentTimeline = [];
    for (const [paragraphIndex, paragraph] of paragraphs.entries()) {
        const sentenceTimeline = [];
        for (const sentenceIndex of sentenceIndexesForParagraph[paragraphIndex]) {
            const sentenceEntry = sentences[sentenceIndex];
            const wordTimeline = [];
            for (const _ of sentenceEntry.words.entries) {
                const originalWordIndex = originalWordsReverseMapping.get(sequenceWordIndex);
                sequenceWordIndex += 1;
                // Words added by the punctuation pass have no original entry.
                if (originalWordIndex === undefined) {
                    continue;
                }
                wordTimeline.push(wordTimelineWithOffsets[originalWordIndex]);
            }
            const firstWord = wordTimeline[0];
            const lastWord = wordTimeline[wordTimeline.length - 1];
            sentenceTimeline.push({
                type: 'sentence',
                text: sentenceEntry.text,
                startTime: firstWord?.startTime,
                endTime: lastWord?.endTime,
                timeline: wordTimeline,
            });
        }
        const firstSentence = sentenceTimeline[0];
        const lastSentence = sentenceTimeline[sentenceTimeline.length - 1];
        segmentTimeline.push({
            type: 'segment',
            text: applyWhitespaceProcessing(paragraph, whitespace),
            startTime: firstSentence?.startTime,
            endTime: lastSentence?.endTime,
            timeline: sentenceTimeline,
        });
    }
    return { segmentTimeline };
}
/**
 * Locates every 'word' entry of a (possibly nested) timeline inside `text` and
 * writes `startOffsetUtf16`, `endOffsetUtf16`, `startOffsetUtf32` and
 * `endOffsetUtf32` onto the entry. The timeline is modified in place.
 *
 * Words are searched for in order, each search starting where the previous
 * match ended. A word containing whitespace is split into parts that are
 * searched one after another; the word starts at its first part and ends at
 * the last part that was found. A word that cannot be found at all receives an
 * empty range positioned at the end of the last located word.
 *
 * @param timeline Timeline whose word entries are annotated.
 * @param text Text to search the words in.
 * @returns Nothing; the result is written onto the timeline entries.
 */
export function addWordTextOffsetsToTimelineInPlace(timeline, text) {
    const { utf16To32Mapping } = getUTF32Chars(text);
    // Position where the next search starts. Shared by all nesting levels.
    let currentOffset = 0;
    // Fallback position for words that are not found. It must be shared by all
    // nesting levels as well: if it restarted at 0 for each nested timeline, an
    // unmatched word would be placed at the start of the text, before the words
    // that precede it.
    let lastEndOffset = 0;
    function processTimeline(entries) {
        for (const entry of entries) {
            if (entry.type === 'word') {
                const normalizedWord = entry.text.trim().replaceAll(/\s+/g, ' ');
                const wordParts = normalizedWord.split(' ');
                let startOffset;
                let endOffset;
                for (const [partIndex, wordPart] of wordParts.entries()) {
                    const wordPartOffset = text.indexOf(wordPart, currentOffset);
                    if (wordPartOffset === -1) {
                        continue;
                    }
                    currentOffset = wordPartOffset + wordPart.length;
                    if (partIndex === 0) {
                        startOffset = wordPartOffset;
                    }
                    endOffset = currentOffset;
                }
                entry.startOffsetUtf16 = startOffset ?? lastEndOffset;
                entry.endOffsetUtf16 = endOffset ?? lastEndOffset;
                entry.startOffsetUtf32 = utf16To32Mapping[entry.startOffsetUtf16];
                entry.endOffsetUtf32 = utf16To32Mapping[entry.endOffsetUtf16];
                if (endOffset !== undefined) {
                    lastEndOffset = endOffset;
                }
            }
            else if (entry.timeline) {
                processTimeline(entry.timeline);
            }
        }
    }
    processTimeline(timeline);
}
/**
 * Returns a copy of `transcript` in which sentence-ending characters that
 * occur inside a word (anywhere except as the word's last character) are
 * replaced by `maskingCharacter`. Each replaced character is substituted by as
 * many masking characters as it has UTF-16 code units, so the transcript keeps
 * its length and the word offsets stay valid.
 *
 * @param transcript Text the word offsets refer to.
 * @param wordTimeline Entries carrying `text`, `startOffsetUtf16` and `endOffsetUtf16`.
 *        Entries for which `isWord(text)` is false are ignored.
 * @param maskingCharacter Replacement character; must have length 1.
 * @returns The masked transcript.
 * @throws {Error} If `maskingCharacter` is not exactly one UTF-16 code unit long.
 */
function replaceSentenceEndersWithinWordsWithMaskingCharacter(transcript, wordTimeline, maskingCharacter) {
    if (maskingCharacter.length !== 1) {
        throw new Error(`Masking character must be of length 1`);
    }
    // ASCII enders plus their CJK counterparts. The fullwidth forms are written
    // as escapes so they cannot be flattened to their ASCII look-alikes.
    const sentenceEnders = new Set([
        '.',
        '。',
        '?',
        '\uFF1F', // fullwidth question mark
        '!',
        '\uFF01', // fullwidth exclamation mark
        '|',
    ]);
    let modifiedTranscript = transcript;
    for (const wordEntry of wordTimeline) {
        const wordText = wordEntry.text;
        if (!isWord(wordText)) {
            continue;
        }
        let newWordText = '';
        let charIndex = 0;
        // Iterates by code point; charIndex tracks the UTF-16 position.
        for (const char of wordText) {
            const isLastChar = charIndex + char.length === wordText.length;
            if (!isLastChar && sentenceEnders.has(char)) {
                newWordText += maskingCharacter.repeat(char.length);
            }
            else {
                newWordText += char;
            }
            charIndex += char.length;
        }
        if (newWordText === wordText) {
            continue;
        }
        const wordStartOffset = wordEntry.startOffsetUtf16;
        const wordEndOffset = wordEntry.endOffsetUtf16;
        // Without offsets the word cannot be located; substring() would treat
        // the missing bounds as "whole string" and duplicate the transcript.
        if (typeof wordStartOffset !== 'number' || typeof wordEndOffset !== 'number') {
            continue;
        }
        modifiedTranscript =
            modifiedTranscript.substring(0, wordStartOffset) +
                newWordText +
                modifiedTranscript.substring(wordEndOffset);
    }
    return modifiedTranscript;
}
/**
 * Collects, in document order, every entry of a (possibly nested) timeline for
 * which `predicate` returns a truthy value. An entry that matches is collected
 * as-is and its own nested timeline is not searched; the nested timeline of a
 * non-matching entry is searched recursively.
 *
 * @param timeline Timeline to search.
 * @param predicate Function called with each visited entry.
 * @returns A new array holding the matching entries (same object references).
 */
export function extractEntries(timeline, predicate) {
    const matches = [];
    for (const candidate of timeline) {
        if (predicate(candidate)) {
            matches.push(candidate);
            continue;
        }
        if (!candidate.timeline) {
            continue;
        }
        const nestedMatches = extractEntries(candidate.timeline, predicate);
        matches.push(...nestedMatches);
    }
    return matches;
}
//# sourceMappingURL=Timeline.js.map