UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

212 lines 9.52 kB
import { addMissingPunctuationWordsToWordSequence, segmentWordSequence, WordSequence } from '@echogarden/text-segmentation';
import { applyWhitespaceProcessing, isWord, isWordOrSymbolWord, splitToParagraphs } from '../nlp/Segmentation.js';
import { deepClone } from './ObjectUtilities.js';
import { getUTF32Chars } from './StringUtilities.js';
import { roundToDigits } from './Utilities.js';

/**
 * Returns a deep copy of a timeline with `timeOffset` added to every
 * `startTime` and `endTime`, including those of nested timelines.
 * Shifted times are clamped so they never go below 0.
 *
 * @param {Array<object>} targetTimeline - Timeline to shift. A falsy value is returned as-is.
 * @param {number} timeOffset - Amount added to each time (may be negative).
 * @returns {Array<object>} The shifted copy; the input is not modified.
 */
export function addTimeOffsetToTimeline(targetTimeline, timeOffset) {
    if (!targetTimeline) {
        return targetTimeline;
    }

    const newTimeline = deepClone(targetTimeline);

    for (const entry of newTimeline) {
        entry.startTime = Math.max(entry.startTime + timeOffset, 0);
        entry.endTime = Math.max(entry.endTime + timeOffset, 0);

        if (entry.timeline) {
            entry.timeline = addTimeOffsetToTimeline(entry.timeline, timeOffset);
        }
    }

    return newTimeline;
}

/**
 * Returns a deep copy of a timeline with every `startTime` and `endTime`
 * multiplied by `factor`, including those of nested timelines.
 *
 * @param {Array<object>} targetTimeline - Timeline to scale. A falsy value is returned as-is.
 * @param {number} factor - Multiplier applied to each time.
 * @returns {Array<object>} The scaled copy; the input is not modified.
 */
export function multiplyTimelineByFactor(targetTimeline, factor) {
    // Same guard as addTimeOffsetToTimeline, so a missing timeline passes through
    // instead of failing inside the loop.
    if (!targetTimeline) {
        return targetTimeline;
    }

    const newTimeline = deepClone(targetTimeline);

    for (const entry of newTimeline) {
        entry.startTime = entry.startTime * factor;
        entry.endTime = entry.endTime * factor;

        if (entry.timeline) {
            entry.timeline = multiplyTimelineByFactor(entry.timeline, factor);
        }
    }

    return newTimeline;
}

/**
 * Returns a deep copy of a timeline with `startTime`, `endTime` and
 * `confidence` rounded via `roundToDigits`, including nested timelines.
 *
 * @param {Array<object>} targetTimeline - Timeline to round. A falsy value is returned as-is.
 * @param {number} [decimalDigits=2] - Digit count passed to `roundToDigits`.
 * @returns {Array<object>} The rounded copy; the input is not modified.
 */
export function roundTimelineProperties(targetTimeline, decimalDigits = 2) {
    if (!targetTimeline) {
        return targetTimeline;
    }

    const roundedTimeline = deepClone(targetTimeline);

    for (const entry of roundedTimeline) {
        // Truthiness checks skip missing values; a value of 0 is also skipped,
        // which is harmless since it needs no rounding.
        if (entry.startTime) {
            entry.startTime = roundToDigits(entry.startTime, decimalDigits);
        }

        if (entry.endTime) {
            entry.endTime = roundToDigits(entry.endTime, decimalDigits);
        }

        if (entry.confidence) {
            entry.confidence = roundToDigits(entry.confidence, decimalDigits);
        }

        if (entry.timeline) {
            // Forward decimalDigits so nested levels use the caller's precision
            // rather than silently falling back to the default.
            entry.timeline = roundTimelineProperties(entry.timeline, decimalDigits);
        }
    }

    return roundedTimeline;
}

/**
 * Groups a flat word timeline into a segment → sentence → word timeline.
 *
 * Words are fed into a `WordSequence`, missing punctuation is added from the
 * transcript, and the sequence is split into sentences. Sentences are then
 * assigned to the paragraphs of the transcript by comparing each sentence's
 * start offset with the running paragraph end offset.
 *
 * @param {Array<object>} wordTimelineWithOffsets - Word entries; each is read for
 *   `text`, `startOffsetUtf16`, `startTime` and `endTime`.
 * @param {string} transcript - Full text the words were taken from.
 * @param {string} language - Currently not used by this function.
 * @param {string} [paragraphBreaks='double'] - Passed to `splitToParagraphs`.
 * @param {string} [whitespace='collapse'] - Passed to `applyWhitespaceProcessing`
 *   when producing each segment's text.
 * @returns {Promise<{segmentTimeline: Array<object>}>} One `segment` entry per
 *   paragraph, each holding its `sentence` entries, each holding its word entries.
 */
export async function wordTimelineToSegmentSentenceTimeline(wordTimelineWithOffsets, transcript, language, paragraphBreaks = 'double', whitespace = 'collapse') {
    const wordSequence = new WordSequence();

    for (const wordEntry of wordTimelineWithOffsets) {
        const isPunctuation = !isWordOrSymbolWord(wordEntry.text);

        wordSequence.addWord(wordEntry.text, wordEntry.startOffsetUtf16, isPunctuation);
    }

    const { wordSequenceWithPunctuation, originalWordsReverseMapping } = addMissingPunctuationWordsToWordSequence(wordSequence, transcript);
    const segmentedWordSequence = await segmentWordSequence(wordSequenceWithPunctuation);
    const sentences = segmentedWordSequence.sentences;

    // Whitespace is kept ('preserve') here; the offset bookkeeping below relies on
    // paragraph lengths adding up to positions in the transcript.
    const paragraphs = splitToParagraphs(transcript, paragraphBreaks, 'preserve');
    const sentenceIndexesForParagraph = paragraphs.map(() => []);

    {
        let sentenceIndex = 0;
        let charOffset = 0;

        for (let paragraphIndex = 0; paragraphIndex < paragraphs.length; paragraphIndex++) {
            const paragraphEndOffset = charOffset + paragraphs[paragraphIndex].length;

            // Consume every sentence that starts before this paragraph ends.
            while (sentenceIndex < sentences.length) {
                const sentenceStartOffset = sentences[sentenceIndex].charRange.start;

                if (sentenceStartOffset >= paragraphEndOffset) {
                    break;
                }

                sentenceIndexesForParagraph[paragraphIndex].push(sentenceIndex);
                sentenceIndex++;
            }

            if (sentenceIndex === sentences.length) {
                break;
            }

            charOffset = paragraphEndOffset;
        }
    }

    const segmentTimeline = [];

    // Running index into the punctuation-augmented word sequence, shared across
    // all sentences and paragraphs.
    let wordIndex = 0;

    for (let paragraphIndex = 0; paragraphIndex < paragraphs.length; paragraphIndex++) {
        const paragraph = paragraphs[paragraphIndex];
        const sentenceEntries = sentenceIndexesForParagraph[paragraphIndex].map(index => sentences[index]);
        const sentenceTimeline = [];

        for (const sentenceEntry of sentenceEntries) {
            const wordTimeline = [];

            for (const _ of sentenceEntry.words.entries) {
                // Words that were added as missing punctuation have no mapping
                // back to the original timeline and are skipped.
                const originalWordIndex = originalWordsReverseMapping.get(wordIndex);

                if (originalWordIndex !== undefined) {
                    wordTimeline.push(wordTimelineWithOffsets[originalWordIndex]);
                }

                wordIndex += 1;
            }

            sentenceTimeline.push({
                type: 'sentence',
                text: sentenceEntry.text,
                startTime: wordTimeline[0]?.startTime,
                endTime: wordTimeline[wordTimeline.length - 1]?.endTime,
                timeline: wordTimeline,
            });
        }

        segmentTimeline.push({
            type: 'segment',
            text: applyWhitespaceProcessing(paragraph, whitespace),
            startTime: sentenceTimeline[0]?.startTime,
            endTime: sentenceTimeline[sentenceTimeline.length - 1]?.endTime,
            timeline: sentenceTimeline,
        });
    }

    return { segmentTimeline };
}

/**
 * Annotates every `word` entry of a (possibly nested) timeline, in place, with
 * `startOffsetUtf16`, `endOffsetUtf16`, `startOffsetUtf32` and `endOffsetUtf32`.
 *
 * Each word is located by searching `text` forward from the end of the
 * previous match. A word containing whitespace is searched part by part.
 * When no part is found, both offsets fall back to the end offset of the last
 * matched word.
 *
 * @param {Array<object>} timeline - Timeline to annotate; mutated.
 * @param {string} text - Text the word entries refer to.
 * @returns {undefined}
 */
export function addWordTextOffsetsToTimelineInPlace(timeline, text) {
    const { utf16To32Mapping } = getUTF32Chars(text);

    let currentOffset = 0;

    // Tracked alongside currentOffset for the whole traversal, so an unmatched
    // word at the start of a nested timeline falls back to the last matched
    // position instead of restarting from 0.
    let lastEndOffset = 0;

    function processTimeline(entries) {
        for (const entry of entries) {
            if (entry.type === 'word') {
                const normalizedWord = entry.text.trim().replaceAll(/\s+/g, ' ');
                const wordParts = normalizedWord.split(' ');

                let startOffset;
                let endOffset;

                for (let i = 0; i < wordParts.length; i++) {
                    const wordPart = wordParts[i];
                    const wordPartOffset = text.indexOf(wordPart, currentOffset);

                    if (wordPartOffset === -1) {
                        continue;
                    }

                    currentOffset = wordPartOffset + wordPart.length;

                    if (i === 0) {
                        startOffset = wordPartOffset;
                    }

                    endOffset = currentOffset;
                }

                entry.startOffsetUtf16 = startOffset ?? lastEndOffset;
                entry.endOffsetUtf16 = endOffset ?? lastEndOffset;

                entry.startOffsetUtf32 = utf16To32Mapping[entry.startOffsetUtf16];
                entry.endOffsetUtf32 = utf16To32Mapping[entry.endOffsetUtf16];

                if (endOffset !== undefined) {
                    lastEndOffset = endOffset;
                }
            } else if (entry.timeline) {
                processTimeline(entry.timeline);
            }
        }
    }

    return processTimeline(timeline);
}

/**
 * Returns a copy of `transcript` in which sentence-ending characters that occur
 * inside a word (anywhere except the word's last character) are replaced by
 * `maskingCharacter`. Only entries for which `isWord` returns true are processed.
 *
 * @param {string} transcript - Text to mask.
 * @param {Array<object>} wordTimeline - Word entries; each is read for `text`,
 *   `startOffsetUtf16` and `endOffsetUtf16`.
 * @param {string} maskingCharacter - Replacement character; must have length 1.
 * @returns {string} The masked transcript.
 * @throws {Error} If `maskingCharacter` is not exactly one UTF-16 unit long.
 */
function replaceSentenceEndersWithinWordsWithMaskingCharacter(transcript, wordTimeline, maskingCharacter) {
    if (maskingCharacter.length !== 1) {
        throw new Error(`Masking character must be of length 1`);
    }

    let modifiedTranscript = transcript;

    // ASCII enders paired with their CJK / fullwidth counterparts. The fullwidth
    // question and exclamation marks are written as escapes so they cannot be
    // folded into their ASCII look-alikes by text normalization.
    const sentenceEnders = ['.', '。', '?', '\uFF1F', '!', '\uFF01', '|'];

    for (const wordEntry of wordTimeline) {
        const wordText = wordEntry.text;

        if (!isWord(wordText)) {
            continue;
        }

        let newWordText = '';
        let charIndex = 0;

        for (const char of wordText) {
            const isLastChar = charIndex + char.length === wordText.length;

            if (!isLastChar && sentenceEnders.includes(char)) {
                // Emit one masking character per UTF-16 unit so the word keeps its
                // length and the transcript offsets stay valid.
                newWordText += maskingCharacter.repeat(char.length);
            } else {
                newWordText += char;
            }

            charIndex += char.length;
        }

        if (newWordText !== wordText) {
            modifiedTranscript =
                modifiedTranscript.substring(0, wordEntry.startOffsetUtf16) +
                newWordText +
                modifiedTranscript.substring(wordEntry.endOffsetUtf16);
        }
    }

    return modifiedTranscript;
}

/**
 * Collects, in traversal order, every entry of a (possibly nested) timeline
 * for which `predicate` returns a truthy value. The nested timeline of a
 * matching entry is not searched.
 *
 * @param {Array<object>} timeline - Timeline to search.
 * @param {function(object): boolean} predicate - Test applied to each entry.
 * @returns {Array<object>} The matching entries (same object references).
 */
export function extractEntries(timeline, predicate) {
    const matchingEntries = [];

    for (const entry of timeline) {
        if (predicate(entry)) {
            matchingEntries.push(entry);
        } else if (entry.timeline) {
            // Pushed one by one rather than via spread, so very large nested
            // results cannot exceed the engine's argument-count limit.
            for (const nestedMatch of extractEntries(entry.timeline, predicate)) {
                matchingEntries.push(nestedMatch);
            }
        }
    }

    return matchingEntries;
}
//# sourceMappingURL=Timeline.js.map