UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

github.com/echogarden-project/echogarden

echogarden-project/echogarden

259 lines • 11.4 kB

JavaScript

import { applyWhitespaceProcessing, isWord, isWordOrSymbolWord, splitToSentences, splitToWords } from '../nlp/Segmentation.js';
import { deepClone } from './ObjectUtilities.js';
import { getUTF32Chars, splitAndPreserveSeparators } from './StringUtilities.js';
import { roundToDigits } from './Utilities.js';

/**
 * Returns a copy of a timeline (made with `deepClone`) in which `timeOffset`
 * has been added to the start and end time of every entry, recursively.
 * Resulting times are clamped so they never go below 0.
 *
 * @param {Array<object>|undefined|null} targetTimeline - Timeline to shift. A falsy value is returned as-is.
 * @param {number} timeOffset - Amount to add to every start/end time (may be negative).
 * @returns {Array<object>|undefined|null} The shifted copy, or the original falsy value.
 */
export function addTimeOffsetToTimeline(targetTimeline, timeOffset) {
    if (!targetTimeline) {
        return targetTimeline;
    }

    const newTimeline = deepClone(targetTimeline);

    for (const segmentTimelineEntry of newTimeline) {
        segmentTimelineEntry.startTime = Math.max(segmentTimelineEntry.startTime + timeOffset, 0);
        segmentTimelineEntry.endTime = Math.max(segmentTimelineEntry.endTime + timeOffset, 0);

        if (segmentTimelineEntry.timeline) {
            segmentTimelineEntry.timeline = addTimeOffsetToTimeline(segmentTimelineEntry.timeline, timeOffset);
        }
    }

    return newTimeline;
}

/**
 * Returns a copy of a timeline (made with `deepClone`) in which the start and
 * end time of every entry, recursively, has been multiplied by `factor`.
 *
 * @param {Array<object>} targetTimeline - Timeline to scale.
 * @param {number} factor - Multiplier applied to every start/end time.
 * @returns {Array<object>} The scaled copy.
 */
export function multiplyTimelineByFactor(targetTimeline, factor) {
    const newTimeline = deepClone(targetTimeline);

    for (const segmentTimelineEntry of newTimeline) {
        segmentTimelineEntry.startTime = segmentTimelineEntry.startTime * factor;
        segmentTimelineEntry.endTime = segmentTimelineEntry.endTime * factor;

        if (segmentTimelineEntry.timeline) {
            segmentTimelineEntry.timeline = multiplyTimelineByFactor(segmentTimelineEntry.timeline, factor);
        }
    }

    return newTimeline;
}

/**
 * Returns a copy of a timeline (made with `deepClone`) in which `startTime`,
 * `endTime` and `confidence` of every entry, recursively, have been passed
 * through `roundToDigits`.
 *
 * Falsy property values (missing, `0`, `NaN`) are left untouched.
 *
 * @param {Array<object>} targetTimeline - Timeline to round.
 * @param {number} [decimalDigits=2] - Digit count handed to `roundToDigits`.
 * @returns {Array<object>} The rounded copy.
 */
export function roundTimelineProperties(targetTimeline, decimalDigits = 2) {
    const roundedTimeline = deepClone(targetTimeline);

    for (const entry of roundedTimeline) {
        if (entry.startTime) {
            entry.startTime = roundToDigits(entry.startTime, decimalDigits);
        }

        if (entry.endTime) {
            entry.endTime = roundToDigits(entry.endTime, decimalDigits);
        }

        if (entry.confidence) {
            entry.confidence = roundToDigits(entry.confidence, decimalDigits);
        }

        if (entry.timeline) {
            // Forward the requested precision; otherwise nested entries would
            // silently fall back to the default of 2 digits.
            entry.timeline = roundTimelineProperties(entry.timeline, decimalDigits);
        }
    }

    return roundedTimeline;
}

/**
 * Builds a segment -> sentence -> word timeline from a flat word timeline and
 * the transcript it belongs to.
 *
 * The transcript is split into paragraphs ("segments") and sentences, the word
 * entries are assigned to the sentence that contains them, and the start/end
 * times of sentences and segments are derived from their first and last word.
 * Sentences and segments that end up with no words are dropped.
 *
 * Word entries are expected to carry `startOffsetUtf16` / `endOffsetUtf16`
 * (as set by `addWordTextOffsetsToTimeline`); these are used to mask
 * sentence-ending characters that occur inside words.
 *
 * @param {Array<object>} wordTimeline - Flat timeline of word entries.
 * @param {string} transcript - Text the word entries refer to.
 * @param {string} language - Language passed to `splitToSentences`.
 * @param {'single'|'double'} [paragraphBreaks='double'] - `'single'` splits paragraphs on one
 *   or more line breaks, `'double'` on two or more.
 * @param {string} [whitespace='collapse'] - Mode passed to `applyWhitespaceProcessing`.
 * @returns {Promise<{segmentTimeline: Array<object>}>} The nested timeline.
 * @throws {Error} If `paragraphBreaks` is invalid, or a word can't be located in the rebuilt text.
 */
export async function wordTimelineToSegmentSentenceTimeline(wordTimeline, transcript, language, paragraphBreaks = 'double', whitespace = 'collapse') {
    let segments = [];

    {
        // Ensure word entries with words that include potential sentence ending characters,
        // like '.', '?' or '!', aren't causing the sentence segmentation
        // to identify them as sentence breaks.
        const maskedTranscript = replaceSentenceEndersWithinWordsWithMaskingCharacter(transcript, wordTimeline, '_');

        // Split to segments and sentences, based on the masked transcript,
        // don't apply any whitespace processing yet.
        let paragraphs;

        if (paragraphBreaks === 'single') {
            paragraphs = splitAndPreserveSeparators(maskedTranscript, /(\r?\n)+/g);
        } else if (paragraphBreaks === 'double') {
            paragraphs = splitAndPreserveSeparators(maskedTranscript, /(\r?\n)(\r?\n)+/g);
        } else {
            throw new Error(`Invalid paragraph break type: '${paragraphBreaks}'`);
        }

        const maskedSegments = paragraphs.map(paragraph => splitToSentences(paragraph, language));

        // Restore the sentence text to the original text, using the original transcript,
        // and apply whitespace processing to each sentence.
        // This relies on the masked transcript having the same length as the original,
        // so a running offset can be used to slice the original text.
        let offset = 0;

        for (const segment of maskedSegments) {
            const newSegment = [];

            for (let sentenceIndex = 0; sentenceIndex < segment.length; sentenceIndex++) {
                const sentence = segment[sentenceIndex];
                const sentenceLength = sentence.length;

                const restoredSentence = transcript.substring(offset, offset + sentenceLength);
                const restoredAndProcessedSentence = applyWhitespaceProcessing(restoredSentence, whitespace).trim();

                if (restoredAndProcessedSentence.length > 0) {
                    newSegment.push(restoredAndProcessedSentence);
                }

                offset += sentenceLength;
            }

            segments.push(newSegment);
        }

        segments = segments.filter(segment => segment.length > 0);
    }

    // Create a new text based on the processed sentences, new segment and sentence timeline,
    // and store mapping between character indexes and the corresponding sentence they belong to.
    let text = '';
    const charIndexToSentenceEntryMapping = [];
    const segmentTimeline = [];

    for (const segment of segments) {
        const sentencesInSegment = [];

        const segmentEntry = {
            type: 'segment',
            text: '',
            startTime: -1,
            endTime: -1,
            timeline: sentencesInSegment
        };

        for (const sentence of segment) {
            const sentenceEntry = {
                type: 'sentence',
                text: sentence,
                startTime: -1,
                endTime: -1,
                timeline: []
            };

            // A single space is appended after every sentence as a separator.
            for (const char of sentence + ' ') {
                text += char;
                charIndexToSentenceEntryMapping.push(sentenceEntry);
            }

            sentencesInSegment.push(sentenceEntry);
        }

        segmentTimeline.push(segmentEntry);
    }

    // Add the word entries to their corresponding sentence timelines
    {
        let wordSearchStartOffset = 0;

        for (let wordIndex = 0; wordIndex < wordTimeline.length; wordIndex++) {
            const wordEntry = wordTimeline[wordIndex];
            const wordText = wordEntry.text;

            if (!isWordOrSymbolWord(wordText)) {
                continue;
            }

            const indexOfWordInText = text.indexOf(wordText, wordSearchStartOffset);

            if (indexOfWordInText === -1) {
                throw new Error(`Couldn't find the word '${wordText}' in the text at start position ${wordSearchStartOffset}`);
            }

            const targetSentenceEntry = charIndexToSentenceEntryMapping[indexOfWordInText];
            targetSentenceEntry.timeline.push(deepClone(wordEntry));

            wordSearchStartOffset = indexOfWordInText + wordText.length;
        }
    }

    // Produce a new segment/sentence timeline with rewritten entries,
    // that match the assigned words.
    const newSegmentTimeline = [];

    for (const segmentEntry of segmentTimeline) {
        const oldSentenceTimeline = segmentEntry.timeline;
        const newSentenceTimeline = [];

        for (const sentenceEntry of oldSentenceTimeline) {
            const sentenceWordTimeline = sentenceEntry.timeline;

            // Sentences that received no words are dropped.
            if (!sentenceWordTimeline || sentenceWordTimeline.length === 0) {
                continue;
            }

            sentenceEntry.startTime = sentenceWordTimeline[0].startTime;
            sentenceEntry.endTime = sentenceWordTimeline[sentenceWordTimeline.length - 1].endTime;

            newSentenceTimeline.push(sentenceEntry);
        }

        // Segments that ended up with no sentences are dropped.
        if (newSentenceTimeline.length === 0) {
            continue;
        }

        segmentEntry.text = newSentenceTimeline.map(sentenceEntry => sentenceEntry.text).join(' ');
        segmentEntry.startTime = newSentenceTimeline[0].startTime;
        segmentEntry.endTime = newSentenceTimeline[newSentenceTimeline.length - 1].endTime;

        newSegmentTimeline.push(segmentEntry);
    }

    return { segmentTimeline: newSegmentTimeline };
}

/**
 * Annotates, in place, every entry of type `'word'` in a (possibly nested)
 * timeline with the offsets of its text within `text`.
 *
 * Words are searched for sequentially, starting at `currentOffset`. A word
 * containing whitespace is split into parts that are located one after the
 * other; parts that can't be found are skipped. The properties
 * `startOffsetUtf16`, `endOffsetUtf16`, `startOffsetUtf32` and
 * `endOffsetUtf32` are set on each word entry (`undefined` when not found).
 *
 * @param {Array<object>} timeline - Timeline whose word entries are annotated (mutated).
 * @param {string} text - Text to search in.
 * @param {number} [currentOffset=0] - UTF-16 offset at which the search starts.
 * @returns {number} The search offset after the last located word part.
 */
export function addWordTextOffsetsToTimeline(timeline, text, currentOffset = 0) {
    // NOTE(review): `mapping` is presumably a UTF-16 offset -> UTF-32 offset lookup; confirm in StringUtilities.
    const { mapping } = getUTF32Chars(text);

    for (const entry of timeline) {
        if (entry.type === 'word') {
            let word = entry.text;
            word = word.trim().replaceAll(/\s+/g, ' ');

            const wordParts = word.split(' ');

            let startOffset;
            let endOffset;

            for (let i = 0; i < wordParts.length; i++) {
                const wordPart = wordParts[i];
                const wordPartOffset = text.indexOf(wordPart, currentOffset);

                if (wordPartOffset === -1) {
                    continue;
                }

                currentOffset = wordPartOffset + wordPart.length;

                if (i === 0) {
                    startOffset = wordPartOffset;
                }

                endOffset = currentOffset;
            }

            entry.startOffsetUtf16 = startOffset;
            entry.endOffsetUtf16 = endOffset;

            entry.startOffsetUtf32 = startOffset !== undefined ? mapping[startOffset] : undefined;
            entry.endOffsetUtf32 = endOffset !== undefined ? mapping[endOffset] : undefined;
        } else if (entry.timeline) {
            currentOffset = addWordTextOffsetsToTimeline(entry.timeline, text, currentOffset);
        }
    }

    return currentOffset;
}

/**
 * Returns a version of `transcript` in which sentence-ending characters that
 * occur inside a word (anywhere but its last character) are replaced by
 * `maskingCharacter`, so the transcript keeps its exact length.
 *
 * Only entries for which `isWord` is true are considered, and each one is
 * located through its `startOffsetUtf16` / `endOffsetUtf16` properties.
 *
 * @param {string} transcript - Original transcript.
 * @param {Array<object>} wordTimeline - Word entries carrying UTF-16 offsets.
 * @param {string} maskingCharacter - Replacement character; must have length 1.
 * @returns {string} The masked transcript.
 * @throws {Error} If `maskingCharacter` is not of length 1.
 */
function replaceSentenceEndersWithinWordsWithMaskingCharacter(transcript, wordTimeline, maskingCharacter) {
    if (maskingCharacter.length !== 1) {
        throw new Error(`Masking character must be of length 1`);
    }

    let modifiedTranscript = transcript;

    const sentenceEnders = ['.', '。', '?', '？', '!', '！', '|'];

    for (const wordEntry of wordTimeline) {
        const wordText = wordEntry.text;

        if (!isWord(wordText)) {
            continue;
        }

        let newWordText = '';
        let charIndex = 0;

        // Iterating the string yields whole code points, so `char.length` may be 2.
        for (const char of wordText) {
            const isLastChar = charIndex + char.length === wordText.length;

            if (!isLastChar && sentenceEnders.includes(char)) {
                // Emit one masking character per UTF-16 unit to preserve the length.
                newWordText += maskingCharacter.repeat(char.length);
            } else {
                newWordText += char;
            }

            charIndex += char.length;
        }

        if (newWordText !== wordText) {
            const wordStartOffset = wordEntry.startOffsetUtf16;
            const wordEndOffset = wordEntry.endOffsetUtf16;

            // Without offsets the word can't be located. `substring(0, undefined)` and
            // `substring(undefined)` would both return the entire string, duplicating
            // the transcript, so skip masking for such entries.
            if (wordStartOffset == null || wordEndOffset == null) {
                continue;
            }

            modifiedTranscript =
                modifiedTranscript.substring(0, wordStartOffset) +
                newWordText +
                modifiedTranscript.substring(wordEndOffset);
        }
    }

    return modifiedTranscript;
}

/**
 * Collects, depth-first and in order, the entries of a nested timeline that
 * satisfy `predicate`. A matching entry is collected as-is and its own nested
 * timeline is not searched.
 *
 * @param {Array<object>} timeline - Timeline to search.
 * @param {(entry: object) => boolean} predicate - Selects the entries to collect.
 * @returns {Array<object>} The matching entries (same object references).
 */
export function extractEntries(timeline, predicate) {
    const timelineWordEntries = [];

    for (const entry of timeline) {
        if (predicate(entry)) {
            timelineWordEntries.push(entry);
        } else if (entry.timeline) {
            timelineWordEntries.push(...extractEntries(entry.timeline, predicate));
        }
    }

    return timelineWordEntries;
}

////

/**
 * Ad-hoc manual check: builds a word timeline for a two-sentence transcript,
 * injects sentence-ending characters inside two of the words, and runs
 * `wordTimelineToSegmentSentenceTimeline` on it.
 *
 * @returns {Promise<{segmentTimeline: Array<object>}>} The resulting nested timeline, for inspection.
 */
export async function testTimelineFix() {
    let transcript = 'Hello world how are you? Do you want to play chess?';

    const words = (await splitToWords(transcript, 'en')).filter(word => word.trim() !== '');

    const wordTimeline = words.map(wordText => ({
        type: 'word',
        text: wordText,
        startTime: 0,
        endTime: 0,
    }));

    addWordTextOffsetsToTimeline(wordTimeline, transcript);

    // Same-length replacements, so the previously computed offsets stay valid.
    wordTimeline[1].text = 'wor.d';
    wordTimeline[8].text = 'wa.t';

    transcript = transcript.replace('world', 'wor.d').replace('want', 'wa.t');

    const result = await wordTimelineToSegmentSentenceTimeline(wordTimeline, transcript, 'en');

    return result;
}
//# sourceMappingURL=Timeline.js.map