UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

159 lines 5.84 kB
/**
 * Tests whether `str` contains at least one of the given substrings.
 *
 * @param {string} str - Text to search in.
 * @param {Iterable<string>} substrings - Candidate substrings.
 * @returns {boolean} `true` if any candidate occurs in `str`.
 */
export function includesAnyOf(str, substrings) {
    return indexOfAnyOf(str, substrings) >= 0;
}

/**
 * Finds the position of the first candidate (in the order given) that occurs in `str`.
 * Note: candidates are tried in list order, so the result is not necessarily the
 * smallest index among all candidates.
 *
 * @param {string} str - Text to search in.
 * @param {Iterable<string>} substrings - Candidate substrings.
 * @returns {number} Index of the first matching candidate, or -1 if none occurs.
 */
export function indexOfAnyOf(str, substrings) {
    for (const substring of substrings) {
        const index = str.indexOf(substring);

        if (index >= 0) {
            return index;
        }
    }

    return -1;
}

/**
 * Tests whether `str` starts with at least one of the given prefixes.
 *
 * @param {string} str - Text to test.
 * @param {Iterable<string>} prefixes - Candidate prefixes.
 * @returns {boolean} `true` if any candidate is a prefix of `str`.
 */
export function startsWithAnyOf(str, prefixes) {
    for (const prefix of prefixes) {
        if (str.startsWith(prefix)) {
            return true;
        }
    }

    return false;
}

/**
 * Formats a time record as `HH:MM:SS<sep>mmm`.
 *
 * @param {{hours: number, minutes: number, seconds: number, milliseconds: number}} timeHMS - Time components.
 * @param {string} [decimalSeparator='.'] - Separator placed between seconds and milliseconds.
 * @returns {string} The formatted time.
 */
export function formatHMS(timeHMS, decimalSeparator = '.') {
    const hours = formatIntegerWithLeadingZeros(timeHMS.hours, 2);
    const minutes = formatIntegerWithLeadingZeros(timeHMS.minutes, 2);
    const seconds = formatIntegerWithLeadingZeros(timeHMS.seconds, 2);
    const milliseconds = formatIntegerWithLeadingZeros(timeHMS.milliseconds, 3);

    return `${hours}:${minutes}:${seconds}${decimalSeparator}${milliseconds}`;
}

/**
 * Formats a time record as `MM:SS<sep>mmm`.
 *
 * @param {{minutes: number, seconds: number, milliseconds: number}} timeMS - Time components.
 * @param {string} [decimalSeparator='.'] - Separator placed between seconds and milliseconds.
 * @returns {string} The formatted time.
 */
export function formatMS(timeMS, decimalSeparator = '.') {
    const minutes = formatIntegerWithLeadingZeros(timeMS.minutes, 2);
    const seconds = formatIntegerWithLeadingZeros(timeMS.seconds, 2);
    const milliseconds = formatIntegerWithLeadingZeros(timeMS.milliseconds, 3);

    return `${minutes}:${seconds}${decimalSeparator}${milliseconds}`;
}

/**
 * Floors `num` and left-pads its decimal representation with zeros.
 *
 * @param {number} num - Number to format (fractional part is discarded with `Math.floor`).
 * @param {number} minDigitCount - Minimum length of the resulting string.
 * @returns {string} The zero-padded string.
 */
export function formatIntegerWithLeadingZeros(num, minDigitCount) {
    return String(Math.floor(num)).padStart(minDigitCount, '0');
}

/**
 * Wraps every element in a quote symbol and joins the elements with `', '`.
 *
 * @param {string[]} strings - Elements to list.
 * @param {string} [quoteSymbol="'"] - Symbol placed before and after each element.
 * @returns {string} The joined list.
 */
export function formatListWithQuotedElements(strings, quoteSymbol = `'`) {
    return strings.map(str => `${quoteSymbol}${str}${quoteSymbol}`).join(', ');
}

/**
 * Splits a string into code points and builds a UTF-16 → code-point index mapping.
 *
 * @param {string} str - Input string.
 * @returns {{utf32chars: string[], utf16To32Mapping: number[]}} `utf32chars` holds one
 *   entry per code point. `utf16To32Mapping[i]` is the code-point index of UTF-16 unit `i`;
 *   one extra trailing entry maps the end-of-string offset to the code-point count.
 */
export function getUTF32Chars(str) {
    const utf32chars = [];
    const utf16To32Mapping = [];

    let utf32Index = 0;

    for (const utf32char of str) {
        utf32chars.push(utf32char);

        // A code point outside the BMP occupies two UTF-16 units; map each of them.
        for (let i = 0; i < utf32char.length; i++) {
            utf16To32Mapping.push(utf32Index);
        }

        utf32Index += 1;
    }

    // Sentinel entry for the offset just past the end of the string.
    utf16To32Mapping.push(utf32Index);

    return { utf32chars, utf16To32Mapping };
}

/**
 * Tests whether the string contains U+FFFD (REPLACEMENT CHARACTER, code point 65533).
 *
 * @param {string} str - Input string.
 * @returns {boolean} `true` if U+FFFD occurs anywhere in `str`.
 */
export function containsInvalidCodepoint(str) {
    // U+FFFD is a BMP character, so a code-unit search is equivalent to a code-point scan.
    return str.includes('\uFFFD');
}

/**
 * Splits `text` at every match of `separatorRegex`, keeping each separator attached
 * to the end of the segment that precedes it.
 *
 * @param {string} text - Text to split.
 * @param {RegExp} separatorRegex - Separator pattern; must have the global (`g`) flag.
 * @returns {string[]} The segments. If there is no match, a single-element array
 *   containing `text`. A non-empty remainder after the last separator is the final element.
 * @throws {Error} If `separatorRegex` lacks the global flag.
 */
export function splitAndPreserveSeparators(text, separatorRegex) {
    if (!separatorRegex.flags.includes('g')) {
        throw new Error('Separator regular expression must have a global flag');
    }

    // matchAll starts from the regex's current lastIndex; reset it so that a regex
    // previously used with exec/test is still scanned from the start of the text.
    separatorRegex.lastIndex = 0;

    const result = [];
    let lastIndex = 0;

    // Use the match positions reported by the regex engine rather than searching for the
    // matched text again: a textual search can land on an earlier occurrence when the
    // pattern depends on context (lookbehind, lookahead, anchors).
    for (const match of text.matchAll(separatorRegex)) {
        const matchEnd = match.index + match[0].length;

        result.push(text.substring(lastIndex, matchEnd));

        lastIndex = matchEnd;
    }

    if (result.length === 0) {
        return [text];
    }

    const remainingText = text.substring(lastIndex);

    if (remainingText.length > 0) {
        result.push(remainingText);
    }

    return result;
}

/**
 * Measures how strongly a token sequence repeats itself from its start.
 *
 * For each cycle length `c` from 1 to `floor(tokens.length / 2)`, counts how many
 * consecutive tokens starting at index `c` equal the token `c` positions earlier.
 *
 * @param {Array<*>} tokens - Token sequence (elements are compared with `!==`).
 * @returns {{longestMatchLength: number, longestCycleRepetitionCount: number}}
 *   `longestMatchLength` is the largest such count; `longestCycleRepetitionCount` is the
 *   largest value of `count / c + 1`. Both are `-Infinity` when fewer than two tokens
 *   are given (no cycle length can be evaluated).
 */
export function getTokenRepetitionScore(tokens) {
    const maxCycleLength = Math.floor(tokens.length / 2);

    let longestMatchLength = -Infinity;
    let longestCycleRepetitionCount = -Infinity;

    for (let cycleLength = 1; cycleLength <= maxCycleLength; cycleLength++) {
        let matchLength = 0;

        for (let index = cycleLength; index < tokens.length; index++) {
            if (tokens[index] !== tokens[index - cycleLength]) {
                break;
            }

            matchLength += 1;
        }

        if (matchLength > longestMatchLength) {
            longestMatchLength = matchLength;
        }

        const cycleCount = (matchLength / cycleLength) + 1;

        if (cycleCount > longestCycleRepetitionCount) {
            longestCycleRepetitionCount = cycleCount;
        }
    }

    return { longestMatchLength, longestCycleRepetitionCount };
}

/**
 * Converts HTML to plain text using the `html-to-text` package (loaded lazily).
 * Link targets are omitted, images are skipped, and headings / table header cells
 * are not upper-cased; word wrapping is disabled.
 *
 * @param {string} html - HTML markup.
 * @returns {Promise<string>} The extracted text, or an empty string if the result is falsy.
 */
export async function convertHtmlToText(html) {
    const { htmlToText } = await import('html-to-text');

    const text = htmlToText(html, {
        wordwrap: false,
        selectors: [
            { selector: 'a', options: { ignoreHref: true } },
            { selector: 'img', format: 'skip' },
            { selector: 'h1', options: { uppercase: false } },
            { selector: 'h2', options: { uppercase: false } },
            { selector: 'h3', options: { uppercase: false } },
            { selector: 'h4', options: { uppercase: false } },
            { selector: 'table', options: { uppercaseHeaderCells: false } }
        ]
    });

    return text || '';
}

/**
 * Replaces each code point of `text` that has an entry in `substitutionLookup`.
 *
 * @param {string} text - Input text.
 * @param {Object<string, string>} substitutionLookup - Maps a single character to its
 *   replacement; characters without a (non-`undefined`) entry are kept as they are.
 * @returns {string} The text with substitutions applied.
 */
export function substituteCharactersUsingLookup(text, substitutionLookup) {
    let resultText = '';

    for (const char of text) {
        const substitution = substitutionLookup[char];

        resultText += substitution !== undefined ? substitution : char;
    }

    return resultText;
}

/**
 * Replaces the whole string `text` if it has an entry in `substitutionLookup`.
 *
 * @param {string} text - Input text, used as the lookup key.
 * @param {Object<string, string>} substitutionLookup - Maps a string to its replacement.
 * @returns {string} The replacement, or `text` itself when there is no
 *   (non-`undefined`) own entry for it.
 */
export function substituteStringUsingLookup(text, substitutionLookup) {
    // Only consult own properties, so keys such as 'constructor' or 'toString' do not
    // resolve to members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(substitutionLookup, text)) {
        const substitution = substitutionLookup[text];

        if (substitution !== undefined) {
            return substitution;
        }
    }

    return text;
}
//# sourceMappingURL=StringUtilities.js.map