UNPKG

@lobehub/tts

Version:

A high-quality & reliable TTS React Hooks library

36 lines (34 loc) 1.69 kB
import { markdownToTxt } from "markdown-to-txt";

//#region src/core/utils/splitTextIntoSegments.ts
/** Matches the fullwidth forms of printable ASCII (U+FF01–U+FF5E). */
const FULL_WIDTH_ASCII_PATTERN = /[\uFF01-\uFF5E]/g;

/** Code-point distance between a fullwidth form and its ASCII counterpart (0xFEE0). */
const FULL_WIDTH_OFFSET = 65248;

/**
 * CJK punctuation that lies outside the fullwidth-ASCII range, mapped to the
 * ASCII character it is normalized to.
 */
const CJK_PUNCTUATION = {
  "。": ".",
  "【": "[",
  "】": "]",
  "《": "<",
  "》": ">",
  "“": "\"",
  "”": "\"",
  "‘": "'",
  "’": "'"
};

/** Matches every key of CJK_PUNCTUATION. */
const CJK_PUNCTUATION_PATTERN = /[。【】《》“”‘’]/g;

/**
 * One sentence-like fragment: a run of non-terminator characters followed by
 * either one or more terminators (`.`, `!`, `?`) or the end of the string.
 * The `$` alternative keeps a trailing fragment that has no closing punctuation.
 */
const SENTENCE_PATTERN = /[^!.?]+(?:[!.?]+|$)/g;

/**
 * Strips markdown from `str` and normalizes the result to a single line of
 * half-width text.
 *
 * Steps, in order:
 * 1. `markdownToTxt` converts the markdown input to plain text.
 * 2. Fullwidth ASCII forms (fullwidth comma, `!`, `?`, `;`, `:`, parentheses,
 *    letters, digits, ...) are shifted down to plain ASCII.
 * 3. Remaining CJK punctuation is mapped through CJK_PUNCTUATION.
 * 4. Every newline becomes ". " so that line breaks act as sentence breaks.
 * 5. Each whitespace run collapses to one space (`\s` also matches the
 *    ideographic space U+3000, so no separate replacement is needed for it).
 *
 * @param {string} str Markdown or plain text.
 * @returns {string} Normalized text; it never contains "\n".
 */
const toHalfWidthAndCleanSpace = (str) => {
  return markdownToTxt(str)
    .replaceAll(FULL_WIDTH_ASCII_PATTERN, (ch) => String.fromCharCode(ch.charCodeAt(0) - FULL_WIDTH_OFFSET))
    .replaceAll(CJK_PUNCTUATION_PATTERN, (ch) => CJK_PUNCTUATION[ch])
    .replaceAll("\n", ". ")
    .replaceAll(/\s+/g, " ");
};

/**
 * Splits text into trimmed, non-empty chunks of roughly `chunkSize` characters,
 * breaking on sentence terminators (`.`, `!`, `?`).
 *
 * The text is first normalized with `toHalfWidthAndCleanSpace`. Text that fits
 * in `chunkSize` is returned as a single chunk. Longer text is cut into
 * sentence fragments that are greedily packed, joined by a single space, until
 * adding the next fragment would exceed `chunkSize`.
 *
 * A single fragment longer than `chunkSize` is never cut further, so a chunk
 * can exceed `chunkSize` when one sentence is that long.
 *
 * @param {string} text Markdown or plain text to split.
 * @param {number} [chunkSize=100] Target maximum chunk length in characters.
 * @returns {string[]} The chunks, in input order.
 */
const splitTextIntoSegments = (text, chunkSize = 100) => {
  const normalized = toHalfWidthAndCleanSpace(text);
  const chunks = [];
  let currentChunk = "";

  // Emits the pending chunk (if it has visible content) and starts a new one.
  const flush = () => {
    const trimmed = currentChunk.trim();
    if (trimmed) chunks.push(trimmed);
    currentChunk = "";
  };

  // Normalization already replaced every "\n", so this split yields a single
  // paragraph; the loop is kept so the packing rules stay valid if that changes.
  for (const paragraph of normalized.split("\n")) {
    if (currentChunk.length > 0 && currentChunk.length + paragraph.length + 1 > chunkSize) flush();

    if (paragraph.length <= chunkSize) {
      currentChunk += (currentChunk ? "\n" : "") + paragraph;
      continue;
    }

    // Falls back to the whole paragraph when nothing matches (e.g. the
    // paragraph consists solely of terminator characters).
    const sentences = paragraph.match(SENTENCE_PATTERN) ?? [paragraph];
    for (const sentence of sentences) {
      const trimmed = sentence.trim();
      // A trailing whitespace-only fragment carries no content.
      if (!trimmed) continue;
      if (currentChunk.length > 0 && currentChunk.length + sentence.length + 1 > chunkSize) flush();
      currentChunk += (currentChunk ? " " : "") + trimmed;
    }
  }

  flush();
  return chunks;
};

//#endregion
export { splitTextIntoSegments };