UNPKG

split-helper

Version:
87 lines 3.08 kB
import _ from 'lodash'; export function splitNormalize(text) { text = text || ''; const fixEls = [' ', '..', '. .', '. ...']; while (fixEls.some((fe) => text.includes(fe))) { text = text .replaceAll(' ', ' ') .replaceAll('..', '.') .replaceAll('. ...', '. ') .replaceAll('. .', '. ') // additional .replaceAll(':.', ':') .replaceAll('!.', '!') .replaceAll('?.', '?') .replaceAll(';.', ';'); } return pointSpaceNormalize(text); } export function pointSpaceNormalize(text) { let position = 0; const point = '.'; while (position > -1 && position < text.length) { const nextPosition = position + 1; const nextChar = text[nextPosition]; const isLetter = nextChar?.toUpperCase() !== nextChar?.toLowerCase(); const isUpper = nextChar === nextChar?.toUpperCase(); if (isLetter && isUpper) { text = text.substring(0, nextPosition) + ' ' + text.substring(nextPosition, text.length); } position = text.indexOf(point, nextPosition + 1); } return text; } export function getMicroSplits(text, limitLength, separator) { if (!separator) { return [text]; } const res = _.flatten(text.split(separator).map((t) => { if (t.length >= limitLength) { return getMicroSplits(t, limitLength, getNextSeparator(separator)); } return t + separator; })).map((x) => splitNormalize(x)); // TODO: if startsWith lowercase - union with previos item return res; } export function getSplittedTexts(text, limitLength) { if (text.length <= limitLength) { return [text]; } const microSplits = getMicroSplits(text, limitLength, '. '); return groupByLimit(microSplits, limitLength); } export function groupByLimit(splits, limitLength, joiner = ' ') { const macroSplits = []; let currentSplitIndex = 0; while (currentSplitIndex < splits.length) { let nextSplitIndex = currentSplitIndex + 1; while (splits.slice(currentSplitIndex, nextSplitIndex).join(joiner).length < limitLength && nextSplitIndex < splits.length) { nextSplitIndex++; } const noLimit = splits.slice(currentSplitIndex, currentSplitIndex + nextSplitIndex).join(joiner).length < limitLength || nextSplitIndex - currentSplitIndex <= 1; if (!noLimit) { nextSplitIndex--; } macroSplits.push(splits.slice(currentSplitIndex, nextSplitIndex).join(joiner)); currentSplitIndex = nextSplitIndex; } return macroSplits; } export function getNextSeparator(separator) { switch (separator) { case '.': case '. ': return '!'; case '!': return '?'; case '?': return ';'; case ';': return '˿'; } return null; } //# sourceMappingURL=index.js.map