UNPKG

pinyin-split

Version:

Split up any kind of Pinyin into an array of syllables.

46 lines (45 loc) 1.65 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.split = void 0; const syllables_1 = __importDefault(require("./syllables")); const normalize = (text) => { text = text.normalize('NFD').replace(/\u0304|\u0301|\u030c|\u0300/g, ''); return text.normalize('NFC').replace(/(\w|ü)[1-5]/gi, '$1').toLowerCase(); }; function split(text, everything = false, wrapInList = false) { const list = Array(); let prevWordFound = false; let wordEnd = text.length; while (wordEnd > 0) { let count = wordEnd; let wordFound = false; while (count > 0) { const word = text.substring(wordEnd - count, wordEnd); if (syllables_1.default.includes(normalize(word))) { wordFound = true; list.push(wrapInList ? [word] : word); wordEnd -= (count - 1); break; } count--; } if (!wordFound && everything) { const prevIndex = list.length - 1; const prevEntry = list[prevIndex]; if (wordEnd === text.length || typeof prevEntry === 'object' || prevWordFound) { list.push(text[wordEnd - 1]); } else if (typeof prevEntry === 'string') { list[prevIndex] = text[wordEnd - 1] + prevEntry; } } wordEnd--; prevWordFound = wordFound; } return list.reverse(); } exports.split = split; exports.default = split;