pinyin-split
Version:
Split up any kind of Pinyin into an array of syllables.
46 lines (45 loc) • 1.65 kB
JavaScript
;
/**
 * Interop helper for default imports.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation, which returns modules
 * flagged with `__esModule` untouched and wraps anything else as
 * `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules already carry their own `default`.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag the exports object with `__esModule` so interop helpers such as
// __importDefault return this module as-is instead of wrapping it.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the `split` export as undefined; the real function is assigned
// once it has been defined further down.
exports.split = void 0;
// Syllable table; only its `default` export is used, queried via `.includes`.
// NOTE(review): presumably a list of bare, toneless, lower-case Pinyin
// syllables — confirm against ./syllables.
const syllables_1 = __importDefault(require("./syllables"));
/**
 * Reduce a Pinyin fragment to a bare lookup key.
 *
 * Steps, in order:
 *   1. decompose (NFD) and drop the combining macron, acute, caron and grave;
 *   2. recompose (NFC) so remaining marks rejoin their base letters;
 *   3. drop a digit 1-5 that directly follows a word character or `ü`;
 *   4. lower-case the result.
 *
 * @param {string} text - Pinyin fragment, with tone marks and/or tone numbers.
 * @returns {string} The fragment without tone information, in lower case.
 */
const normalize = (text) => {
    const toneMarks = /\u0304|\u0301|\u030c|\u0300/g;
    const toneNumber = /(\w|ü)[1-5]/gi;
    const withoutMarks = text.normalize('NFD').replace(toneMarks, '');
    const recomposed = withoutMarks.normalize('NFC');
    return recomposed.replace(toneNumber, '$1').toLowerCase();
};
/**
 * Split a run of Pinyin into its syllables.
 *
 * The text is scanned from its end towards its start. At each position the
 * longest candidate ending there is tried first; a candidate is accepted when
 * its normalized form is listed in the syllable table.
 *
 * @param {string} text - Pinyin to split; may carry tone marks or tone numbers.
 * @param {boolean} [everything=false] - When true, characters that belong to
 *   no syllable are kept in the result; consecutive unmatched characters are
 *   merged into one string entry. When false they are dropped.
 * @param {boolean} [wrapInList=false] - When true, each matched syllable is
 *   wrapped in a one-element array, while unmatched text stays a plain string.
 * @returns {Array<string|string[]>} The pieces in their original order.
 */
function split(text, everything = false, wrapInList = false) {
    // The four combining tone marks that normalize() strips. A candidate that
    // begins with one of them would pass the lookup while stealing the mark
    // from the preceding syllable (decomposed / NFD input), so it is skipped.
    const leadingToneMark = /^[\u0300\u0301\u0304\u030c]/;
    const list = [];
    let prevWordFound = false;
    let wordEnd = text.length;
    while (wordEnd > 0) {
        let wordFound = false;
        // Longest candidate first: start at index 0, then move the start right.
        for (let count = wordEnd; count > 0; count--) {
            const word = text.substring(wordEnd - count, wordEnd);
            if (leadingToneMark.test(word)) {
                continue;
            }
            if (syllables_1.default.includes(normalize(word))) {
                wordFound = true;
                list.push(wrapInList ? [word] : word);
                // Leave wordEnd one past the syllable's start; the shared
                // decrement below then moves it onto the start itself.
                wordEnd -= (count - 1);
                break;
            }
        }
        if (!wordFound && everything) {
            const prevIndex = list.length - 1;
            const prevEntry = list[prevIndex];
            if (wordEnd === text.length || typeof prevEntry === 'object' || prevWordFound) {
                // Nothing to merge with: first entry, or the previous entry
                // is a matched syllable.
                list.push(text[wordEnd - 1]);
            }
            else if (typeof prevEntry === 'string') {
                // Previous entry is unmatched text too: prepend, because the
                // scan runs right to left.
                list[prevIndex] = text[wordEnd - 1] + prevEntry;
            }
        }
        wordEnd--;
        prevWordFound = wordFound;
    }
    // Entries were collected back to front.
    return list.reverse();
}
// Expose `split` both as a named export and as the module's default export.
exports.split = split;
exports.default = split;