UNPKG

pinyin-split

Version:

Split up any kind of Pinyin into an array of syllables.

39 lines (38 loc) 1.36 kB
import syllables from './syllables';

// A candidate that starts with a combining diacritical mark (U+0300–U+036F)
// begins in the middle of a decomposed character and must not be accepted.
const LEADING_COMBINING_MARK = /^[\u0300-\u036f]/;

/**
 * Reduce a candidate string to the form used for the syllable lookup:
 * combining macron/acute/caron/grave marks are removed (after NFD
 * decomposition), a trailing tone digit 1-5 following a word character or
 * "ü" is dropped, and the result is lower-cased.
 *
 * @param {string} text - Candidate substring.
 * @returns {string} Normalized lookup key.
 */
const normalize = (text) => {
  const withoutToneMarks = text
    .normalize('NFD')
    .replace(/\u0304|\u0301|\u030c|\u0300/g, '');
  // Recompose so that "u" + U+0308 becomes a single "ü" again.
  return withoutToneMarks
    .normalize('NFC')
    .replace(/(\w|ü)[1-5]/gi, '$1')
    .toLowerCase();
};

/**
 * Split a string into the syllables found in the imported syllable list.
 *
 * The text is scanned from its end towards its start; at each position the
 * longest substring ending there whose normalized form is in `syllables`
 * is taken. The collected pieces are returned in left-to-right order and
 * are exact slices of `text` (tone marks, digits and case are preserved).
 *
 * @param {string} text - Text to split.
 * @param {boolean} [everything=false] - When true, characters that are not
 *   part of any syllable are kept; adjacent ones are merged into a single
 *   string entry. When false they are dropped.
 * @param {boolean} [wrapInList=false] - When true, each syllable is wrapped
 *   in a one-element array (non-syllable text stays a plain string).
 * @returns {Array<string|string[]>} Pieces in their original order.
 */
export function split(text, everything = false, wrapInList = false) {
  const list = [];
  let prevWordFound = false;
  let wordEnd = text.length;

  while (wordEnd > 0) {
    let count = wordEnd;
    let wordFound = false;

    // Try the longest candidate ending at wordEnd first, then shorter ones.
    while (count > 0) {
      const word = text.substring(wordEnd - count, wordEnd);
      // normalize() strips tone marks anywhere in the candidate, so without
      // this guard a decomposed input such as "ni\u030cha\u030co" would match
      // "\u030cha\u030co" (presumably normalizing to a listed syllable) and
      // steal the tone mark that belongs to the preceding syllable.
      if (!LEADING_COMBINING_MARK.test(word) && syllables.includes(normalize(word))) {
        wordFound = true;
        list.push(wrapInList ? [word] : word);
        // Together with the unconditional wordEnd-- below this consumes
        // exactly `count` characters.
        wordEnd -= count - 1;
        break;
      }
      count--;
    }

    if (!wordFound && everything) {
      const prevIndex = list.length - 1;
      const prevEntry = list[prevIndex];
      if (wordEnd === text.length || typeof prevEntry === 'object' || prevWordFound) {
        // First character handled, or the previous entry was a syllable:
        // start a new non-syllable entry.
        list.push(text[wordEnd - 1]);
      } else if (typeof prevEntry === 'string') {
        // Previous entry is non-syllable text: grow it leftwards.
        list[prevIndex] = text[wordEnd - 1] + prevEntry;
      }
    }

    wordEnd--;
    prevWordFound = wordFound;
  }

  // Pieces were collected right-to-left.
  return list.reverse();
}

export default split;