UNPKG

romanize-string

Version:

A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).

36 lines (35 loc) 1.04 kB
import { getRoman } from "cantonese-romanisation";

// Matches a single code point belonging to the Unicode Han script.
const hanziRegex = /[\p{Script=Han}]/u;

/**
 * Romanises one contiguous run of Han characters.
 *
 * Takes element [0] of every entry returned by `getRoman` and joins the
 * results with single spaces.
 * NOTE(review): presumably each entry lists the candidate romanisations for
 * one character, with the preferred one first; confirm against the
 * cantonese-romanisation documentation.
 *
 * @param {string} hanziText - Text consisting only of Han characters.
 * @returns {string} Space-separated romanisation of the run.
 */
const romanizeHanziRun = (hanziText) =>
  getRoman(hanziText)
    .map((candidates) => candidates[0])
    .join(" ");

/**
 * Transliterates the Han characters of `input` into Latin script and leaves
 * every other character as it is.
 *
 * The input is split into maximal runs of Han / non-Han code points. Han runs
 * are romanised, non-Han runs are copied verbatim, and the pieces are
 * concatenated in their original order. No separator is added where a Han run
 * meets a non-Han run. Finally, every stretch of whitespace is collapsed to a
 * single space and the result is trimmed.
 *
 * @param {string} input - Text to transliterate.
 * @returns {string} The transliterated, whitespace-normalised text.
 */
export const romanizeCantonese = (input) => {
  // Pass 1: group consecutive code points that share the same Han / non-Han
  // classification. `for...of` walks code points, so astral-plane Han
  // characters are never split into surrogate halves.
  const runs = [];
  for (const codePoint of input) {
    const hanzi = hanziRegex.test(codePoint);
    const current = runs[runs.length - 1];
    if (current !== undefined && current.hanzi === hanzi) {
      current.text += codePoint;
    } else {
      runs.push({ hanzi, text: codePoint });
    }
  }

  // Pass 2: romanise the Han runs and pass everything else through untouched.
  const pieces = runs.map(({ hanzi, text }) =>
    hanzi ? romanizeHanziRun(text) : text
  );

  return pieces.join("").replace(/\s+/g, " ").trim();
};