romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
40 lines (39 loc) • 1.27 kB
JavaScript
;
// CommonJS module prelude. NOTE(review): the `__esModule` marker plus the
// `void 0` export placeholder look like TypeScript compiler output — confirm
// before hand-editing this file rather than its source.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder only; the real function is assigned to this export at the end of the module.
exports.romanizeCantonese = void 0;
// Third-party module whose `getRoman` function is called on each run of Han characters.
const cantonese_romanisation_1 = require("cantonese-romanisation");
// True when the tested string contains at least one code point whose Unicode
// script is Han. No `g`/`y` flag, so repeated `.test()` calls carry no
// `lastIndex` state between characters.
const hanziRegex = /[\p{Script=Han}]/u;
/**
 * Romanise the Han-script portions of a string and leave everything else in place.
 *
 * The input is walked code point by code point and cut into maximal runs that
 * are either entirely Han or entirely non-Han (as decided by `hanziRegex`).
 * Each Han run is handed to `getRoman` from `cantonese-romanisation` in one
 * call; the first candidate of every returned entry is kept and the kept
 * candidates are joined with single spaces. Non-Han runs are copied through
 * unchanged. No separator is inserted between a romanised run and the
 * neighbouring non-Han text.
 *
 * NOTE(review): this assumes `getRoman` returns one array of candidate
 * romanisations per input character — inferred from how the result is
 * consumed here; confirm against the library's documentation.
 *
 * @param {string} input - Text to transliterate.
 * @returns {string} The transliteration with every whitespace sequence
 *   collapsed to a single space and leading/trailing whitespace removed.
 */
const romanizeCantonese = (input) => {
    // Phase 1: partition the input into runs that share the same "is Han" answer.
    const runs = [];
    for (const codePoint of input) {
        const hanzi = hanziRegex.test(codePoint);
        const current = runs[runs.length - 1];
        if (current !== undefined && current.hanzi === hanzi) {
            current.text += codePoint;
        }
        else {
            runs.push({ hanzi, text: codePoint });
        }
    }
    // Phase 2: render each run, in order, onto the output string.
    let joined = "";
    for (const { hanzi, text } of runs) {
        if (!hanzi) {
            joined += text;
            continue;
        }
        const candidatesPerChar = (0, cantonese_romanisation_1.getRoman)(text);
        // A missing first candidate is `undefined`, which `join` renders as an
        // empty string; the stray space that leaves is collapsed below.
        const firstChoices = candidatesPerChar.map((candidates) => candidates[0]);
        joined += firstChoices.join(" ");
    }
    // Normalise whitespace across the whole result, including non-Han text.
    return joined.replace(/\s+/g, " ").trim();
};
// Publish the implementation on the CommonJS exports object.
exports.romanizeCantonese = romanizeCantonese;