romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
36 lines (35 loc) • 1.04 kB
JavaScript
import { getRoman } from "cantonese-romanisation";
// Matches one character whose Unicode Script property is Han. The `u` flag is
// required for the \p{…} escape; there is no `g`/`y` flag, so repeated
// `.test()` calls carry no `lastIndex` state between characters.
const hanziRegex = /[\p{Script=Han}]/u;
/**
 * Transliterates the Han-script portions of `input` via `getRoman`, leaving
 * every other character as it is.
 *
 * The input is walked one code point at a time and grouped into maximal runs
 * of consecutive Han / non-Han characters. Each Han run is handed to
 * `getRoman` in a single call; the first option of every entry it returns is
 * kept and the picks are joined with single spaces. Non-Han runs are copied
 * verbatim. Runs are concatenated without any separator, then all whitespace
 * sequences in the result are collapsed to one space and the ends trimmed.
 *
 * NOTE(review): `getRoman` is assumed to return an array of option arrays
 * (one entry per syllable/character) — confirm against the
 * `cantonese-romanisation` documentation.
 *
 * @param {string} input - Text to transliterate.
 * @returns {string} The transliterated, whitespace-normalised text.
 */
export const romanizeCantonese = (input) => {
  // Converts one completed run into the text that is appended to the result.
  const renderRun = (run, runIsHanzi) => {
    if (!runIsHanzi) {
      return run;
    }
    const firstChoices = getRoman(run).map((options) => options[0]);
    return firstChoices.join(" ");
  };

  let output = "";
  let run = "";
  // Stays null until the first character has been classified.
  let runIsHanzi = null;

  for (const char of input) {
    const charIsHanzi = hanziRegex.test(char);
    const isFirstChar = runIsHanzi === null;
    const switchesScript = !isFirstChar && charIsHanzi !== runIsHanzi;

    if (switchesScript) {
      // The previous run is complete: emit it before starting the next one.
      output += renderRun(run, runIsHanzi);
    }

    if (isFirstChar || switchesScript) {
      run = char;
      runIsHanzi = charIsHanzi;
    } else {
      run += char;
    }
  }

  // Emit whatever run was still open when the input ended.
  if (run) {
    output += renderRun(run, runIsHanzi);
  }

  return output.replace(/\s+/g, " ").trim();
};