romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
16 lines (15 loc) • 611 B
JavaScript
import { initKuroshiro, kuroshiro } from "../utils/kuroshiro.js";
// Matches a word followed by a common verb/copula ending that the spaced
// romaji output has split off (e.g. "megumare ta"). A Unicode-aware word class
// plus a negative lookahead is used instead of `\b(\w+)...\b`: `\w` and `\b`
// are ASCII-only in JavaScript, so words containing macron vowels (such as
// "gakkō") were skipped and a macron was treated as a word boundary.
const SPLIT_SUFFIX_PATTERN =
  /([\p{L}\p{M}\p{N}_]+)\s+(ta|te|nai|masu|desu|da)(?![\p{L}\p{M}\p{N}_])/gu;

// Matches whitespace that precedes a punctuation mark, covering both the ASCII
// and the full-width forms of "!" and "?".
const SPACE_BEFORE_PUNCTUATION_PATTERN = /\s+([.,!?！？。、])/g;

/**
 * Transliterates Japanese text to romaji and tidies the spacing of the result.
 *
 * The text is converted with `kuroshiro.convert` using `to: "romaji"` and
 * `mode: "spaced"`, then post-processed in three steps:
 *   1. a word followed by `ta`, `te`, `nai`, `masu`, `desu` or `da` is joined
 *      to that ending ("megumare ta" -> "megumareta");
 *   2. every run of whitespace is collapsed to a single space;
 *   3. whitespace directly before punctuation is removed.
 *
 * @param {string} input - Text to transliterate.
 * @returns {Promise<string>} The standardized romaji transliteration.
 */
export const romanizeJapanese = async (input) => {
  // Initialize on demand: `initKuroshiro` is awaited only while the shared
  // instance has no analyzer set.
  if (!kuroshiro._analyzer) {
    await initKuroshiro();
  }

  const transliteration = await kuroshiro.convert(input, {
    to: "romaji",
    mode: "spaced",
  });

  return transliteration
    .replace(SPLIT_SUFFIX_PATTERN, "$1$2")
    .replace(/\s+/g, " ")
    .replace(SPACE_BEFORE_PUNCTUATION_PATTERN, "$1");
};