romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
54 lines (53 loc) • 2 kB
JavaScript
;
// CommonJS / ES-module interop helper. Reuses an `__importDefault` already
// attached to `this` when one exists; otherwise defines it locally.
// A module flagged with `__esModule` is returned untouched; anything else is
// wrapped so that it can be read through a `.default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.romanizeIndic = void 0;
const sanscript_1 = __importDefault(require("@indic-transliteration/sanscript"));
// Lookup table: language code -> name of the Sanscript scheme that
// `romanizeIndic` passes as the *source* scheme to `sanscript.t`.
// Keys appear to be ISO 639-1 style codes. Note that "hi" and "mr" share
// the same "devanagari" scheme.
const languageSchemeMap = {
    "hi": "devanagari",
    "bn": "bengali",
    "te": "telugu",
    "ta": "tamil_extended",
    "gu": "gujarati",
    "mr": "devanagari",
    "pa": "gurmukhi",
    "kn": "kannada",
};
/**
 * Romanizes Indic-script text by delegating to Sanscript.
 *
 * @param {string} input - Text to transliterate.
 * @param {string} language - Key into `languageSchemeMap`; selects the source scheme.
 * @param {boolean} [omitDiacritics] - Falsy: emit IAST. Truthy: emit an ASCII
 *   scheme ("itrans_dravidian" for te/ta/kn, "hk" otherwise) and then fold its
 *   uppercase markers down to plain lowercase letters.
 * @returns {string} The transliterated text.
 */
const romanizeIndic = (input, language, omitDiacritics) => {
    // Danda (U+0964), double danda (U+0965), the Gurmukhi abbreviation sign
    // (U+0A76) and the Gujarati abbreviation sign (U+0AF0) all become ".".
    const punctuated = input.replace(/[\u0964\u0965\u0A76\u0AF0]/g, ".");
    // Choose the target scheme: IAST keeps diacritics, the other two are ASCII-only.
    const targetScheme = !omitDiacritics
        ? "iast"
        : ["te", "ta", "kn"].includes(language)
            ? "itrans_dravidian"
            : "hk";
    const romanized = sanscript_1.default
        .t(punctuated, languageSchemeMap[language], targetScheme)
        // Drop the Bengali nukta (U+09BC), which tends to survive as a
        // transliteration artifact.
        .replace(/\u09BC/g, "");
    if (!omitDiacritics) {
        return romanized;
    }
    // Ordered fold rules. Order matters: "N" is lowered before "~n" is
    // collapsed, so "~N" also ends up as a plain "n".
    // NOTE(review): the rules run over the whole output, so any uppercase
    // A/I/U/R/E/O/M/H/N already present in it is rewritten as well — confirm
    // this is intended for mixed-script input.
    const foldRules = [
        [/A/g, "aa"],
        [/I/g, "ii"],
        [/U/g, "uu"],
        [/R/g, "ri"],
        [/E/g, "ee"],
        [/O/g, "oo"],
        [/M/g, "m"], // anusvara
        [/H/g, "h"], // visarga
        [/N/g, "n"], // retroflex nasal
        [/~n/g, "n"], // palatal nasal
        [/chh/g, "ch"], // optional simplification
    ];
    return foldRules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), romanized);
};
exports.romanizeIndic = romanizeIndic;