UNPKG

romanize-string

Version:

A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).

54 lines (53 loc) 2 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.romanizeIndic = void 0;
const sanscript_1 = __importDefault(require("@indic-transliteration/sanscript"));

// Source scheme name passed to the transliterator for each supported language code.
const languageSchemeMap = {
    hi: "devanagari",
    bn: "bengali",
    te: "telugu",
    ta: "tamil_extended",
    gu: "gujarati",
    mr: "devanagari",
    pa: "gurmukhi",
    kn: "kannada",
};

/**
 * Romanizes Indic-script text by delegating to the sanscript `t` function.
 *
 * @param input - Text to transliterate; `.replace` is called on it directly.
 * @param language - Key into `languageSchemeMap` selecting the source scheme.
 *   No validation happens here: the looked-up value is passed through as-is.
 * @param omitDiacritics - When truthy, the target scheme is "itrans_dravidian"
 *   (for "te", "ta", "kn") or "hk" (all other codes), and the result is further
 *   flattened by the ASCII rule table below. When falsy, the target scheme is
 *   "iast" and no flattening is applied.
 * @returns The transliterated string.
 */
const romanizeIndic = (input, language, omitDiacritics) => {
    // ।, ॥, ૰, and the Gurmukhi abbreviation sign all become a full stop.
    const punctuationNormalized = input.replace(/[\u0964\u0965\u0A76\u0AF0]/g, ".");

    // Pick the target scheme.
    let targetScheme;
    if (!omitDiacritics) {
        targetScheme = "iast";
    }
    else if (["te", "ta", "kn"].includes(language)) {
        targetScheme = "itrans_dravidian";
    }
    else {
        targetScheme = "hk";
    }

    // Transliterate, then strip the Bengali nukta, which is often left behind
    // as an artifact of the transliteration.
    const romanized = sanscript_1.default
        .t(punctuationNormalized, languageSchemeMap[language], targetScheme)
        .replace(/\u09BC/g, "");

    if (!omitDiacritics) {
        return romanized;
    }

    // Ordered rewrite rules; they are applied in sequence, top to bottom.
    const asciiRules = [
        [/A/g, "aa"],
        [/I/g, "ii"],
        [/U/g, "uu"],
        [/R/g, "ri"],
        [/E/g, "ee"],
        [/O/g, "oo"],
        [/M/g, "m"], // anusvara
        [/H/g, "h"], // visarga
        [/N/g, "n"], // retroflex nasal
        [/~n/g, "n"], // palatal nasal
        [/chh/g, "ch"], // optional simplification
    ];
    let asciiText = romanized;
    for (const [pattern, replacement] of asciiRules) {
        asciiText = asciiText.replace(pattern, replacement);
    }
    return asciiText;
};
exports.romanizeIndic = romanizeIndic;